Index: head/sys/net/route.c =================================================================== --- head/sys/net/route.c (revision 54349) +++ head/sys/net/route.c (revision 54350) @@ -1,1083 +1,1073 @@ /* * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.2 (Berkeley) 11/15/93 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_mrouting.h" #include #include #include #include #include #include #include #include #include #include #include #define SA(p) ((struct sockaddr *)(p)) struct route_cb route_cb; static struct rtstat rtstat; struct radix_node_head *rt_tables[AF_MAX+1]; static int rttrash; /* routes not in table but not freed */ static void rt_maskedcopy __P((struct sockaddr *, struct sockaddr *, struct sockaddr *)); static void rtable_init __P((void **)); static void rtable_init(table) void **table; { struct domain *dom; for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_rtattach) dom->dom_rtattach(&table[dom->dom_family], dom->dom_rtoffset); } void route_init() { rn_init(); /* initialize all zeroes, all ones, mask table */ rtable_init((void **)rt_tables); } /* * Packet routing routines. */ void rtalloc(ro) register struct route *ro; { if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) return; /* XXX */ ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL); } void rtalloc_ign(ro, ignore) register struct route *ro; u_long ignore; { if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) return; /* XXX */ ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore); } -/* for INET6 */ -void -rtcalloc(ro) - register struct route *ro; -{ - if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) - return; /* XXX */ - ro->ro_rt = rtalloc1(&ro->ro_dst, RTF_CLONING, 0UL); -} - /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. */ struct rtentry * rtalloc1(dst, report, ignflags) register struct sockaddr *dst; int report; u_long ignflags; { register struct radix_node_head *rnh = rt_tables[dst->sa_family]; register struct rtentry *rt; register struct radix_node *rn; struct rtentry *newrt = 0; struct rt_addrinfo info; u_long nflags; int s = splnet(), err = 0, msgtype = RTM_MISS; /* * Look up the address in the table for that Address Family */ if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) && ((rn->rn_flags & RNF_ROOT) == 0)) { /* * If we find it and it's not the root node, then * get a refernce on the rtentry associated. */ newrt = rt = (struct rtentry *)rn; nflags = rt->rt_flags & ~ignflags; if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) { /* * We are apparently adding (report = 0 in delete). * If it requires that it be cloned, do so. * (This implies it wasn't a HOST route.) */ err = rtrequest(RTM_RESOLVE, dst, SA(0), SA(0), 0, &newrt); if (err) { /* * If the cloning didn't succeed, maybe * what we have will do. Return that. */ newrt = rt; rt->rt_refcnt++; goto miss; } if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) { /* * If the new route specifies it be * externally resolved, then go do that. */ msgtype = RTM_RESOLVE; goto miss; } } else rt->rt_refcnt++; } else { /* * Either we hit the root or couldn't find any match, * Which basically means * "caint get there frm here" */ rtstat.rts_unreach++; miss: if (report) { /* * If required, report the failure to the supervising * Authorities. * For a delete, this is not an error. (report == 0) */ bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg(msgtype, &info, 0, err); } } splx(s); return (newrt); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void rtfree(rt) register struct rtentry *rt; { /* * find the tree for that address family */ register struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family]; register struct ifaddr *ifa; if (rt == 0 || rnh == 0) panic("rtfree"); /* * decrement the reference count by one and if it reaches 0, * and there is a close function defined, call the close function */ rt->rt_refcnt--; if(rnh->rnh_close && rt->rt_refcnt == 0) { rnh->rnh_close((struct radix_node *)rt, rnh); } /* * If we are no longer "up" (and ref == 0) * then we can free the resources associated * with the route. */ if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtfree 2"); /* * the rtentry must have been removed from the routing table * so it is represented in rttrash.. remove that now. */ rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); return; } #endif /* * release references on items we hold them on.. * e.g other routes and ifaddrs. */ if((ifa = rt->rt_ifa)) IFAFREE(ifa); if (rt->rt_parent) { RTFREE(rt->rt_parent); } /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ Free(rt_key(rt)); /* * and the rtentry itself of course */ Free(rt); } } void ifafree(ifa) register struct ifaddr *ifa; { if (ifa == NULL) panic("ifafree"); if (ifa->ifa_refcnt == 0) free(ifa, M_IFADDR); else ifa->ifa_refcnt--; } /* * Force a routing table entry to the specified * destination to go through the given gateway. * Normally called as a result of a routing redirect * message from the network layer. * * N.B.: must be called at splnet * */ void rtredirect(dst, gateway, netmask, flags, src, rtp) struct sockaddr *dst, *gateway, *netmask, *src; int flags; struct rtentry **rtp; { register struct rtentry *rt; int error = 0; short *stat = 0; struct rt_addrinfo info; struct ifaddr *ifa; /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway)) == 0) { error = ENETUNREACH; goto out; } rt = rtalloc1(dst, 0, 0UL); /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) if (!(flags & RTF_DONE) && rt && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) error = EINVAL; else if (ifa_ifwithaddr(gateway)) error = EHOSTUNREACH; if (error) goto done; /* * Create a new entry if we just got back a wildcard entry * or the the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net. */ create: flags |= RTF_GATEWAY | RTF_DYNAMIC; error = rtrequest((int)RTM_ADD, dst, gateway, netmask, flags, (struct rtentry **)0); stat = &rtstat.rts_dynamic; } else { /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &rtstat.rts_newgateway; /* * add the key and gateway (in one malloc'd chunk). */ rt_setgate(rt, rt_key(rt), gateway); } } else error = EHOSTUNREACH; done: if (rt) { if (rtp && !error) *rtp = rt; else rtfree(rt); } out: if (error) rtstat.rts_badredirect++; else if (stat != NULL) (*stat)++; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg(RTM_REDIRECT, &info, flags, error); } /* * Routing table ioctl interface. */ int rtioctl(req, data, p) int req; caddr_t data; struct proc *p; { #ifdef INET /* Multicast goop, grrr... */ #ifdef MROUTING return mrt_ioctl(req, data); #else return mrt_ioctl(req, data, p); #endif #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * ifa_ifwithroute(flags, dst, gateway) int flags; struct sockaddr *dst, *gateway; { register struct ifaddr *ifa; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = 0; if (flags & RTF_HOST) { ifa = ifa_ifwithdstaddr(dst); } if (ifa == 0) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway); } if (ifa == 0) ifa = ifa_ifwithnet(gateway); if (ifa == 0) { struct rtentry *rt = rtalloc1(dst, 0, 0UL); if (rt == 0) return (0); rt->rt_refcnt--; if ((ifa = rt->rt_ifa) == 0) return (0); } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == 0) ifa = oifa; } return (ifa); } #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) static int rt_fixdelete __P((struct radix_node *, void *)); static int rt_fixchange __P((struct radix_node *, void *)); struct rtfc_arg { struct rtentry *rt0; struct radix_node_head *rnh; }; /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int rtrequest(req, dst, gateway, netmask, flags, ret_nrt) int req, flags; struct sockaddr *dst, *gateway, *netmask; struct rtentry **ret_nrt; { int s = splnet(); int error = 0; register struct rtentry *rt; register struct radix_node *rn; register struct radix_node_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; #define senderr(x) { error = x ; goto bad; } /* * Find the correct routing tree to use for this Address Family */ if ((rnh = rt_tables[dst->sa_family]) == 0) senderr(ESRCH); /* * If we are adding a host route then we don't want to put * a netmask in the tree */ if (flags & RTF_HOST) netmask = 0; switch (req) { case RTM_DELETE: /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. */ if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0) senderr(ESRCH); if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtrequest delete"); rt = (struct rtentry *)rn; /* * Now search what's left of the subtree for any cloned * routes which might have been formed from this node. */ if ((rt->rt_flags & RTF_PRCLONING) && netmask) { rnh->rnh_walktree_from(rnh, dst, netmask, rt_fixdelete, rt); } /* * Remove any external references we may have. * This might result in another rtentry being freed if * we held its last reference. */ if (rt->rt_gwroute) { rt = rt->rt_gwroute; RTFREE(rt); (rt = (struct rtentry *)rn)->rt_gwroute = 0; } /* * NB: RTF_UP must be set during the search above, * because we might delete the last ref, causing * rt to get freed prematurely. * eh? then why not just add a reference? * I'm not sure how RTF_UP helps matters. (JRE) */ rt->rt_flags &= ~RTF_UP; /* * give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0)); /* * one more rtentry floating around that is not * linked to the routing table. */ rttrash++; /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) *ret_nrt = rt; else if (rt->rt_refcnt <= 0) { rt->rt_refcnt++; /* make a 1->0 transition */ rtfree(rt); } break; case RTM_RESOLVE: if (ret_nrt == 0 || (rt = *ret_nrt) == 0) senderr(EINVAL); ifa = rt->rt_ifa; flags = rt->rt_flags & ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC); flags |= RTF_WASCLONED; gateway = rt->rt_gateway; if ((netmask = rt->rt_genmask) == 0) flags |= RTF_HOST; goto makeroute; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) panic("rtrequest: GATEWAY but no gateway"); if ((ifa = ifa_ifwithroute(flags, dst, gateway)) == 0) senderr(ENETUNREACH); makeroute: R_Malloc(rt, struct rtentry *, sizeof(*rt)); if (rt == 0) senderr(ENOBUFS); Bzero(rt, sizeof(*rt)); rt->rt_flags = RTF_UP | flags; /* * Add the gateway. Possibly re-malloc-ing the storage for it * also add the rt_gwroute if possible. */ if ((error = rt_setgate(rt, dst, gateway)) != 0) { Free(rt); senderr(error); } /* * point to the (possibly newly malloc'd) dest address. */ ndst = rt_key(rt); /* * make sure it contains the value we want (masked if needed). */ if (netmask) { rt_maskedcopy(dst, ndst, netmask); } else Bcopy(dst, ndst, dst->sa_len); /* * Note that we now have a reference to the ifa. * This moved from below so that rnh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ ifa->ifa_refcnt++; rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, rnh, rt->rt_nodes); if (rn == 0) { struct rtentry *rt2; /* * Uh-oh, we already have one of these in the tree. * We do a special hack: if the route that's already * there was generated by the protocol-cloning * mechanism, then we just blow it away and retry * the insertion of the new one. */ rt2 = rtalloc1(dst, 0, RTF_PRCLONING); if (rt2 && rt2->rt_parent) { rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); RTFREE(rt2); rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, rnh, rt->rt_nodes); } else if (rt2) { /* undo the extra ref we got */ RTFREE(rt2); } } /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == 0) { if (rt->rt_gwroute) rtfree(rt->rt_gwroute); if (rt->rt_ifa) { IFAFREE(rt->rt_ifa); } Free(rt_key(rt)); Free(rt); senderr(EEXIST); } rt->rt_parent = 0; /* * If we got here from RESOLVE, then we are cloning * so clone the rest, and note that we * are a clone (and increment the parent's references) */ if (req == RTM_RESOLVE) { rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */ if ((*ret_nrt)->rt_flags & RTF_PRCLONING) { rt->rt_parent = (*ret_nrt); (*ret_nrt)->rt_refcnt++; } } /* * if this protocol has something to add to this then * allow it to do that as well. */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0)); /* * We repeat the same procedure from rt_setgate() here because * it doesn't fire when we call it there because the node * hasn't been added to the tree yet. */ if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { struct rtfc_arg arg; arg.rnh = rnh; arg.rt0 = rt; rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), rt_fixchange, &arg); } /* * actually return a resultant rtentry and * give the caller a single reference. */ if (ret_nrt) { *ret_nrt = rt; rt->rt_refcnt++; } break; } bad: splx(s); return (error); } /* * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family'' * (i.e., the routes related to it by the operation of cloning). This * routine is iterated over all potential former-child-routes by way of * rnh->rnh_walktree_from() above, and those that actually are children of * the late parent (passed in as VP here) are themselves deleted. */ static int rt_fixdelete(rn, vp) struct radix_node *rn; void *vp; { struct rtentry *rt = (struct rtentry *)rn; struct rtentry *rt0 = vp; if (rt->rt_parent == rt0 && !(rt->rt_flags & RTF_PINNED)) { return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } return 0; } /* * This routine is called from rt_setgate() to do the analogous thing for * adds and changes. There is the added complication in this case of a * middle insert; i.e., insertion of a new network route between an older * network route and (cloned) host routes. For this reason, a simple check * of rt->rt_parent is insufficient; each candidate route must be tested * against the (mask, value) of the new route (passed as before in vp) * to see if the new route matches it. Unfortunately, this has the obnoxious * property of also triggering for insertion /above/ a pre-existing network * route and clones. Sigh. This may be fixed some day. * * XXX - it may be possible to do fixdelete() for changes and reserve this * routine just for adds. I'm not sure why I thought it was necessary to do * changes this way. */ #ifdef DEBUG static int rtfcdebug = 0; #endif static int rt_fixchange(rn, vp) struct radix_node *rn; void *vp; { struct rtentry *rt = (struct rtentry *)rn; struct rtfc_arg *ap = vp; struct rtentry *rt0 = ap->rt0; struct radix_node_head *rnh = ap->rnh; u_char *xk1, *xm1, *xk2; int i, len; #ifdef DEBUG if (rtfcdebug) printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0); #endif if (!rt->rt_parent || (rt->rt_flags & RTF_PINNED)) { #ifdef DEBUG if(rtfcdebug) printf("no parent or pinned\n"); #endif return 0; } if (rt->rt_parent == rt0) { #ifdef DEBUG if(rtfcdebug) printf("parent match\n"); #endif return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } /* * There probably is a function somewhere which does this... * if not, there should be. */ len = imin(((struct sockaddr *)rt_key(rt0))->sa_len, ((struct sockaddr *)rt_key(rt))->sa_len); xk1 = (u_char *)rt_key(rt0); xm1 = (u_char *)rt_mask(rt0); xk2 = (u_char *)rt_key(rt); for (i = rnh->rnh_treetop->rn_off; i < len; i++) { if ((xk2[i] & xm1[i]) != xk1[i]) { #ifdef DEBUG if(rtfcdebug) printf("no match\n"); #endif return 0; } } /* * OK, this node is a clone, and matches the node currently being * changed/added under the node's mask. So, get rid of it. */ #ifdef DEBUG if(rtfcdebug) printf("deleting\n"); #endif return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } int rt_setgate(rt0, dst, gate) struct rtentry *rt0; struct sockaddr *dst, *gate; { caddr_t new, old; int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len); register struct rtentry *rt = rt0; struct radix_node_head *rnh = rt_tables[dst->sa_family]; /* * A host route with the destination equal to the gateway * will interfere with keeping LLINFO in the routing * table, so disallow it. */ if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) == (RTF_HOST|RTF_GATEWAY)) && (dst->sa_len == gate->sa_len) && (bcmp(dst, gate, dst->sa_len) == 0)) { /* * The route might already exist if this is an RTM_CHANGE * or a routing redirect, so try to delete it. */ if (rt_key(rt0)) rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt0), rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0); return EADDRNOTAVAIL; } /* * Both dst and gateway are stored in the same malloc'd chunk * (If I ever get my hands on....) * if we need to malloc a new chunk, then keep the old one around * till we don't need it any more. */ if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) { old = (caddr_t)rt_key(rt); R_Malloc(new, caddr_t, dlen + glen); if (new == 0) return ENOBUFS; rt->rt_nodes->rn_key = new; } else { /* * otherwise just overwrite the old one */ new = rt->rt_nodes->rn_key; old = 0; } /* * copy the new gateway value into the memory chunk */ Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen); /* * if we are replacing the chunk (or it's new) we need to * replace the dst as well */ if (old) { Bcopy(dst, new, dlen); Free(old); } /* * If there is already a gwroute, it's now almost definitly wrong * so drop it. */ if (rt->rt_gwroute) { rt = rt->rt_gwroute; RTFREE(rt); rt = rt0; rt->rt_gwroute = 0; } /* * Cloning loop avoidance: * In the presence of protocol-cloning and bad configuration, * it is possible to get stuck in bottomless mutual recursion * (rtrequest rt_setgate rtalloc1). We avoid this by not allowing * protocol-cloning to operate for gateways (which is probably the * correct choice anyway), and avoid the resulting reference loops * by disallowing any route to run through itself as a gateway. * This is obviously mandatory when we get rt->rt_output(). */ if (rt->rt_flags & RTF_GATEWAY) { rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING); if (rt->rt_gwroute == rt) { RTFREE(rt->rt_gwroute); rt->rt_gwroute = 0; return EDQUOT; /* failure */ } } /* * This isn't going to do anything useful for host routes, so * don't bother. Also make sure we have a reasonable mask * (we don't yet have one during adds). */ if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { struct rtfc_arg arg; arg.rnh = rnh; arg.rt0 = rt; rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), rt_fixchange, &arg); } return 0; } static void rt_maskedcopy(src, dst, netmask) struct sockaddr *src, *dst, *netmask; { register u_char *cp1 = (u_char *)src; register u_char *cp2 = (u_char *)dst; register u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ int rtinit(ifa, cmd, flags) register struct ifaddr *ifa; int cmd, flags; { register struct rtentry *rt; register struct sockaddr *dst; register struct sockaddr *deldst; struct mbuf *m = 0; struct rtentry *nrt = 0; int error; dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr; /* * If it's a delete, check that if it exists, it's on the correct * interface or we might scrub a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) */ if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { m = m_get(M_DONTWAIT, MT_SONAME); if (m == NULL) return(ENOBUFS); deldst = mtod(m, struct sockaddr *); rt_maskedcopy(dst, deldst, ifa->ifa_netmask); dst = deldst; } /* * Get an rtentry that is in the routing tree and * contains the correct info. (if this fails, can't get there). * We set "report" to FALSE so that if it doesn't exist, * it doesn't report an error or clone a route, etc. etc. */ rt = rtalloc1(dst, 0, 0UL); if (rt) { /* * Ok so we found the rtentry. it has an extra reference * for us at this stage. we won't need that so * lop that off now. */ rt->rt_refcnt--; if (rt->rt_ifa != ifa) { /* * If the interface in the rtentry doesn't match * the interface we are using, then we don't * want to delete it, so return an error. * This seems to be the only point of * this whole RTM_DELETE clause. */ if (m) (void) m_free(m); return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } } /* XXX */ #if 0 else { /* * One would think that as we are deleting, and we know * it doesn't exist, we could just return at this point * with an "ELSE" clause, but apparently not.. */ return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } #endif } /* * Do the actual request */ error = rtrequest(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask, flags | ifa->ifa_flags, &nrt); if (m) (void) m_free(m); /* * If we are deleting, and we found an entry, then * it's been removed from the tree.. now throw it away. */ if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) { /* * notify any listenning routing agents of the change */ rt_newaddrmsg(cmd, ifa, error, nrt); if (rt->rt_refcnt <= 0) { rt->rt_refcnt++; /* need a 1->0 transition to free */ rtfree(rt); } } /* * We are adding, and we have a returned routing entry. * We need to sanity check the result. */ if (cmd == RTM_ADD && error == 0 && (rt = nrt)) { /* * We just wanted to add it.. we don't actually need a reference */ rt->rt_refcnt--; /* * If it came back with an unexpected interface, then it must * have already existed or something. (XXX) */ if (rt->rt_ifa != ifa) { if (!(rt->rt_ifa->ifa_ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK))) printf("rtinit: wrong ifa (%p) was (%p)\n", ifa, rt->rt_ifa); /* * Ask that the protocol in question * remove anything it has associated with * this route and ifaddr. */ if (rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0)); /* * Remove the reference to its ifaddr. */ IFAFREE(rt->rt_ifa); /* * And substitute in references to the ifaddr * we are adding. */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu; /*XXX*/ ifa->ifa_refcnt++; /* * Now ask the protocol to check if it needs * any special processing in its new form. */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_ADD, rt, SA(0)); } /* * notify any listenning routing agents of the change */ rt_newaddrmsg(cmd, ifa, error, nrt); } return (error); } SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, route_init, 0); Index: head/sys/net/route.h =================================================================== --- head/sys/net/route.h (revision 54349) +++ head/sys/net/route.h (revision 54350) @@ -1,298 +1,297 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.3 (Berkeley) 4/19/94 * $FreeBSD$ */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ /* * A route consists of a destination address and a reference * to a routing entry. These are often held by protocols * in their control blocks, e.g. inpcb. */ struct route { struct rtentry *ro_rt; struct sockaddr ro_dst; }; /* * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. */ struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_filler[4]; /* will be used for T/TCP later */ }; /* * rmx_rtt and rmx_rttvar are stored as microseconds; * RTTTOPRHZ(rtt) converts to a value suitable for use * by a protocol slowtimo counter. */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) /* * XXX kernel function pointer `rt_output' is visible to applications. */ struct mbuf; /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ #ifndef RNF_NORMAL #include #endif struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ #define rt_key(r) ((struct sockaddr *)((r)->rt_nodes->rn_key)) #define rt_mask(r) ((struct sockaddr *)((r)->rt_nodes->rn_mask)) struct sockaddr *rt_gateway; /* value */ long rt_refcnt; /* # held references */ u_long rt_flags; /* up/down?, host/net */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface to use */ struct sockaddr *rt_genmask; /* for generation of cloned routes */ caddr_t rt_llinfo; /* pointer to link level info cache */ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ int (*rt_output) __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); /* output routine for this (rt,if) */ struct rtentry *rt_parent; /* cloning parent of this route */ void *rt_filler2; /* more filler */ }; /* * Following structure necessary for 4.3 compatibility; * We should eventually move it to a compat file. */ struct ortentry { u_long rt_hash; /* to speed lookups */ struct sockaddr rt_dst; /* key */ struct sockaddr rt_gateway; /* value */ short rt_flags; /* up/down?, host/net */ short rt_refcnt; /* # held references */ u_long rt_use; /* raw # packets forwarded */ struct ifnet *rt_ifp; /* the answer: interface to use */ }; #define rt_use rt_rmx.rmx_pksent #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused */ #define RTF_CLONING 0x100 /* generate new routes on use */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* generated by link layer (e.g. ARP) */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ #define RTF_PRCLONING 0x10000 /* protocol requires cloning */ #define RTF_WASCLONED 0x20000 /* route generated through cloning */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ #define RTF_PINNED 0x100000 /* future use */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x1000000 and up unassigned */ /* * Routing statistics. */ struct rtstat { short rts_badredirect; /* bogus redirect calls */ short rts_dynamic; /* routes created by redirects */ short rts_newgateway; /* routes modified by redirects */ short rts_unreach; /* lookups which failed */ short rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_use; /* from rtentry */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. */ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ #define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ #define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ struct rt_addrinfo { int rti_addrs; struct sockaddr *rti_info[RTAX_MAX]; }; struct route_cb { int ip_count; int ip6_count; int ipx_count; int ns_count; int iso_count; int any_count; }; #ifdef KERNEL #define RTFREE(rt) \ do { \ if ((rt)->rt_refcnt <= 1) \ rtfree(rt); \ else \ (rt)->rt_refcnt--; \ } while (0) extern struct route_cb route_cb; extern struct radix_node_head *rt_tables[AF_MAX+1]; struct ifmultiaddr; struct proc; void route_init __P((void)); void rt_ifmsg __P((struct ifnet *)); void rt_missmsg __P((int, struct rt_addrinfo *, int, int)); void rt_newaddrmsg __P((int, struct ifaddr *, int, struct rtentry *)); void rt_newmaddrmsg __P((int, struct ifmultiaddr *)); int rt_setgate __P((struct rtentry *, struct sockaddr *, struct sockaddr *)); void rtalloc __P((struct route *)); void rtalloc_ign __P((struct route *, u_long)); -void rtcalloc __P((struct route *)); /* for INET6 */ struct rtentry * rtalloc1 __P((struct sockaddr *, int, u_long)); void rtfree __P((struct rtentry *)); int rtinit __P((struct ifaddr *, int, int)); int rtioctl __P((int, caddr_t, struct proc *)); void rtredirect __P((struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, struct rtentry **)); int rtrequest __P((int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **)); #endif #endif Index: head/sys/netinet6/frag6.c =================================================================== --- head/sys/netinet6/frag6.c (revision 54349) +++ head/sys/netinet6/frag6.c (revision 54350) @@ -1,573 +1,573 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Define it to get a correct behavior on per-interface statistics. * You will need to perform an extra routing table lookup, per fragment, * to do it. This may, or may not be, a performance hit. */ #define IN6_IFSTAT_STRICT static void frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *)); static void frag6_deq __P((struct ip6asfrag *)); static void frag6_insque __P((struct ip6q *, struct ip6q *)); static void frag6_remque __P((struct ip6q *)); static void frag6_freef __P((struct ip6q *)); int frag6_doing_reass; u_int frag6_nfragpackets; struct ip6q ip6q; /* ip6 reassemble queue */ #if !defined(M_FTABLE) MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); #endif /* * Initialise reassembly queue and fragment identifier. */ void frag6_init() { struct timeval tv; /* * in many cases, random() here does NOT return random number * as initialization during bootstrap time occur in fixed order. */ microtime(&tv); ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; ip6_id = random() ^ tv.tv_usec; } /* * Fragment input */ int frag6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp, *t; struct ip6_hdr *ip6; struct ip6_frag *ip6f; struct ip6q *q6; struct ip6asfrag *af6, *ip6af; int offset = *offp, nxt, i, next; int first_frag = 0; u_short fragoff, frgpartlen; struct ifnet *dstifp; #ifdef IN6_IFSTAT_STRICT static struct route_in6 ro; struct sockaddr_in6 *dst; #endif IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ dst = (struct sockaddr_in6 *)&ro.ro_dst; if (ro.ro_rt && ((ro.ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { RTFREE(ro.ro_rt); ro.ro_rt = (struct rtentry *)0; } if (ro.ro_rt == NULL) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; } - rtcalloc((struct route *)&ro); + rtalloc((struct route *)&ro); if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL) dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; #else /* we are violating the spec, this is not the destination interface */ if ((m->m_flags & M_PKTHDR) != 0) dstifp = m->m_pkthdr.rcvif; #endif /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); return IPPROTO_DONE; } /* * check whether fragment packet's fragment length is * multiple of 8 octets. * sizeof(struct ip6_frag) == 8 * sizeof(struct ip6_hdr) = 40 */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); in6_ifstat_inc(dstifp, ifs6_reass_fail); return IPPROTO_DONE; } ip6stat.ip6s_fragments++; in6_ifstat_inc(dstifp, ifs6_reass_reqd); /* * Presence of header sizes in mbufs * would confuse code below. */ offset += sizeof(struct ip6_frag); m->m_data += offset; m->m_len -= offset; for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)) break; if (q6 == &ip6q) { /* * the first fragment to arrive, create a reassembly queue. */ first_frag = 1; frag6_nfragpackets++; /* * Enforce upper bound on number of fragmented packets * for which we attempt reassembly; * If maxfrag is 0, never accept fragments. * If maxfrag is -1, accept all fragments without limitation. */ if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) { ip6stat.ip6s_fragoverflow++; in6_ifstat_inc(dstifp, ifs6_reass_fail); frag6_freef(ip6q.ip6q_prev); } q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, M_DONTWAIT); if (q6 == NULL) goto dropfrag; frag6_insque(q6, &ip6q); q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; q6->ip6q_ident = ip6f->ip6f_ident; q6->ip6q_arrive = 0; /* Is it used anywhere? */ q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ } /* * If it's the 1st fragment, record the length of the * unfragmentable part and the next header of the fragment header. */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); if (fragoff == 0) { q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag); q6->ip6q_nxt = ip6f->ip6f_nxt; } /* * Check that the reassembled packet would not exceed 65535 bytes * in size. * If it would exceed, discard the fragment and return an ICMP error. */ frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. */ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { m->m_data -= offset; m->m_len += offset; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + 2); return(IPPROTO_DONE); } } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { m->m_data -= offset; m->m_len += offset; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + 2); return(IPPROTO_DONE); } /* * If it's the first fragment, do the above check for each * fragment already stored in the reassembly queue. */ if (fragoff == 0) { struct ip6asfrag *af6dwn; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6dwn) { af6dwn = af6->ip6af_down; if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > IPV6_MAXPACKET) { struct mbuf *merr = IP6_REASS_MBUF(af6); struct ip6_hdr *ip6err; int erroff = af6->ip6af_offset; /* dequeue the fragment. */ frag6_deq(af6); /* adjust pointer. */ merr->m_data -= af6->ip6af_offset; merr->m_len += af6->ip6af_offset; ip6err = mtod(merr, struct ip6_hdr *); /* * Restore source and destination addresses * in the erroneous IPv6 header. */ ip6err->ip6_src = q6->ip6q_src; ip6err->ip6_dst = q6->ip6q_dst; icmp6_error(merr, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff - sizeof(struct ip6_frag) + 2); } } } /* Override the IPv6 header */ ip6af = (struct ip6asfrag *)ip6; ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; ip6af->ip6af_off = fragoff; ip6af->ip6af_frglen = frgpartlen; ip6af->ip6af_offset = offset; IP6_REASS_MBUF(ip6af) = m; if (first_frag) { af6 = (struct ip6asfrag *)q6; goto insert; } /* * Find a segment which begins after this one does. */ for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) if (af6->ip6af_off > ip6af->ip6af_off) break; /* * If the incoming framgent overlaps some existing fragments in * the reassembly queue, drop it, since it is dangerous to override * existing fragments from a security point of view. */ if (af6->ip6af_up != (struct ip6asfrag *)q6) { i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - ip6af->ip6af_off; if (i > 0) { log(LOG_ERR, "%d bytes of a fragment from %s " "overlaps the previous fragment\n", i, ip6_sprintf(&q6->ip6q_src)); goto dropfrag; } } if (af6 != (struct ip6asfrag *)q6) { i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; if (i > 0) { log(LOG_ERR, "%d bytes of a fragment from %s " "overlaps the succeeding fragment", i, ip6_sprintf(&q6->ip6q_src)); goto dropfrag; } } insert: /* * Stick new segment in its place; * check for complete reassembly. * Move to front of packet queue, as we are * the most recently active fragmented packet. */ frag6_enq(ip6af, af6->ip6af_up); next = 0; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { frag6_doing_reass = 0; return IPPROTO_DONE; } next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { frag6_doing_reass = 0; return IPPROTO_DONE; } /* * Reassembly is complete; concatenate fragments. */ ip6af = q6->ip6q_down; t = m = IP6_REASS_MBUF(ip6af); af6 = ip6af->ip6af_down; while (af6 != (struct ip6asfrag *)q6) { while (t->m_next) t = t->m_next; t->m_next = IP6_REASS_MBUF(af6); af6 = af6->ip6af_down; } /* adjust offset to point where the original next header starts */ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); ip6 = (struct ip6_hdr *)ip6af; ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; nxt = q6->ip6q_nxt; /* * Delete frag6 header with as a few cost as possible. */ if (offset < m->m_len) ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), offset); else { ovbcopy(mtod(m, caddr_t), (caddr_t)ip6 + offset, m->m_len); m->m_data -= sizeof(struct ip6_frag); } m->m_data -= offset; m->m_len += offset; /* * Store NXT to the original. */ { char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */ *prvnxtp = nxt; } frag6_remque(q6); free(q6, M_FTABLE); frag6_nfragpackets--; if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ int plen = 0; for (t = m; t; t = t->m_next) plen += t->m_len; m->m_pkthdr.len = plen; } ip6stat.ip6s_reassembled++; in6_ifstat_inc(dstifp, ifs6_reass_ok); /* * Tell launch routine the next header */ *mp = m; *offp = offset; frag6_doing_reass = 0; return nxt; dropfrag: in6_ifstat_inc(dstifp, ifs6_reass_fail); ip6stat.ip6s_fragdropped++; m_freem(m); return IPPROTO_DONE; } /* * Free a fragment reassembly header and all * associated datagrams. */ void frag6_freef(q6) struct ip6q *q6; { struct ip6asfrag *af6, *down6; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); down6 = af6->ip6af_down; frag6_deq(af6); /* * Return ICMP time exceeded error for the 1st fragment. * Just free other fragments. */ if (af6->ip6af_off == 0) { struct ip6_hdr *ip6; /* adjust pointer */ m->m_data -= af6->ip6af_offset; m->m_len += af6->ip6af_offset; ip6 = mtod(m, struct ip6_hdr *); /* restoure source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_REASSEMBLY, 0); } else m_freem(m); } frag6_remque(q6); free(q6, M_FTABLE); frag6_nfragpackets--; } /* * Put an ip fragment on a reassembly chain. * Like insque, but pointers in middle of structure. */ void frag6_enq(af6, up6) struct ip6asfrag *af6, *up6; { af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; up6->ip6af_down->ip6af_up = af6; up6->ip6af_down = af6; } /* * To frag6_enq as remque is to insque. */ void frag6_deq(af6) struct ip6asfrag *af6; { af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } void frag6_insque(new, old) struct ip6q *new, *old; { new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; old->ip6q_next = new; } void frag6_remque(p6) struct ip6q *p6; { p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; } /* * IP timer processing; * if a timer expires on a reassembly * queue, discard it. */ void frag6_slowtimo() { struct ip6q *q6; int s = splnet(); frag6_doing_reass = 1; q6 = ip6q.ip6q_next; if (q6) while (q6 != &ip6q) { --q6->ip6q_ttl; q6 = q6->ip6q_next; if (q6->ip6q_prev->ip6q_ttl == 0) { ip6stat.ip6s_fragtimeout++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(q6->ip6q_prev); } } /* * If we are over the maximum number of fragments * (due to the limit being lowered), drain off * enough to get down to the new limit. */ while (frag6_nfragpackets > (u_int)ip6_maxfragpackets) { ip6stat.ip6s_fragoverflow++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(ip6q.ip6q_prev); } frag6_doing_reass = 0; splx(s); } /* * Drain off all datagram fragments. */ void frag6_drain() { if (frag6_doing_reass) return; while (ip6q.ip6q_next != &ip6q) { ip6stat.ip6s_fragdropped++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(ip6q.ip6q_next); } } Index: head/sys/netinet6/in6_pcb.c =================================================================== --- head/sys/netinet6/in6_pcb.c (revision 54349) +++ head/sys/netinet6/in6_pcb.c (revision 54350) @@ -1,1123 +1,1123 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_key.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "faith.h" #ifdef IPSEC #include #include #include #ifdef KEY_DEBUG #include #else #define DPRINTF(lev,arg) #define DDO(lev, stmt) #define DP(x, y, z) #endif /* KEY_DEBUG */ #endif /* IPSEC */ struct in6_addr zeroin6_addr; int in6_pcbbind(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; if (!in6_ifaddr) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return(EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return(EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return(EAFNOSUPPORT); /* * If the scope of the destination is link-local, embed the * interface index in the address. */ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { /* XXX boundary check is assumed to be already done. */ /* XXX sin6_scope_id is weaker than advanced-api. */ struct in6_pktinfo *pi; if (inp->in6p_outputopts && (pi = inp->in6p_outputopts->ip6po_pktinfo) && pi->ipi6_ifindex) { sin6->sin6_addr.s6_addr16[1] = htons(pi->ipi6_ifindex); } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && inp->in6p_moptions && inp->in6p_moptions->im6o_multicast_ifp) { sin6->sin6_addr.s6_addr16[1] = htons(inp->in6p_moptions->im6o_multicast_ifp->if_index); } else if (sin6->sin6_scope_id) { /* boundary check */ if (sin6->sin6_scope_id < 0 || if_index < sin6->sin6_scope_id) { return ENXIO; /* XXX EINVAL? */ } sin6->sin6_addr.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff);/*XXX*/ /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; } } lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ia = NULL; sin6->sin6_port = 0; /* yech... */ if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) return(EADDRNOTAVAIL); /* * XXX: bind to an anycast address might accidentally * cause sending a packet with anycast source address. */ if (ia && ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { return(EADDRNOTAVAIL); } } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPV6PORT_RESERVED && p && suser_xxx(0, p, PRISON_ROOT)) return(EACCES); if (so->so_cred->cr_uid != 0 && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { t = in6_pcblookup_local(inp->inp_pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD); if (t && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) return (EADDRINUSE); } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, wild); if (t && (reuseport & t->inp_socket->so_options) == 0) return(EADDRINUSE); } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { ushort first, last; int count; inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (p && (error = suser_xxx(0, p, PRISON_ROOT))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->in6p_laddr = in6addr_any; return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->in6p_laddr = in6addr_any; return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, wild)); } } inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } inp->in6p_flowinfo = sin6 ? sin6->sin6_flowinfo : 0; /*XXX*/ return(0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in6_pcbladdr(inp, nam, plocal_addr6) register struct inpcb *inp; struct sockaddr *nam; struct in6_addr **plocal_addr6; { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_pktinfo *pi; struct ifnet *ifp = NULL; int error = 0; if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); /* * If the scope of the destination is link-local, embed the interface * index in the address. */ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { /* XXX boundary check is assumed to be already done. */ /* XXX sin6_scope_id is weaker than advanced-api. */ if (inp->in6p_outputopts && (pi = inp->in6p_outputopts->ip6po_pktinfo) && pi->ipi6_ifindex) { sin6->sin6_addr.s6_addr16[1] = htons(pi->ipi6_ifindex); ifp = ifindex2ifnet[pi->ipi6_ifindex]; } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && inp->in6p_moptions && inp->in6p_moptions->im6o_multicast_ifp) { sin6->sin6_addr.s6_addr16[1] = htons(inp->in6p_moptions->im6o_multicast_ifp->if_index); ifp = ifindex2ifnet[inp->in6p_moptions->im6o_multicast_ifp->if_index]; } else if (sin6->sin6_scope_id) { /* boundary check */ if (sin6->sin6_scope_id < 0 || if_index < sin6->sin6_scope_id) { return ENXIO; /* XXX EINVAL? */ } sin6->sin6_addr.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff);/*XXX*/ ifp = ifindex2ifnet[sin6->sin6_scope_id]; } } if (in6_ifaddr) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } { /* * XXX: in6_selectsrc might replace the bound local address * with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp->in6p_moptions, &inp->in6p_route, &inp->in6p_laddr, &error); if (*plocal_addr6 == 0) { if (error == 0) error = EADDRNOTAVAIL; return(error); } /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to caller, that will do the lookup. */ } if (inp->in6p_route.ro_rt) ifp = inp->in6p_route.ro_rt->rt_ifp; inp->in6p_ip6_hlim = (u_int8_t)in6_selecthlim(inp, ifp); return(0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in6_pcbconnect(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct in6_addr *addr6; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error; /* * Call inner routine, to assign local interface address. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return(error); if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, p); if (error) return (error); } inp->in6p_laddr = *addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* * xxx kazu flowlabel is necessary for connect? * but if this line is missing, the garbage value remains. */ inp->in6p_flowinfo = sin6->sin6_flowinfo; in_pcbrehash(inp); return (0); } /* * Return an IPv6 address, which is the most appropriate for given * destination and user specified options. * If necessary, this function lookups the routing table and return * an entry to the caller for later use. */ struct in6_addr * in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct in6_addr *laddr; int *errorp; { struct in6_addr *dst; struct in6_ifaddr *ia6 = 0; struct in6_pktinfo *pi = NULL; dst = &dstsock->sin6_addr; *errorp = 0; /* * If the source address is explicitly specified by the caller, * use it. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) return(&pi->ipi6_addr); /* * If the source address is not specified but the socket(if any) * is already bound, use the bound address. */ if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) return(laddr); /* * If the caller doesn't specify the source address but * the outgoing interface, use an address associated with * the interface. */ if (pi && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex], dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a link-local unicast address or * a multicast address, and if the outgoing interface is specified * by the sin6_scope_id filed, use an address associated with the * interface. * XXX: We're now trying to define more specific semantics of * sin6_scope_id field, so this part will be rewritten in * the near future. */ if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) && dstsock->sin6_scope_id) { /* * I'm not sure if boundary check for scope_id is done * somewhere... */ if (dstsock->sin6_scope_id < 0 || if_index < dstsock->sin6_scope_id) { *errorp = ENXIO; /* XXX: better error? */ return(0); } ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id], dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a multicast address and * the outgoing interface for the address is specified * by the caller, use an address associated with the interface. * There is a sanity check here; if the destination has node-local * scope, the outgoing interfacde should be a loopback address. * Even if the outgoing interface is not specified, we also * choose a loopback interface as the outgoing interface. */ if (IN6_IS_ADDR_MULTICAST(dst)) { struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL; if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) { ifp = &loif[0]; } if (ifp) { ia6 = in6_ifawithscope(ifp, dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&ia6->ia_addr.sin6_addr); } } /* * If the next hop address for the packet is specified * by caller, use an address associated with the route * to the next hop. */ { struct sockaddr_in6 *sin6_next; struct rtentry *rt; if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL); if (rt) { ia6 = in6_ifawithscope(rt->rt_ifp, dst); if (ia6 == 0) ia6 = ifatoia6(rt->rt_ifa); } if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } } /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ if (ro) { if (ro->ro_rt && !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0) { /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); ro->ro_dst.sin6_family = AF_INET6; ro->ro_dst.sin6_len = sizeof(struct sockaddr_in6); ro->ro_dst.sin6_addr = *dst; if (IN6_IS_ADDR_MULTICAST(dst)) { ro->ro_rt = rtalloc1(&((struct route *)ro) ->ro_dst, 0, 0UL); } else { - rtcalloc((struct route *)ro); + rtalloc((struct route *)ro); } } /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using * the address. But we don't know why it does so. * It is necessary to ensure the scope even for lo0 * so doesn't check out IFF_LOOPBACK. */ if (ro->ro_rt) { ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); if (ia6 == 0) /* xxx scope error ?*/ ia6 = ifatoia6(ro->ro_rt->rt_ifa); } if (ia6 == 0) { *errorp = EHOSTUNREACH; /* no route */ return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } *errorp = EADDRNOTAVAIL; return(0); } /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit valued specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. */ int in6_selecthlim(in6p, ifp) struct in6pcb *in6p; struct ifnet *ifp; { if (in6p && in6p->in6p_hops >= 0) return(in6p->in6p_hops); else if (ifp) return(nd_ifinfo[ifp->if_index].chlim); else return(ip6_defhlim); } void in6_pcbdisconnect(inp) struct inpcb *inp; { bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in6_pcbdetach(inp); } void in6_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; #ifdef IPSEC if (sotoinpcb(so) != 0) key_freeso(so); ipsec6_delete_pcbpolicy(inp); #endif /* IPSEC */ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); sotoinpcb(so) = 0; sofree(so); if (inp->in6p_options) m_freem(inp->in6p_options); if (inp->in6p_outputopts) { if (inp->in6p_outputopts->ip6po_rthdr && inp->in6p_outputopts->ip6po_route.ro_rt) RTFREE(inp->in6p_outputopts->ip6po_route.ro_rt); if (inp->in6p_outputopts->ip6po_m) (void)m_free(inp->in6p_outputopts->ip6po_m); free(inp->in6p_outputopts, M_IP6OPT); } if (inp->in6p_route.ro_rt) rtfree(inp->in6p_route.ro_rt); ip6_freemoptions(inp->in6p_moptions); inp->inp_vflag = 0; zfreei(ipi->ipi_zone, inp); } /* * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in6_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; register struct sockaddr_in6 *sin6; /* * Do the malloc first in case it blocks. */ MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin6, M_SONAME); return EINVAL; } sin6->sin6_port = inp->inp_lport; sin6->sin6_addr = inp->in6p_laddr; splx(s); if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else sin6->sin6_scope_id = 0; /*XXX*/ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; *nam = (struct sockaddr *)sin6; return 0; } int in6_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; register struct sockaddr_in6 *sin6; /* * Do the malloc first in case it blocks. */ MALLOC(sin6, struct sockaddr_in6 *, sizeof(*sin6), M_SONAME, M_WAITOK); bzero((caddr_t)sin6, sizeof (*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin6, M_SONAME); return EINVAL; } sin6->sin6_port = inp->inp_fport; sin6->sin6_addr = inp->in6p_faddr; splx(s); if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else sin6->sin6_scope_id = 0; /*XXX*/ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; *nam = (struct sockaddr *)sin6; return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if (inp->inp_vflag & INP_IPV4) { error = in_setsockaddr(so, nam); if (error == NULL) in6_sin_2_v4mapsin6_in_sock(nam); } else error = in6_setsockaddr(so, nam); return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if (inp->inp_vflag & INP_IPV4) { error = in_setpeeraddr(so, nam); if (error == NULL) in6_sin_2_v4mapsin6_in_sock(nam); } else error = in6_setpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. * * Must be called at splnet. */ void in6_pcbnotify(head, dst, fport_arg, laddr6, lport_arg, cmd, notify) struct inpcbhead *head; struct sockaddr *dst; u_int fport_arg, lport_arg; struct in6_addr *laddr6; int cmd; void (*notify) __P((struct inpcb *, int)); { struct inpcb *inp, *oinp; struct in6_addr faddr6; u_short fport = fport_arg, lport = lport_arg; int errno, s; if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6) return; faddr6 = ((struct sockaddr_in6 *)dst)->sin6_addr; if (IN6_IS_ADDR_UNSPECIFIED(&faddr6)) return; /* * Redirects go to all references to the destination, * and use in_rtchange to invalidate the route cache. * Dead host indications: notify all references to the destination. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)laddr6, sizeof(*laddr6)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; s = splnet(); for (inp = LIST_FIRST(head); inp != NULL;) { if ((inp->inp_vflag & INP_IPV6) == NULL) { inp = LIST_NEXT(inp, inp_list); continue; } if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &faddr6) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(laddr6) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr6)) || (fport && inp->inp_fport != fport)) { inp = LIST_NEXT(inp, inp_list); continue; } oinp = inp; inp = LIST_NEXT(inp, inp_list); if (notify) (*notify)(oinp, errno); } splx(s); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in6_addr *laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == NULL) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if ((inp->inp_vflag & INP_IPV6) == NULL) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(in6p) struct inpcb *in6p; { struct rtentry *rt; struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { in6p->in6p_route.ro_rt = 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&in6p->in6p_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) (void)rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); else /* * A new route can be allocated * the next time output is attempted. */ rtfree(rt); } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ void in6_rtchange(inp, errno) struct inpcb *inp; int errno; { if (inp->in6p_route.ro_rt) { rtfree(inp->in6p_route.ro_rt); inp->in6p_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } } /* * Lookup PCB in hash list. */ struct inpcb * in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in6_addr *faddr, *laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == NULL) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == NULL) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { #if defined(NFAITH) && NFAITH > 0 if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; #endif if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) return (inp); else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) local_wild = inp; } } return (local_wild); } /* * Not found. */ return (NULL); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) ? m->m_pkthdr.rcvif->if_index : 0; return; } Index: head/sys/netinet6/ip6_output.c =================================================================== --- head/sys/netinet6/ip6_output.c (revision 54349) +++ head/sys/netinet6/ip6_output.c (revision 54350) @@ -1,2166 +1,2162 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include "opt_key.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #ifdef KEY_DEBUG #include #else #define DPRINTF(lev,arg) #define DDO(lev, stmt) #define DP(x, y, z) #endif /* KEY_DEBUG */ #endif /* IPSEC */ #include "loop.h" #include #ifdef IPV6FIREWALL #include #endif static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *sopt)); static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *)); static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **)); static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int)); static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, struct ip6_frag **)); static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t)); static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *)); /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. */ int ip6_output(m0, opt, ro, flags, im6o, ifpp) struct mbuf *m0; struct ip6_pktopts *opt; struct route_in6 *ro; int flags; struct ip6_moptions *im6o; struct ifnet **ifpp; /* XXX: just for statistics */ { struct ip6_hdr *ip6, *mhip6; struct ifnet *ifp; struct mbuf *m = m0; int hlen, tlen, len, off; struct route_in6 ip6route; struct sockaddr_in6 *dst; int error = 0; struct in6_ifaddr *ia; u_long mtu; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int needipsec = 0; #ifdef IPSEC int needipsectun = 0; struct socket *so; struct secpolicy *sp = NULL; /* for AH processing. stupid to have "socket" variable in IP layer... */ so = (struct socket *)m->m_pkthdr.rcvif; m->m_pkthdr.rcvif = NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* IPSEC */ #define MAKE_EXTHDR(hp,mp) \ { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* get a security policy for this packet */ if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); if (sp == NULL) { ipsec6stat.out_inval++; goto bad; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ ipsec6stat.out_polvio++; goto bad; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* XXX should be panic ? */ printf("ip6_output: No IPsec request specified.\n"); error = EINVAL; goto bad; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here. do that later. */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If we need IPsec, or there is at least one extension header, * separate IP6 header from the payload. */ if ((needipsec || optlen) && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ { u_char *nexthdrp = &ip6->ip6_nxt; struct mbuf *mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } #define MAKE_CHAIN(m,mp,p,i)\ {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); #ifdef IPSEC if (!needipsec) goto skip_ipsec2; /* * pointers after IPsec headers are not valid any more. * other pointers need a great care too. * (IPsec routines should not mangle mbufs prior to AH/ESP) */ exthdrs.ip6e_dest2 = NULL; { struct ip6_rthdr *rh = NULL; int segleft_org = 0; struct ipsec_output_state state; if (exthdrs.ip6e_rthdr) { rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); segleft_org = rh->ip6r_segleft; rh->ip6r_segleft = 0; } bzero(&state, sizeof(state)); state.m = m; error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags, &needipsectun); m = state.m; if (error) { /* mbuf is already reclaimed in ipsec6_output_trans. */ m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /*fall through*/ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } if (exthdrs.ip6e_rthdr) { /* ah6_output doesn't modify mbuf chain */ rh->ip6r_segleft = segleft_org; } } skip_ipsec2:; #endif } /* * If there is a routing header, replace destination address field * with the first hop of the routing header. */ if (exthdrs.ip6e_rthdr) { struct ip6_rthdr *rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *)); struct ip6_rthdr0 *rh0; finaldst = ip6->ip6_dst; switch(rh->ip6r_type) { case IPV6_RTHDR_TYPE_0: rh0 = (struct ip6_rthdr0 *)rh; ip6->ip6_dst = rh0->ip6r0_addr[0]; bcopy((caddr_t)&rh0->ip6r0_addr[1], (caddr_t)&rh0->ip6r0_addr[0], sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1) ); rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst; break; default: /* is it possible? */ error = EINVAL; goto bad; } } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_DADOUTPUT) == 0) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } ip6stat.ip6s_localout++; /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. */ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; } #ifdef IPSEC if (needipsec && needipsectun) { struct ipsec_output_state state; /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ bzero(&exthdrs, sizeof(exthdrs)); exthdrs.ip6e_ip6 = m; bzero(&state, sizeof(state)); state.m = m; state.ro = (struct route *)ro; state.dst = (struct sockaddr *)dst; error = ipsec6_output_tunnel(&state, sp, flags); m = state.m; ro = (struct route_in6 *)state.ro; dst = (struct sockaddr_in6 *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. */ m0 = m = NULL; m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /*fall through*/ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } exthdrs.ip6e_ip6 = m; } #endif /*IPESC*/ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* Unicast */ #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* xxx * interface selection comes here * if an interface is specified from an upper layer, * ifp must point it. */ + if (ro->ro_rt == 0) + rtalloc((struct route *)ro); if (ro->ro_rt == 0) { - if (ro == &ip6route) /* xxx kazu */ - rtalloc((struct route *)ro); - else - rtcalloc((struct route *)ro); - } - if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ in6_ifstat_inc(ifp, ifs6_out_request); /* * Check if there is the outgoing interface conflicts with * the interface specified by ifi6_ifindex(if specified). * Note that loopback interface is always okay. * (this happens when we are sending packet toward my * interface) */ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = EHOSTUNREACH; goto bad; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; } else { /* Multicast */ struct in6_multi *in6m; m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; /* * See if the caller provided any multicast options */ ifp = NULL; if (im6o != NULL) { ip6->ip6_hlim = im6o->im6o_multicast_hlim; if (im6o->im6o_multicast_ifp != NULL) ifp = im6o->im6o_multicast_ifp; } else ip6->ip6_hlim = ip6_defmcasthlim; /* * See if the caller provided the outgoing interface * as an ancillary data. * Boundary check for ifindex is assumed to be already done. */ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex]; /* * If the destination is a node-local scope multicast, * the packet should be loop-backed only. */ if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) { /* * If the outgoing interface is already specified, * it should be a loopback interface. */ if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) { ip6stat.ip6s_badscope++; error = ENETUNREACH; /* XXX: better error? */ /* XXX correct ifp? */ in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } else { ifp = &loif[0]; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; /* * If caller did not provide an interface lookup a * default in the routing table. This is either a * default for the speicfied group (i.e. a host * route), or a multicast default (a route for the * ``net'' ff00::/8). */ if (ifp == NULL) { if (ro->ro_rt == 0) { ro->ro_rt = rtalloc1((struct sockaddr *) &ro->ro_dst, 0, 0UL); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; } if ((flags & IPV6_FORWARDING) == 0) in6_ifstat_inc(ifp, ifs6_out_request); in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); if (in6m != NULL && (im6o == NULL || im6o->im6o_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ ip6_mloopback(ifp, m, dst); } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* * Determine path MTU. */ if (ro_pmtu != ro) { /* The first hop and the final destination may differ. */ struct sockaddr_in6 *sin6_fin = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) { RTFREE(ro_pmtu->ro_rt); ro_pmtu->ro_rt = (struct rtentry *)0; } if (ro_pmtu->ro_rt == 0) { bzero(sin6_fin, sizeof(*sin6_fin)); sin6_fin->sin6_family = AF_INET6; sin6_fin->sin6_len = sizeof(struct sockaddr_in6); sin6_fin->sin6_addr = finaldst; - rtcalloc((struct route *)ro_pmtu); + rtalloc((struct route *)ro_pmtu); } } if (ro_pmtu->ro_rt != NULL) { u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu; mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; if (mtu > ifmtu) { /* * The MTU on the route is larger than the MTU on * the interface! This shouldn't happen, unless the * MTU of the interface has been changed after the * interface was brought up. Change the MTU in the * route to match the interface MTU (as long as the * field isn't locked). */ mtu = ifmtu; if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ } } else { mtu = nd_ifinfo[ifp->if_index].linkmtu; } /* * Fake link-local scope-class addresses */ if ((ifp->if_flags & IFF_LOOPBACK) == 0) { if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = 0; } #ifdef IPV6FIREWALL /* * Check with the firewall... */ if (ip6_fw_chk_ptr) { u_short port = 0; /* If ipfw says divert, we have to just drop packet */ if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) { m_freem(m); goto done; } if (!m) { error = EACCES; goto done; } } #endif /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy1; /* XXX unused */ u_int32_t dummy2; /* XXX unused */ /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * continuous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy1, &dummy2) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. */ tlen = m->m_pkthdr.len; if (tlen <= mtu #ifdef notyet /* * On any link that cannot convey a 1280-octet packet in one piece, * link-specific fragmentation and reassembly must be provided at * a layer below IPv6. [RFC 2460, sec.5] * Thus if the interface has ability of link-level fragmentation, * we can just send the packet even if the packet size is * larger than the link's MTU. * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet... */ || ifp->if_flags & IFF_FRAGMENTABLE #endif ) { #if defined(__NetBSD__) && defined(IFA_STATS) if (IFA_STATS) { struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { ia->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len; } } #endif error = nd6_output(ifp, m, dst, ro->ro_rt); goto done; } else if (mtu < IPV6_MMTU) { /* * note that path MTU is never less than IPV6_MMTU * (see icmp6_input). */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { struct mbuf **mnext, *m_frgpart; struct ip6_frag *ip6f; u_int32_t id = htonl(ip6_id++); u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } mnext = &m->m_nextpkt; /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; for (off = hlen; off < tlen; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (!m) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { ip6stat.ip6s_odropped++; goto sendorfree; } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + len >= tlen) len = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(len + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, len)) == 0) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m_cat(m, m_frgpart); m->m_pkthdr.len = len + hlen + sizeof(*ip6f); m->m_pkthdr.rcvif = (struct ifnet *)0; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; ip6stat.ip6s_ofragments++; in6_ifstat_inc(ifp, ifs6_out_fragcreat); } in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { #if defined(__NetBSD__) && defined(IFA_STATS) if (IFA_STATS) { struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { ia->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len; } } #endif error = nd6_output(ifp, m, dst, ro->ro_rt); } else m_freem(m); } if (error == 0) ip6stat.ip6s_fragmented++; done: if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */ RTFREE(ro->ro_rt); } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { RTFREE(ro_pmtu->ro_rt); } #ifdef IPSEC if (sp != NULL) key_freesp(sp); #endif /* IPSEC */ return(error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* fall through */ bad: m_freem(m); goto done; } static int ip6_copyexthdr(mp, hdr, hlen) struct mbuf **mp; caddr_t hdr; int hlen; { struct mbuf *m; if (hlen > MCLBYTES) return(ENOBUFS); /* XXX */ MGET(m, M_DONTWAIT, MT_DATA); if (!m) return(ENOBUFS); if (hlen > MLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return(ENOBUFS); } } m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return(0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(exthdrs, plen) struct ip6_exthdrs *exthdrs; u_int32_t plen; { struct mbuf *mopt; u_char *optbuf; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == 0) { MGET(mopt, M_DONTWAIT, MT_DATA); if (mopt == 0) return(ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { caddr_t oldoptp = mtod(mopt, caddr_t); int oldoptlen = mopt->m_len; if (mopt->m_flags & M_EXT) return(ENOBUFS); /* XXX */ MCLGET(mopt, M_DONTWAIT); if ((mopt->m_flags & M_EXT) == 0) return(ENOBUFS); bcopy(oldoptp, mtod(mopt, caddr_t), oldoptlen); optbuf = mtod(mopt, caddr_t) + oldoptlen; mopt->m_len = oldoptlen + JUMBOOPTLEN; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; *(u_int32_t *)&optbuf[4] = htonl(plen + JUMBOOPTLEN); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return(0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(m0, m, hlen, frghdrp) struct mbuf *m0, *m; int hlen; struct ip6_frag **frghdrp; { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_DONTWAIT); if (n == 0) return(ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if ((mlast->m_flags & M_EXT) == 0 && M_TRAILINGSPACE(mlast) < sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; MGET(mfrg, M_DONTWAIT, MT_DATA); if (mfrg == 0) return(ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return(0); } /* * IP6 socket option processing. */ int ip6_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { int privileged; register struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct proc *p; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; p = sopt->sopt_p; } else { panic("ip6_ctloutput: arg soopt is NULL"); } error = optval = 0; privileged = (p == 0 || suser(p)) ? 0 : 1; if (level == IPPROTO_IPV6) { switch (op) { case SOPT_SET: switch (optname) { case IPV6_PKTOPTIONS: { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != NULL) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != NULL) break; return (ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt)); } case IPV6_HOPOPTS: case IPV6_DSTOPTS: if (!privileged) { error = EPERM; break; } /* fall through */ case IPV6_UNICAST_HOPS: case IPV6_RECVOPTS: case IPV6_RECVRETOPTS: case IPV6_RECVDSTADDR: case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_RTHDR: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_BINDV6ONLY: if (optlen != sizeof(int)) error = EINVAL; else { error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->in6p_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ if (optval) \ in6p->in6p_flags |= bit; \ else \ in6p->in6p_flags &= ~bit; case IPV6_RECVOPTS: OPTSET(IN6P_RECVOPTS); break; case IPV6_RECVRETOPTS: OPTSET(IN6P_RECVRETOPTS); break; case IPV6_RECVDSTADDR: OPTSET(IN6P_RECVDSTADDR); break; case IPV6_PKTINFO: OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: OPTSET(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: OPTSET(IN6P_HOPOPTS); break; case IPV6_DSTOPTS: OPTSET(IN6P_DSTOPTS); break; case IPV6_RTHDR: OPTSET(IN6P_RTHDR); break; case IPV6_CHECKSUM: in6p->in6p_cksum = optval; break; case IPV6_FAITH: OPTSET(IN6P_FAITH); break; case IPV6_BINDV6ONLY: OPTSET(IN6P_BINDV6ONLY); break; } } break; #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } /* XXX */ MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); error = ip6_setmoptions(sopt->sopt_name, &in6p->in6p_moptions, m); (void)m_free(m); } break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags &= ~(IN6P_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags |= IN6P_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->in6p_flags &= ~(IN6P_HIGHPORT); in6p->in6p_flags |= IN6P_LOWPORT; break; default: error = EINVAL; break; } break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; struct mbuf *m; if (error = soopt_getm(sopt, &m)) /* XXX */ break; if (error = soopt_mcopyin(sopt, m)) /* XXX */ break; if (m != 0) req = mtod(m, caddr_t); error = ipsec6_set_policy(in6p, optname, req, privileged); m_freem(m); } break; #endif /* IPSEC */ #ifdef IPV6FIREWALL case IPV6_FW_ADD: case IPV6_FW_DEL: case IPV6_FW_FLUSH: case IPV6_FW_ZERO: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) return EINVAL; if (error = soopt_getm(sopt, &m)) /* XXX */ break; if (error = soopt_mcopyin(sopt, m)) /* XXX */ break; error = (*ip6_fw_ctl_ptr)(optname, mp); m = *mp; } break; #endif default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_OPTIONS: case IPV6_RETOPTS: error = ENOPROTOOPT; break; case IPV6_PKTOPTIONS: if (in6p->in6p_options) { error = soopt_mcopyout(sopt, in6p->in6p_options); } else sopt->sopt_valsize = 0; break; case IPV6_HOPOPTS: case IPV6_DSTOPTS: if (!privileged) { error = EPERM; break; } /* fall through */ case IPV6_UNICAST_HOPS: case IPV6_RECVOPTS: case IPV6_RECVRETOPTS: case IPV6_RECVDSTADDR: case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_RTHDR: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_BINDV6ONLY: switch (optname) { case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; #define OPTBIT(bit) (in6p->in6p_flags & bit ? 1 : 0) case IPV6_RECVOPTS: optval = OPTBIT(IN6P_RECVOPTS); break; case IPV6_RECVRETOPTS: optval = OPTBIT(IN6P_RECVRETOPTS); break; case IPV6_RECVDSTADDR: optval = OPTBIT(IN6P_RECVDSTADDR); break; case IPV6_PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_CHECKSUM: optval = in6p->in6p_cksum; break; case IPV6_FAITH: optval = OPTBIT(IN6P_FAITH); break; case IPV6_BINDV6ONLY: optval = OPTBIT(IN6P_BINDV6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->in6p_flags; if (flags & IN6P_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & IN6P_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; error = ip6_getmoptions(sopt->sopt_name, in6p->in6p_moptions, &m); if (error == 0) error = sooptcopyout(sopt, mtod(m, char *), m->m_len); m_freem(m); } break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; int len = 0; struct mbuf *m; struct mbuf **mp = &m; if (m != 0) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_get_policy(in6p, req, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /*XXX*/ m_freem(m); break; } #endif /* IPSEC */ #ifdef IPV6FIREWALL case IPV6_FW_GET: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) { return EINVAL; } error = (*ip6_fw_ctl_ptr)(optname, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (m) m_freem(m); } break; #endif default: error = ENOPROTOOPT; break; } break; } } else { error = EINVAL; } return(error); } /* * Set up IP6 options in pcb for insertion in output packets. * Store in mbuf with pointer in pcbopt, adding pseudo-option * with destination address if source routed. */ static int ip6_pcbopts(pktopt, m, so, sopt) struct ip6_pktopts **pktopt; register struct mbuf *m; struct socket *so; struct sockopt *sopt; { register struct ip6_pktopts *opt = *pktopt; int error = 0; struct proc *p = sopt->sopt_p; int priv = 0; /* turn off any old options. */ if (opt) { if (opt->ip6po_m) (void)m_free(opt->ip6po_m); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = 0; if (!m || m->m_len == 0) { /* * Only turning off any previous options. */ if (opt) free(opt, M_IP6OPT); if (m) (void)m_free(m); return(0); } /* set options specified by user. */ if (p && !suser(p)) priv = 1; if ((error = ip6_setpktoptions(m, opt, priv)) != 0) { (void)m_free(m); return(error); } *pktopt = opt; return(0); } /* * Set the IP6 multicast options in response to user setsockopt(). */ static int ip6_setmoptions(optname, im6op, m) int optname; struct ip6_moptions **im6op; struct mbuf *m; { int error = 0; u_int loop, ifindex; struct ipv6_mreq *mreq; struct ifnet *ifp; struct ip6_moptions *im6o = *im6op; struct route_in6 ro; struct sockaddr_in6 *dst; struct in6_multi_mship *imm; struct proc *p = curproc; /* XXX */ if (im6o == NULL) { /* * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. */ im6o = (struct ip6_moptions *) malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); if (im6o == NULL) return(ENOBUFS); *im6op = im6o; im6o->im6o_multicast_ifp = NULL; im6o->im6o_multicast_hlim = ip6_defmcasthlim; im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP; LIST_INIT(&im6o->im6o_memberships); } switch (optname) { case IPV6_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. */ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } ifindex = *(mtod(m, u_int *)); if (ifindex < 0 || if_index < ifindex) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifindex2ifnet[ifindex]; if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } im6o->im6o_multicast_ifp = ifp; break; case IPV6_MULTICAST_HOPS: { /* * Set the IP6 hoplimit for outgoing multicast packets. */ int optval; if (m == NULL || m->m_len != sizeof(int)) { error = EINVAL; break; } optval = *(mtod(m, u_int *)); if (optval < -1 || optval >= 256) error = EINVAL; else if (optval == -1) im6o->im6o_multicast_hlim = ip6_defmcasthlim; else im6o->im6o_multicast_hlim = optval; break; } case IPV6_MULTICAST_LOOP: /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. */ if (m == NULL || m->m_len != sizeof(u_int) || (loop = *(mtod(m, u_int *))) > 1) { error = EINVAL; break; } im6o->im6o_multicast_loop = loop; break; case IPV6_JOIN_GROUP: /* * Add a multicast group membership. * Group must be a valid IP6 multicast address. */ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { /* * We use the unspecified address to specify to accept * all multicast addresses. Only super user is allowed * to do this. */ if (suser(p)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If the interface is specified, validate it. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } /* * If no interface was explicitly specified, choose an * appropriate one according to the given multicast address. */ if (mreq->ipv6mr_interface == 0) { /* * If the multicast address is in node-local scope, * the interface should be a loopback interface. * Otherwise, look up the routing table for the * address, and choose the outgoing interface. * XXX: is it a good approach? */ if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) { ifp = &loif[0]; } else { ro.ro_rt = NULL; dst = (struct sockaddr_in6 *)&ro.ro_dst; bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = mreq->ipv6mr_multiaddr; rtalloc((struct route *)&ro); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; break; } ifp = ro.ro_rt->rt_ifp; rtfree(ro.ro_rt); } } else ifp = ifindex2ifnet[mreq->ipv6mr_interface]; /* * See if we found an interface, and confirm that it * supports multicast */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * See if the membership already exists. */ LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) if (imm->i6mm_maddr->in6m_ifp == ifp && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; if (imm != NULL) { error = EADDRINUSE; break; } /* * Everything looks good; add a new record to the multicast * address list for the given interface. */ imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK); if (imm == NULL) { error = ENOBUFS; break; } if ((imm->i6mm_maddr = in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) { free(imm, M_IPMADDR); break; } LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); break; case IPV6_LEAVE_GROUP: /* * Drop a multicast group membership. * Group must be a valid IP6 multicast address. */ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { if (suser(p)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If an interface address was specified, get a pointer * to its ifnet structure. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifindex2ifnet[mreq->ipv6mr_interface]; /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * Find the membership in the membership list. */ LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) { if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; } if (imm == NULL) { /* Unable to resolve interface */ error = EADDRNOTAVAIL; break; } /* * Give up the multicast address record to which the * membership points. */ LIST_REMOVE(imm, i6mm_chain); in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); break; default: error = EOPNOTSUPP; break; } /* * If all options have default values, no need to keep the mbuf. */ if (im6o->im6o_multicast_ifp == NULL && im6o->im6o_multicast_hlim == ip6_defmcasthlim && im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && LIST_EMPTY(&im6o->im6o_memberships)) { free(*im6op, M_IPMOPTS); *im6op = NULL; } return(error); } /* * Return the IP6 multicast options in response to user getsockopt(). */ static int ip6_getmoptions(optname, im6o, mp) int optname; register struct ip6_moptions *im6o; register struct mbuf **mp; { u_int *hlim, *loop, *ifindex; *mp = m_get(M_WAIT, MT_HEADER); /*XXX*/ switch (optname) { case IPV6_MULTICAST_IF: ifindex = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) *ifindex = 0; else *ifindex = im6o->im6o_multicast_ifp->if_index; return(0); case IPV6_MULTICAST_HOPS: hlim = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *hlim = ip6_defmcasthlim; else *hlim = im6o->im6o_multicast_hlim; return(0); case IPV6_MULTICAST_LOOP: loop = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *loop = ip6_defmcasthlim; else *loop = im6o->im6o_multicast_loop; return(0); default: return(EOPNOTSUPP); } } /* * Discard the IP6 multicast options. */ void ip6_freemoptions(im6o) register struct ip6_moptions *im6o; { struct in6_multi_mship *imm; if (im6o == NULL) return; while ((imm = LIST_FIRST(&im6o->im6o_memberships)) != NULL) { LIST_REMOVE(imm, i6mm_chain); if (imm->i6mm_maddr) in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); } free(im6o, M_IPMOPTS); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktoptions(control, opt, priv) struct mbuf *control; struct ip6_pktopts *opt; int priv; { register struct cmsghdr *cm = 0; if (control == 0 || opt == 0) return(EINVAL); bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means to use default hop limit */ /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return(EINVAL); opt->ip6po_m = control; for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return(EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; switch(cm->cmsg_type) { case IPV6_PKTINFO: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) return(EINVAL); opt->ip6po_pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm); if (opt->ip6po_pktinfo->ipi6_ifindex && IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr)) opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] = htons(opt->ip6po_pktinfo->ipi6_ifindex); if (opt->ip6po_pktinfo->ipi6_ifindex > if_index || opt->ip6po_pktinfo->ipi6_ifindex < 0) { return(ENXIO); } if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) { struct ifaddr *ia; struct sockaddr_in6 sin6; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = opt->ip6po_pktinfo->ipi6_addr; ia = ifa_ifwithaddr(sin6tosa(&sin6)); if (ia == NULL || (opt->ip6po_pktinfo->ipi6_ifindex && (ia->ifa_ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex))) { return(EADDRNOTAVAIL); } /* * Check if the requested source address is * indeed a unicast address assigned to the * node. */ if (IN6_IS_ADDR_MULTICAST(&opt->ip6po_pktinfo->ipi6_addr)) return(EADDRNOTAVAIL); } break; case IPV6_HOPLIMIT: if (cm->cmsg_len != CMSG_LEN(sizeof(int))) return(EINVAL); opt->ip6po_hlim = *(int *)CMSG_DATA(cm); if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255) return(EINVAL); break; case IPV6_NEXTHOP: if (!priv) return(EPERM); if (cm->cmsg_len < sizeof(u_char) || cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm))) return(EINVAL); opt->ip6po_nexthop = (struct sockaddr *)CMSG_DATA(cm); break; case IPV6_HOPOPTS: if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh))) return(EINVAL); opt->ip6po_hbh = (struct ip6_hbh *)CMSG_DATA(cm); if (cm->cmsg_len != CMSG_LEN((opt->ip6po_hbh->ip6h_len + 1) << 3)) return(EINVAL); break; case IPV6_DSTOPTS: if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest))) return(EINVAL); /* * If there is no routing header yet, the destination * options header should be put on the 1st part. * Otherwise, the header should be on the 2nd part. * (See RFC 2460, section 4.1) */ if (opt->ip6po_rthdr == NULL) { opt->ip6po_dest1 = (struct ip6_dest *)CMSG_DATA(cm); if (cm->cmsg_len != CMSG_LEN((opt->ip6po_dest1->ip6d_len + 1) << 3)) return(EINVAL); } else { opt->ip6po_dest2 = (struct ip6_dest *)CMSG_DATA(cm); if (cm->cmsg_len != CMSG_LEN((opt->ip6po_dest2->ip6d_len + 1) << 3)) return(EINVAL); } break; case IPV6_RTHDR: if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr))) return(EINVAL); opt->ip6po_rthdr = (struct ip6_rthdr *)CMSG_DATA(cm); if (cm->cmsg_len != CMSG_LEN((opt->ip6po_rthdr->ip6r_len + 1) << 3)) return(EINVAL); switch(opt->ip6po_rthdr->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (opt->ip6po_rthdr->ip6r_segleft == 0) return(EINVAL); break; default: return(EINVAL); } break; default: return(ENOPROTOOPT); } } return(0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(ifp, m, dst) struct ifnet *ifp; register struct mbuf *m; register struct sockaddr_in6 *dst; { struct mbuf *copym; copym = m_copy(m, 0, M_COPYALL); if (copym != NULL) { (void)if_simloop(ifp, copym, (struct sockaddr *)dst, NULL); } } /* * Chop IPv6 header off from the payload. */ static int ip6_splithdr(m, exthdrs) struct mbuf *m; struct ip6_exthdrs *exthdrs; { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (mh == 0) { m_freem(m); return ENOBUFS; } M_COPY_PKTHDR(mh, m); MH_ALIGN(mh, sizeof(*ip6)); m->m_flags &= ~M_PKTHDR; m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(in6p) struct in6pcb *in6p; { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen }