Index: projects/routing/share/man/man9/rtentry.9 =================================================================== --- projects/routing/share/man/man9/rtentry.9 (revision 274334) +++ projects/routing/share/man/man9/rtentry.9 (revision 274335) @@ -1,252 +1,248 @@ .\" .\" Copyright 1996 Massachusetts Institute of Technology .\" .\" Permission to use, copy, modify, and distribute this software and .\" its documentation for any purpose and without fee is hereby .\" granted, provided that both the above copyright notice and this .\" permission notice appear in all copies, that both the above .\" copyright notice and this permission notice appear in all .\" supporting documentation, and that the name of M.I.T. not be used .\" in advertising or publicity pertaining to distribution of the .\" software without specific, written prior permission. M.I.T. makes .\" no representations about the suitability of this software for any .\" purpose. It is provided "as is" without express or implied .\" warranty. .\" .\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS .\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, .\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF .\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT .\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, .\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT .\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF .\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND .\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd March 5, 2014 .Dt RTENTRY 9 .Os .Sh NAME .Nm rtentry .Nd structure of an entry in the kernel routing table .Sh SYNOPSIS .In sys/types.h .In sys/socket.h .In net/route.h .Sh DESCRIPTION The kernel provides a common mechanism by which all protocols can store and retrieve entries from a central table of routes. Parts of this mechanism are also used to interact with user-level processes by means of a socket in the .Xr route 4 pseudo-protocol family. The .In net/route.h header file defines the structures and manifest constants used in this facility. .Pp The basic structure of a route is defined by .Vt "struct rtentry" , which includes the following fields: .Bl -tag -offset indent -width 6n .It Vt "struct radix_node rt_nodes[2]" ; Glue used by the radix-tree routines. These members also include in their substructure the key (i.e., destination address) and mask used when the route was created. The .Fn rt_key rt and .Fn rt_mask rt macros can be used to extract this information (in the form of a .Vt "struct sockaddr *" ) given a .Vt "struct rtentry *" . .It Vt "struct sockaddr *rt_gateway" ; The .Dq target of the route, which can either represent a destination in its own right (some protocols will put a link-layer address here), or some intermediate stop on the way to that destination (if the .Dv RTF_GATEWAY flag is set). .It Vt "int rt_flags" ; See below. If the .Dv RTF_UP flag is not present, the .Fn rtfree function will delete the route from the radix tree when the last reference drops. .It Vt "int rt_refcnt" ; Route entries are reference-counted; this field indicates the number of external (to the radix tree) references. .It Vt "struct ifnet *rt_ifp" ; .It Vt "struct ifaddr *rt_ifa" ; These two fields represent the .Dq answer , as it were, to the question posed by a route lookup; that is, they name the interface and interface address to be used in sending a packet to the destination or set of destinations which this route represents. .It Vt "u_long rt_mtu"; See description of rmx_mtu below. .It Vt "u_long rt_weight"; See description of rmx_weight below. .It Vt "u_long rt_expire"; See description of rmx_expire below. -.It Vt "counter64_t rt_pksent"; -See description of rmx_pksent below. .It Vt "struct rtentry *rt_gwroute" ; This member is a reference to a route whose destination is .Va rt_gateway . It is only used for .Dv RTF_GATEWAY routes. .It Vt "struct mtx rt_mtx" ; Mutex to lock this routing entry. .El .Pp The following flag bits are defined: .Bl -tag -offset indent -width ".Dv RTF_BLACKHOLE" -compact .It Dv RTF_UP The route is not deleted. .It Dv RTF_GATEWAY The route points to an intermediate destination and not the ultimate recipient; the .Va rt_gateway and .Va rt_gwroute fields name that destination. .It Dv RTF_HOST This is a host route. .It Dv RTF_REJECT The destination is presently unreachable. This should result in an .Er EHOSTUNREACH error from output routines. .It Dv RTF_DYNAMIC This route was created dynamically by .Fn rtredirect . .It Dv RTF_MODIFIED This route was modified by .Fn rtredirect . .It Dv RTF_DONE Used only in the .Xr route 4 protocol, indicating that the request was executed. .It Dv RTF_XRESOLVE When this route is returned as a result of a lookup, send a report on the .Xr route 4 interface requesting that an external process perform resolution for this route. .It Dv RTF_STATIC Indicates that this route was manually added by means of the .Xr route 8 command. .It Dv RTF_BLACKHOLE Requests that output sent via this route be discarded. .It Dv RTF_PROTO1 .It Dv RTF_PROTO2 .It Dv RTF_PROTO3 Protocol-specific. .It Dv RTF_PINNED Indicates that this route is immutable to a routing protocol. .It Dv RTF_LOCAL Indicates that the destination of this route is an address configured as belonging to this system. .It Dv RTF_BROADCAST Indicates that the destination is a broadcast address. .It Dv RTF_MULTICAST Indicates that the destination is a multicast address. .El .Pp Several metrics are supplied in .Vt "struct rt_metrics" passed with routing control messages via .Xr route 4 API. Currently only -.Vt rmx_mtu , rmx_expire , +.Vt rmx_mtu , and -.Vt rmx_pksent +.Vt rmx_expire metrics are supplied. All others are ignored. .Pp The following metrics are defined by .Vt "struct rt_metrics" : .Bl -tag -offset indent -width 6n .It Vt "u_long rmx_locks" ; Flag bits indicating which metrics the kernel is not permitted to dynamically modify. .It Vt "u_long rmx_mtu" ; MTU for this path. .It Vt "u_long rmx_hopcount" ; Number of intermediate systems on the path to this destination. .It Vt "u_long rmx_expire" ; The time (a la .Xr time 3 ) at which this route should expire, or zero if it should never expire. It is the responsibility of individual protocol suites to ensure that routes are actually deleted once they expire. .It Vt "u_long rmx_recvpipe" ; Nominally, the bandwidth-delay product for the path .Em from the destination .Em to this system. In practice, this value is used to set the size of the receive buffer (and thus the window in sliding-window protocols like .Tn TCP ) . .It Vt "u_long rmx_sendpipe" ; As before, but in the opposite direction. .It Vt "u_long rmx_ssthresh" ; The slow-start threshold used in .Tn TCP congestion-avoidance. .It Vt "u_long rmx_rtt" ; The round-trip time to this destination, in units of .Dv RMX_RTTUNIT per second. .It Vt "u_long rmx_rttvar" ; The average deviation of the round-trip time to this destination, in units of .Dv RMX_RTTUNIT per second. -.It Vt "u_long rmx_pksent" ; -A count of packets successfully sent via this route. .It Vt "u_long rmx_filler[4]" ; .\" XXX badly named Empty space available for protocol-specific information. .El .Sh SEE ALSO .Xr route 4 , .Xr route 8 , .Xr rtalloc 9 .Sh HISTORY The .Vt rtentry structure first appeared in .Bx 4.2 . The radix-tree representation of the routing table and the .Vt rt_metrics structure first appeared in .Bx 4.3 reno . .Sh AUTHORS This manual page was written by .An Garrett Wollman . .Sh BUGS There are a number of historical relics remaining in this interface. The .Va rt_gateway and .Va rmx_filler fields could be named better. Index: projects/routing/sys/net/route.c =================================================================== --- projects/routing/sys/net/route.c (revision 274334) +++ projects/routing/sys/net/route.c (revision 274335) @@ -1,2034 +1,1994 @@ /*- * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ /************************************************************************ * Note: In this file a 'fib' is a "forwarding information base" * * Which is the new name for an in kernel routing (next hop) table. * ***********************************************************************/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include "opt_sctp.h" #include "opt_mrouting.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #define RT_MAXFIBS UINT16_MAX /* Kernel config default option. */ #ifdef ROUTETABLES #if ROUTETABLES <= 0 #error "ROUTETABLES defined too low" #endif #if ROUTETABLES > RT_MAXFIBS #error "ROUTETABLES defined too big" #endif #define RT_NUMFIBS ROUTETABLES #endif /* ROUTETABLES */ /* Initialize to default if not otherwise set. */ #ifndef RT_NUMFIBS #define RT_NUMFIBS 1 #endif #if defined(INET) || defined(INET6) #ifdef SCTP extern void sctp_addr_change(struct ifaddr *ifa, int cmd); #endif /* SCTP */ #endif /* This is read-only.. */ u_int rt_numfibs = RT_NUMFIBS; SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); /* * By default add routes to all fibs for new interfaces. * Once this is set to 0 then only allocate routes on interface * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun aproach to a problem that needs * a more fine grained solution.. that will come. * XXX also has the problems getting the FIB from curthread which will not * always work given the fib can be overridden and prefixes can be added * from the network stack context. */ VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(rt_add_addr_allfibs), 0, ""); VNET_DEFINE(struct rtstat, rtstat); #define V_rtstat VNET(rtstat) VNET_DEFINE(struct rib_head *, rt_tables); #define V_rt_tables VNET(rt_tables) VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ #define V_rttrash VNET(rttrash) /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. * The operation can be done safely (in this code) because a * 'struct rtentry' starts with two 'struct radix_node''s, the first * one representing leaf nodes in the routing tree, which is * what the code in radix.c passes us as a 'struct radix_node'. * * But because there are a lot of assumptions in this conversion, * do not cast explicitly, but always use the macro below. */ #define RNTORT(p) ((struct rtentry *)(p)) static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ #define V_rtzone VNET(rtzone) static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *, struct rtentry **, u_int); static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); static int rt_ifdelroute(struct rtentry *rt, void *arg); /* * handler for net.my_fibnum */ static int sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) { int fibnum; int error; fibnum = curthread->td_proc->p_fibnum; error = sysctl_handle_int(oidp, &fibnum, 0, req); return (error); } SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); static __inline struct rib_head ** rt_tables_get_rnh_ptr(int table, int fam) { struct rib_head **rh; KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", __func__)); KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", __func__)); /* rh is [fib=0][af=0]. */ rh = (struct rib_head **)V_rt_tables; /* Get the offset to the requested table and fam. */ rh += table * (AF_MAX+1) + fam; return (rh); } struct rib_head * rt_tables_get_rnh(int table, int fam) { return (*rt_tables_get_rnh_ptr(table, fam)); } /* * route initialization must occur before ip6_init2(), which happenas at * SI_ORDER_MIDDLE. */ static void route_init(void) { /* whack the tunable ints into line. */ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); -static int -rtentry_zinit(void *mem, int size, int how) -{ - struct rtentry *rt = mem; - - rt->rt_pksent = counter_u64_alloc(how); - if (rt->rt_pksent == NULL) - return (ENOMEM); - - RT_LOCK_INIT(rt); - - return (0); -} - static void -rtentry_zfini(void *mem, int size) -{ - struct rtentry *rt = mem; - - RT_LOCK_DESTROY(rt); - counter_u64_free(rt->rt_pksent); -} - -static int -rtentry_ctor(void *mem, int size, void *arg, int how) -{ - struct rtentry *rt = mem; - - bzero(rt, offsetof(struct rtentry, rt_endzero)); - counter_u64_zero(rt->rt_pksent); - - return (0); -} - -static void -rtentry_dtor(void *mem, int size, void *arg) -{ - struct rtentry *rt = mem; - - RT_UNLOCK_COND(rt); -} - -static void vnet_route_init(const void *unused __unused) { struct domain *dom; struct rib_head **rh; int table; int fam; V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO); V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), - rtentry_ctor, rtentry_dtor, - rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rh = rt_tables_get_rnh_ptr(table, fam); if (rh == NULL) panic("%s: rh NULL", __func__); dom->dom_rtattach((void **)rh, 0); } } } VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, vnet_route_init, 0); #ifdef VIMAGE static void vnet_route_uninit(const void *unused __unused) { int table; int fam; struct domain *dom; struct rib_head **rh; for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtdetach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rh = rt_tables_get_rnh_ptr(table, fam); if (rh == NULL) panic("%s: rh NULL", __func__); dom->dom_rtdetach((void **)rh, 0); } } free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_route_uninit, 0); #endif struct rib_head * rt_table_init(int offset) { struct rib_head *rh; rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO); /* XXX: These details should be hidded inside radix.c */ /* Init masks tree */ rn_inithead_internal(&rh->head, rh->rnh_nodes, offset); rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); rh->head.s.rnh_masks = &rh->rmhead.head; rh->rmhead.head.s.mask_nodes = rh->rmhead.mask_nodes; /* Init locks */ rw_init(&rh->rib_lock, "rib head"); /* Finally, set base callbacks */ rh->rnh_addaddr = rn_addroute; rh->rnh_deladdr = rn_delete; rh->rnh_matchaddr = rn_match; rh->rnh_lookup = rn_lookup; rh->rnh_walktree = rn_walktree; rh->rnh_walktree_from = rn_walktree_from; return (rh); } void rt_table_destroy(struct rib_head *rh) { /* Assume table is already empty */ rw_destroy(&rh->rib_lock); free(rh, M_RTABLE); } #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; }; #endif int sys_setfib(struct thread *td, struct setfib_args *uap) { if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) return EINVAL; td->td_proc->p_fibnum = uap->fibnum; return (0); } /* * Packet routing routines. */ /* * Legacy function for SCTP support. */ void rtalloc_ign(struct route *ro, u_long ignore) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. * * The returned route, if any, is locked. */ struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); } struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { struct rib_head *rh; struct radix_node *rn; struct rtentry *newrt; struct rt_addrinfo info; int err = 0, msgtype = RTM_MISS; int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); rh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; if (rh == NULL) goto miss; /* * Look up the address in the table for that Address Family */ needlock = !(ignflags & RTF_RNH_LOCKED); if (needlock) RIB_RLOCK(rh); #ifdef INVARIANTS else RIB_LOCK_ASSERT(rh); #endif rn = rh->rnh_matchaddr(dst, &rh->head); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { newrt = RNTORT(rn); RT_LOCK(newrt); RT_ADDREF(newrt); if (needlock) RIB_RUNLOCK(rh); goto done; } else if (needlock) RIB_RUNLOCK(rh); /* * Either we hit the root or couldn't find any match, * Which basically means * "caint get there frm here" */ miss: V_rtstat.rts_unreach++; if (report) { /* * If required, report the failure to the supervising * Authorities. * For a delete, this is not an error. (report == 0) */ bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg_fib(msgtype, &info, 0, err, fibnum); } done: if (newrt) RT_LOCK_ASSERT(newrt); return (newrt); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void rtfree(struct rtentry *rt) { struct rib_head *rh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); rh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); KASSERT(rh != NULL,("%s: NULL rh", __func__)); RT_LOCK_ASSERT(rt); /* * The callers should use RTFREE_LOCKED() or RTFREE(), so * we should come here exactly with the last reference. */ RT_REMREF(rt); if (rt->rt_refcnt > 0) { log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); goto done; } /* * On last reference give the "close method" a chance * to cleanup private state. This also permits (for * IPv4 and IPv6) a chance to decide if the routing table * entry should be purged immediately or at a later time. * When an immediate purge is to happen the close routine * typically calls rtexpunge which clears the RTF_UP flag * on the entry so that the code below reclaims the storage. */ if (rt->rt_refcnt == 0 && rh->rnh_close) rh->rnh_close((struct radix_node *)rt, &rh->head); /* * If we are no longer "up" (and ref == 0) * then we can free the resources associated * with the route. */ if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); /* * the rtentry must have been removed from the routing table * so it is represented in rttrash.. remove that now. */ V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif /* * release references on items we hold them on.. * e.g other routes and ifaddrs. */ if (rt->rt_ifa) ifa_free(rt->rt_ifa); /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ Free(rt_key(rt)); /* * and the rtentry itself of course */ uma_zfree(V_rtzone, rt); return; } done: RT_UNLOCK(rt); } /* * Force a routing table entry to the specified * destination to go through the given gateway. * Normally called as a result of a routing redirect * message from the network layer. */ void rtredirect(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src) { rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); } void rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src, u_int fibnum) { struct rtentry *rt, *rt0 = NULL; int error = 0; short *stat = NULL; struct rt_addrinfo info; struct ifaddr *ifa; struct rib_head *rh; ifa = NULL; rh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rh == NULL) { error = EAFNOSUPPORT; goto out; } /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) { error = ENETUNREACH; goto out; } rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ if (!(flags & RTF_DONE) && rt && (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) error = EINVAL; else if (ifa_ifwithaddr_check(gateway)) error = EHOSTUNREACH; if (error) goto done; /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net. */ create: rt0 = rt; rt = NULL; flags |= RTF_GATEWAY | RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; if (rt0 != NULL) RT_UNLOCK(rt0); /* drop lock to avoid LOR with rh */ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); if (rt0 != NULL) EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); flags = rt->rt_flags; } if (rt0 != NULL) RTFREE(rt0); stat = &V_rtstat.rts_dynamic; } else { struct rtentry *gwrt; /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &V_rtstat.rts_newgateway; /* * add the key and gateway (in one malloc'd chunk). */ RT_UNLOCK(rt); RIB_WLOCK(rh); RT_LOCK(rt); rt_setgate(rt, rt_key(rt), gateway); gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); RIB_WUNLOCK(rh); EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); RTFREE_LOCKED(gwrt); } } else error = EHOSTUNREACH; done: if (rt) RTFREE_LOCKED(rt); out: if (error) V_rtstat.rts_badredirect++; else if (stat != NULL) (*stat)++; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); if (ifa != NULL) ifa_free(ifa); } int rtioctl(u_long req, caddr_t data) { return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); } /* * Routing table ioctl interface. */ int rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* * If more ioctl commands are added here, make sure the proper * super-user checks are being performed because it is possible for * prison-root to make it this far if raw sockets have been enabled * in jails. */ #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, u_int fibnum) { struct ifaddr *ifa; int not_found = 0; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst, fibnum); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway, fibnum); } if (ifa == NULL) ifa = ifa_ifwithnet(gateway, 0, fibnum); if (ifa == NULL) { struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); if (rt == NULL) return (NULL); /* * dismiss a gateway that is reachable only * through the default router */ switch (gateway->sa_family) { case AF_INET: if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) not_found = 1; break; case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) not_found = 1; break; default: break; } if (!not_found && rt->rt_ifa != NULL) { ifa = rt->rt_ifa; ifa_ref(ifa); } RT_REMREF(rt); RT_UNLOCK(rt); if (not_found || ifa == NULL) return (NULL); } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; else ifa_free(oifa); } return (ifa); } /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) { return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, RT_DEFAULT_FIB)); } int rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, u_int fibnum) { struct rt_addrinfo info; if (dst->sa_len == 0) return(EINVAL); bzero((caddr_t)&info, sizeof(info)); info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; return rtrequest1_fib(req, &info, ret_nrt, fibnum); } void rt_foreach_fib(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, void *arg) { struct rib_head *rh; uint32_t fibnum; int i; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (af != AF_UNSPEC) { rh = rt_tables_get_rnh(fibnum, af); if (rh == NULL) continue; if (setwa_f != NULL) setwa_f(rh, fibnum, i, arg); RIB_WLOCK(rh); rh->rnh_walktree(&rh->head, (walktree_f_t *)wa_f, arg); RIB_WUNLOCK(rh); continue; } for (i = 1; i <= AF_MAX; i++) { rh = rt_tables_get_rnh(fibnum, i); if (rh == NULL) continue; if (setwa_f != NULL) setwa_f(rh, fibnum, i, arg); RIB_WLOCK(rh); rh->rnh_walktree(&rh->head, (walktree_f_t *)wa_f, arg); RIB_WUNLOCK(rh); } } } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. * * Arguments: * rt pointer to rtentry * arg argument passed to rh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated */ static int rt_ifdelroute(struct rtentry *rt, void *arg) { struct ifnet *ifp = arg; int err; if (rt->rt_ifp != ifp) return (0); /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rt_flags & RTF_UP) == 0) return (0); err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags | RTF_RNH_LOCKED | RTF_PINNED, (struct rtentry **) NULL, rt->rt_fibnum); if (err != 0) log(LOG_WARNING, "rt_ifdelroute: error %d\n", err); return (0); } /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ void rt_flushifroutes(struct ifnet *ifp) { rt_foreach_fib(AF_UNSPEC, NULL, rt_ifdelroute, ifp); } /* * These (questionable) definitions of apparent local variables apply * to the next two functions. XXXXXX!!! */ #define dst info->rti_info[RTAX_DST] #define gateway info->rti_info[RTAX_GATEWAY] #define netmask info->rti_info[RTAX_NETMASK] #define ifaaddr info->rti_info[RTAX_IFA] #define ifpaddr info->rti_info[RTAX_IFP] #define flags info->rti_flags int rt_getifa(struct rt_addrinfo *info) { return (rt_getifa_fib(info, RT_DEFAULT_FIB)); } /* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. */ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { struct ifaddr *ifa; int error = 0; /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. */ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK && (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) { info->rti_ifp = ifa->ifa_ifp; ifa_free(ifa); } if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { struct sockaddr *sa; sa = ifaaddr != NULL ? ifaaddr : (gateway != NULL ? gateway : dst); if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, fibnum); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(flags, sa, sa, fibnum); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = ifa->ifa_ifp; } else error = ENETUNREACH; return (error); } /* * Expunges references to a route that's about to be reclaimed. * The route must be locked. */ int rt_expunge(struct rib_head *rh, struct rtentry *rt) { #if !defined(RADIX_MPATH) struct radix_node *rn; #else struct rt_addrinfo info; int fib; struct rtentry *rt0; #endif struct ifaddr *ifa; int error = 0; RT_LOCK_ASSERT(rt); RIB_LOCK_ASSERT(rh); #ifdef RADIX_MPATH fib = rt->rt_fibnum; bzero(&info, sizeof(info)); info.rti_ifp = rt->rt_ifp; info.rti_flags = RTF_RNH_LOCKED; info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr; RT_UNLOCK(rt); error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib); if (error == 0 && rt0 != NULL) { rt = rt0; RT_LOCK(rt); } else if (error != 0) { RT_LOCK(rt); return (error); } #else /* * Remove the item from the tree; it should be there, * but when callers invoke us blindly it may not (sigh). */ rn = rh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rh->head); if (rn == NULL) { error = ESRCH; goto bad; } KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, ("unexpected flags 0x%x", rn->rn_flags)); KASSERT(rt == RNTORT(rn), ("lookup mismatch, rt %p rn %p", rt, rn)); #endif /* RADIX_MPATH */ rt->rt_flags &= ~RTF_UP; /* * Give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { struct rt_addrinfo info; bzero((caddr_t)&info, sizeof(info)); info.rti_flags = rt->rt_flags; info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); ifa->ifa_rtrequest(RTM_DELETE, rt, &info); } /* * one more rtentry floating around that is not * linked to the routing table. */ V_rttrash++; #if !defined(RADIX_MPATH) bad: #endif return (error); } #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); int p_sockaddr(char *buf, int buflen, struct sockaddr *s) { void *paddr = NULL; switch (s->sa_family) { case AF_INET: paddr = &((struct sockaddr_in *)s)->sin_addr; break; case AF_INET6: paddr = &((struct sockaddr_in6 *)s)->sin6_addr; break; } if (paddr == NULL) return (0); if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) return (0); return (strlen(buf)); } int rt_print(char *buf, int buflen, struct rtentry *rt) { struct sockaddr *addr, *mask; int i = 0; addr = rt_key(rt); mask = rt_mask(rt); i = p_sockaddr(buf, buflen, addr); if (!(rt->rt_flags & RTF_HOST)) { buf[i++] = '/'; i += p_sockaddr(buf + i, buflen - i, mask); } if (rt->rt_flags & RTF_GATEWAY) { buf[i++] = '>'; i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway); } return (i); } #endif #ifdef RADIX_MPATH static int rn_mpath_update(int req, struct rt_addrinfo *info, struct rib_head *rh, struct rtentry **ret_nrt) { /* * if we got multipath routes, we require users to specify * a matching RTAX_GATEWAY. */ struct rtentry *rt, *rto = NULL; struct radix_node *rn; int error = 0; rn = rh->rnh_lookup(dst, netmask, rh); if (rn == NULL) return (ESRCH); rto = rt = RNTORT(rn); rt = rt_mpath_matchgate(rt, gateway); if (rt == NULL) return (ESRCH); /* * this is the first entry in the chain */ if (rto == rt) { rn = rn_mpath_next((struct radix_node *)rt); /* * there is another entry, now it's active */ if (rn) { rto = RNTORT(rn); RT_LOCK(rto); rto->rt_flags |= RTF_UP; RT_UNLOCK(rto); } else if (rt->rt_flags & RTF_GATEWAY) { /* * For gateway routes, we need to * make sure that we we are deleting * the correct gateway. * rt_mpath_matchgate() does not * check the case when there is only * one route in the chain. */ if (gateway && (rt->rt_gateway->sa_len != gateway->sa_len || memcmp(rt->rt_gateway, gateway, gateway->sa_len))) error = ESRCH; else { /* * remove from tree before returning it * to the caller */ rn = rh->rnh_deladdr(dst, netmask, rh); KASSERT(rt == RNTORT(rn), ("radix node disappeared")); goto gwdelete; } } /* * use the normal delete code to remove * the first entry */ if (req != RTM_DELETE) goto nondelete; error = ENOENT; goto done; } /* * if the entry is 2nd and on up */ if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) panic ("rtrequest1: rt_mpath_deldup"); gwdelete: RT_LOCK(rt); RT_ADDREF(rt); if (req == RTM_DELETE) { rt->rt_flags &= ~RTF_UP; /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented * when RTFREE(rt) is eventually called. */ V_rttrash++; } nondelete: if (req != RTM_DELETE) panic("unrecognized request %d", req); /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) { *ret_nrt = rt; RT_UNLOCK(rt); } else RTFREE_LOCKED(rt); done: return (error); } #endif int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { int error = 0, needlock = 0; struct rtentry *rt; #ifdef FLOWTABLE struct rtentry *rt0; #endif struct radix_node *rn; struct rib_head *rh; struct ifaddr *ifa; struct sockaddr *ndst; struct sockaddr_storage mdst; #define senderr(x) { error = x ; goto bad; } KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } /* * Find the correct routing tree to use for this Address Family */ rh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rh == NULL) return (EAFNOSUPPORT); needlock = ((flags & RTF_RNH_LOCKED) == 0); flags &= ~RTF_RNH_LOCKED; if (needlock) RIB_WLOCK(rh); else RIB_LOCK_ASSERT(rh); /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. */ if (flags & RTF_HOST) netmask = NULL; switch (req) { case RTM_DELETE: if (netmask) { rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); dst = (struct sockaddr *)&mdst; } #ifdef RADIX_MPATH if (rn_mpath_capable(rh)) { error = rn_mpath_update(req, info, rh, ret_nrt); /* * "bad" holds true for the success case * as well */ if (error != ENOENT) goto bad; error = 0; } #endif if ((flags & RTF_PINNED) == 0) { /* Check if target route can be deleted */ rt = (struct rtentry *)rh->rnh_lookup(dst, netmask, &rh->head); if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) senderr(EADDRINUSE); } /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. */ rn = rh->rnh_deladdr(dst, netmask, &rh->head); if (rn == NULL) senderr(ESRCH); if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtrequest delete"); rt = RNTORT(rn); RT_LOCK(rt); RT_ADDREF(rt); rt->rt_flags &= ~RTF_UP; /* * give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, info); /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented * when RTFREE(rt) is eventually called. */ V_rttrash++; /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) { *ret_nrt = rt; RT_UNLOCK(rt); } else RTFREE_LOCKED(rt); break; case RTM_RESOLVE: /* * resolve was only used for route cloning * here for compat */ break; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) senderr(EINVAL); if (dst && gateway && (dst->sa_family != gateway->sa_family) && (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) senderr(EINVAL); if (info->rti_ifa == NULL) { error = rt_getifa_fib(info, fibnum); if (error) senderr(error); } else ifa_ref(info->rti_ifa); ifa = info->rti_ifa; - rt = uma_zalloc(V_rtzone, M_NOWAIT); + rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); if (rt == NULL) { ifa_free(ifa); senderr(ENOBUFS); } + RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it. */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { + RT_LOCK_DESTROY(rt); ifa_free(ifa); uma_zfree(V_rtzone, rt); senderr(error); } /* * point to the (possibly newly malloc'd) dest address. */ ndst = (struct sockaddr *)rt_key(rt); /* * make sure it contains the value we want (masked if needed). */ if (netmask) { rt_maskedcopy(dst, ndst, netmask); } else bcopy(dst, ndst, dst->sa_len); /* * We use the ifa reference returned by rt_getifa_fib(). * This moved from below so that rh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; rt->rt_weight = 1; rt_setmetrics(info, rt); #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ if (rn_mpath_capable(rh) && rt_mpath_conflict(rh, rt, netmask)) { ifa_free(rt->rt_ifa); Free(rt_key(rt)); + RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); senderr(EEXIST); } #endif #ifdef FLOWTABLE rt0 = NULL; /* "flow-table" only supports IPv6 and IPv4 at the moment. */ switch (dst->sa_family) { #ifdef INET6 case AF_INET6: #endif #ifdef INET case AF_INET: #endif #if defined(INET6) || defined(INET) rn = rh->rnh_matchaddr(dst, rh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { struct sockaddr *mask; u_char *m, *n; int len; /* * compare mask to see if the new route is * more specific than the existing one */ rt0 = RNTORT(rn); RT_LOCK(rt0); RT_ADDREF(rt0); RT_UNLOCK(rt0); /* * A host route is already present, so * leave the flow-table entries as is. */ if (rt0->rt_flags & RTF_HOST) { RTFREE(rt0); rt0 = NULL; } else if (!(flags & RTF_HOST) && netmask) { mask = rt_mask(rt0); len = mask->sa_len; m = (u_char *)mask; n = (u_char *)netmask; while (len-- > 0) { if (*n != *m) break; n++; m++; } if (len == 0 || (*n < *m)) { RTFREE(rt0); rt0 = NULL; } } } #endif/* INET6 || INET */ } #endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rh->rnh_addaddr(ndst, netmask, &rh->head, rt->rt_nodes); /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == NULL) { ifa_free(rt->rt_ifa); Free(rt_key(rt)); uma_zfree(V_rtzone, rt); #ifdef FLOWTABLE if (rt0 != NULL) RTFREE(rt0); #endif senderr(EEXIST); } #ifdef FLOWTABLE else if (rt0 != NULL) { flowtable_route_flush(dst->sa_family, rt0); RTFREE(rt0); } #endif /* * If this protocol has something to add to this then * allow it to do that as well. */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, info); /* * actually return a resultant rtentry and * give the caller a single reference. */ if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } RT_UNLOCK(rt); break; case RTM_CHANGE: error = rtrequest1_fib_change(rh, info, ret_nrt, fibnum); break; default: error = EOPNOTSUPP; } bad: if (needlock) RIB_WUNLOCK(rh); return (error); #undef senderr } #undef dst #undef gateway #undef netmask #undef ifaaddr #undef ifpaddr #undef flags static int rtrequest1_fib_change(struct rib_head *rh, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { struct rtentry *rt = NULL; int error = 0; int free_ifa = 0; int family, mtu; rt = (struct rtentry *)rh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], &rh->head); if (rt == NULL) return (ESRCH); #ifdef RADIX_MPATH /* * If we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. */ if (rn_mpath_capable(rh)) { rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); if (rt == NULL) return (ESRCH); } #endif RT_LOCK(rt); rt_setmetrics(info, rt); /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info->rti_info[RTAX_GATEWAY] != NULL) || info->rti_info[RTAX_IFP] != NULL || (info->rti_info[RTAX_IFA] != NULL && !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { error = rt_getifa_fib(info, fibnum); if (info->rti_ifa != NULL) free_ifa = 1; if (error != 0) goto bad; } /* Check if outgoing interface has changed */ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa && rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) { rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info); ifa_free(rt->rt_ifa); } /* Update gateway address */ if (info->rti_info[RTAX_GATEWAY] != NULL) { error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]); if (error != 0) goto bad; rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= (RTF_GATEWAY & info->rti_flags); } if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) { ifa_ref(info->rti_ifa); rt->rt_ifa = info->rti_ifa; rt->rt_ifp = info->rti_ifp; } /* Allow some flags to be toggled on change. */ rt->rt_flags &= ~RTF_FMASK; rt->rt_flags |= info->rti_flags & RTF_FMASK; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); /* Ensure route MTU is not bigger than interface MTU */ if (rt->rt_ifp != NULL) { family = info->rti_info[RTAX_DST]->sa_family; mtu = if_getmtu_family(rt->rt_ifp, family); if (rt->rt_mtu > mtu) rt->rt_mtu = mtu; } if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } bad: RT_UNLOCK(rt); if (free_ifa != 0) ifa_free(info->rti_ifa); return (error); } static void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) { if (info->rti_mflags & RTV_MTU) rt->rt_mtu = info->rti_rmx->rmx_mtu; if (info->rti_mflags & RTV_WEIGHT) rt->rt_weight = info->rti_rmx->rmx_weight; /* Kernel -> userland timebase conversion. */ if (info->rti_mflags & RTV_EXPIRE) rt->rt_expire = info->rti_rmx->rmx_expire ? info->rti_rmx->rmx_expire - time_second + time_uptime : 0; } int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); #ifdef INVARIANTS struct rib_head *rh; rh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); #endif RT_LOCK_ASSERT(rt); RIB_LOCK_ASSERT(rh); /* * Prepare to store the gateway in rt->rt_gateway. * Both dst and gateway are stored one after the other in the same * malloc'd chunk. If we have room, we can reuse the old buffer, * rt_gateway already points to the right place. * Otherwise, malloc a new block and update the 'dst' address. */ if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { caddr_t new; R_Malloc(new, caddr_t, dlen + glen); if (new == NULL) return ENOBUFS; /* * XXX note, we copy from *dst and not *rt_key(rt) because * rt_setgate() can be called to initialize a newly * allocated route entry, in which case rt_key(rt) == NULL * (and also rt->rt_gateway == NULL). * Free()/free() handle a NULL argument just fine. */ bcopy(dst, new, dlen); Free(rt_key(rt)); /* free old block, if any */ rt_key(rt) = (struct sockaddr *)new; rt->rt_gateway = (struct sockaddr *)(new + dlen); } /* * Copy the new gateway value into the memory chunk. */ bcopy(gate, rt->rt_gateway, glen); return (0); } void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { u_char *cp1 = (u_char *)src; u_char *cp2 = (u_char *)dst; u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; struct rt_addrinfo info; int error = 0; int startfib, endfib; char tempbuf[_SOCKADDR_TMPSIZE]; int didwork = 0; int a_failure = 0; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct rib_head *rh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; netmask = NULL; } else { dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } if (dst->sa_len == 0) return(EINVAL); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } if (fibnum == RT_ALL_FIBS) { if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) startfib = endfib = ifa->ifa_ifp->if_fib; else { startfib = 0; endfib = rt_numfibs - 1; } } else { KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); startfib = fibnum; endfib = fibnum; } /* * If it's a delete, check that if it exists, * it's on the correct interface or we might scrub * a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) * XXX this is kinda inet specific.. */ if (netmask != NULL) { rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); dst = (struct sockaddr *)tempbuf; } } /* * Now go through all the requested tables (fibs) and do the * requested action. Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { struct radix_node *rn; /* * Look up an rtentry that is in the routing tree and * contains the correct info. */ rh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rh == NULL) /* this table doesn't exist but others might */ continue; RIB_RLOCK(rh); rn = rh->rnh_lookup(dst, netmask, &rh->head); #ifdef RADIX_MPATH if (rn_mpath_capable(rh)) { if (rn == NULL) error = ESRCH; else { rt = RNTORT(rn); /* * for interface route the * rt->rt_gateway is sockaddr_intf * for cloning ARP entries, so * rt_mpath_matchgate must use the * interface address */ rt = rt_mpath_matchgate(rt, ifa->ifa_addr); if (rt == NULL) error = ESRCH; } } #endif error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa); RIB_RUNLOCK(rh); if (error) { /* this is only an error if bad on ALL tables */ continue; } } /* * Do the actual request */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; info.rti_info[RTAX_DST] = dst; /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); if ((error == EEXIST) && (cmd == RTM_ADD)) { /* * Interface route addition failed. * Atomically delete current prefix generating * RTM_DELETE message, and retry adding * interface prefix. */ rh = rt_tables_get_rnh(fibnum, dst->sa_family); RIB_WLOCK(rh); /* Delete old prefix */ info.rti_ifa = NULL; info.rti_flags = RTF_RNH_LOCKED; error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); if (error == 0) { info.rti_ifa = ifa; info.rti_flags = flags | RTF_RNH_LOCKED | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; error = rtrequest1_fib(cmd, &info, &rt, fibnum); } RIB_WUNLOCK(rh); } if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ RT_LOCK(rt); #ifdef RADIX_MPATH /* * in case address alias finds the first address * e.g. ifconfig bge0 192.0.2.246/24 * e.g. ifconfig bge0 192.0.2.247/24 * the address set in the route is 192.0.2.246 * so we need to replace it with 192.0.2.247 */ if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { ifa_free(rt->rt_ifa); ifa_ref(ifa); rt->rt_ifp = ifa->ifa_ifp; rt->rt_ifa = ifa; } #endif /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) { ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = rt->rt_ifp->if_type; ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = rt->rt_ifp->if_index; } RT_ADDREF(rt); RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); RT_LOCK(rt); RT_REMREF(rt); if (cmd == RTM_DELETE) { /* * If we are deleting, and we found an entry, * then it's been removed from the tree.. * now throw it away. */ RTFREE_LOCKED(rt); } else { if (cmd == RTM_ADD) { /* * We just wanted to add it.. * we don't actually need a reference. */ RT_REMREF(rt); } RT_UNLOCK(rt); } didwork = 1; } if (error) a_failure = error; } if (cmd == RTM_DELETE) { if (didwork) { error = 0; } else { /* we only give an error if it wasn't in any table */ error = ((flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } } else { if (a_failure) { /* return an error if any of them failed */ error = a_failure; } } return (error); } /* * Set up a routing table entry, normally * for an interface. */ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; } else { dst = ifa->ifa_addr; } switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We do support multiple FIBs. */ fib = RT_ALL_FIBS; break; } return (rtinit1(ifa, cmd, flags, fib)); } /* * Announce interface address arrival/withdraw * Returns 0 on success. */ int rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); #if defined(INET) || defined(INET6) #ifdef SCTP /* * notify the SCTP stack * this will only get called when an address is added/deleted * XXX pass the ifaddr struct instead if ifa->ifa_addr... */ sctp_addr_change(ifa, cmd); #endif /* SCTP */ #endif return (rtsock_addrmsg(cmd, ifa, fibnum)); } /* * Announce route addition/removal. * Users of this function MUST validate input data BEFORE calling. * However we have to be able to handle invalid data: * if some userland app sends us "invalid" route message (invalid mask, * no dst, wrong address families, etc...) we need to pass it back * to app (and any other rtsock consumers) with rtm_errno field set to * non-zero value. * Returns 0 on success. */ int rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); return (rtsock_routemsg(cmd, ifp, error, rt, fibnum)); } void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) { rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. */ void rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %u", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); if (cmd == RTM_ADD) { rt_addrmsg(cmd, ifa, fibnum); if (rt != NULL) rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); } else { if (rt != NULL) rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); rt_addrmsg(cmd, ifa, fibnum); } } Index: projects/routing/sys/net/route.h =================================================================== --- projects/routing/sys/net/route.h (revision 274334) +++ projects/routing/sys/net/route.h (revision 274335) @@ -1,374 +1,373 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ -#include #include /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ /* * Legacy structure. */ struct route { struct rtentry *ro_rt; struct llentry *ro_lle; struct in_ifaddr *ro_ia; int ro_flags; struct sockaddr ro_dst; }; struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_weight; /* route weight */ u_long rmx_filler[3]; /* will be used for T/TCP later */ }; /* * rmx_rtt and rmx_rttvar are stored as microseconds; * RTTTOPRHZ(rtt) converts to a value suitable for use * by a protocol slowtimo counter. */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ #ifdef _KERNEL extern u_int rt_numfibs; /* number of usable routing tables */ VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) #endif /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ #ifndef RNF_NORMAL #include #ifdef RADIX_MPATH #include #endif #endif struct rib_head; struct rtentry; struct nhop_prepend; /* * Structure used to pass prepend information * to if_output() routines. */ struct nhop_info { struct nhop_prepend *ni_nh; /* MUST be non-NULL */ uint32_t ni_flags; uint8_t ni_family; uint8_t spare[3]; }; #define RT_NHOP 0x01 #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ #if !defined(_KERNEL) || defined(_WANT_RTENTRY) /* This structure is kept for compatibility reasons only */ struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* * XXX struct rtentry must begin with a struct radix_node (or two!) * because the code does some casts of a 'struct radix_node *' * to a 'struct rtentry *' */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ int rt_flags; /* up/down?, host/net */ int rt_refcnt; /* # held references */ u_int rt_fibnum; /* which FIB */ u_long rt_mtu; /* MTU for this path */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ #define rt_endzero rt_mtx struct mtx rt_mtx; /* mutex for routing entry */ }; #endif /* !_KERNEL || _WANT_RTENTRY */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ /* 0x100 unused, was RTF_CLONING */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* DEPRECATED - exists ONLY for backward compatibility */ #define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ /* 0x10000 unused, was RTF_PRCLONING */ /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ #define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x8000000 and up unassigned */ #define RTF_STICKY 0x10000000 /* always route dst->src */ #define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */ #define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting with existing routing apps */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \ (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ RTF_REJECT | RTF_STATIC | RTF_STICKY) /* * Routing statistics. */ struct rtstat { short rts_badredirect; /* bogus redirect calls */ short rts_dynamic; /* routes created by redirects */ short rts_newgateway; /* routes modified by redirects */ short rts_unreach; /* lookups which failed */ short rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. */ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ /* 0x9 */ /* 0xa */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */ #define RTM_IEEE80211 0x12 /* IEEE80211 wireless event */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ #define RTV_WEIGHT 0x100 /* init or lock _weight */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ struct rt_addrinfo { int rti_addrs; struct sockaddr *rti_info[RTAX_MAX]; int rti_flags; struct ifaddr *rti_ifa; struct ifnet *rti_ifp; u_long rti_mflags; struct rt_metrics *rti_rmx; }; /* * This macro returns the size of a struct sockaddr when passed * through a routing socket. Basically we round up sa_len to * a multiple of sizeof(long), with a minimum of sizeof(long). * The check for a NULL pointer is just a convenience, probably never used. * The case sa_len == 0 should only apply to empty structures. */ #define SA_SIZE(sa) \ ( (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ? \ sizeof(long) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) #define sa_equal(a, b) ( \ (((struct sockaddr *)(a))->sa_len == ((struct sockaddr *)(b))->sa_len) && \ (bcmp((a), (b), ((struct sockaddr *)(b))->sa_len) == 0)) #ifdef _KERNEL #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) struct rib_head *rt_tables_get_rnh(int, int); struct ifmultiaddr; void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); void rt_missmsg(int, struct rt_addrinfo *, int, int); void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int); void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int); int rt_addrmsg(int, struct ifaddr *, int); int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); void rt_newmaddrmsg(int, struct ifmultiaddr *); int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); int rtsock_addrmsg(int, struct ifaddr *, int); int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); /* * Note the following locking behavior: * * rtalloc1() returns a locked rtentry * * rtfree() and RTFREE_LOCKED() require a locked rtentry * * RTFREE() uses an unlocked entry. */ int rt_expunge(struct rib_head *, struct rtentry *); void rtfree(struct rtentry *); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); typedef int rt_walktree_f_t(struct rtentry *, void *); typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *); void rt_foreach_fib(int af, rt_setwarg_t *, rt_walktree_f_t *, void *); void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ /* Thes are used by old code not yet converted to use multiple FIBS */ int rt_getifa(struct rt_addrinfo *); void rtalloc_ign(struct route *ro, u_long ignflags); struct rtentry *rtalloc1(struct sockaddr *, int, u_long); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *); int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol indepedent versions are the same as the AF_INET ones * but this will change.. */ int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); void rtredirect_fib(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int rtrequest_fib(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); #include typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif #endif Index: projects/routing/sys/net/route_internal.h =================================================================== --- projects/routing/sys/net/route_internal.h (revision 274334) +++ projects/routing/sys/net/route_internal.h (revision 274335) @@ -1,138 +1,137 @@ /*- * Copyright (c) 2014 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NET_ROUTE_INTERNAL_H_ #define _NET_ROUTE_INTERNAL_H_ struct rib_head { struct radix_head head; rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */ rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/ rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */ rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */ rn_walktree_t *rnh_walktree; /* traverse tree */ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */ rn_close_t *rnh_close; /*do something when the last ref drops*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ struct rwlock rib_lock; /* locks entire radix tree */ struct radix_mask_head rmhead; /* masks radix head */ }; #define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock) #define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock) #define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock) #define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock) #define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED) #define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED) struct rib_head *rt_table_init(int offset); void rt_table_destroy(struct rib_head *rh); struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* * XXX struct rtentry must begin with a struct radix_node (or two!) * because the code does some casts of a 'struct radix_node *' * to a 'struct rtentry *' */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ int rt_flags; /* up/down?, host/net */ int rt_refcnt; /* # held references */ u_int rt_fibnum; /* which FIB */ u_long rt_mtu; /* MTU for this path */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ -#define rt_endzero rt_pksent - counter_u64_t rt_pksent; /* packets sent using this route */ +#define rt_endzero rt_mtx struct mtx rt_mtx; /* mutex for routing entry */ }; #define RT_LOCK_INIT(_rt) \ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) #define RT_UNLOCK_COND(_rt) do { \ if (mtx_owned(&(_rt)->rt_mtx)) \ mtx_unlock(&(_rt)->rt_mtx); \ } while (0) #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt >= 0, \ ("negative refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt++; \ } while (0) #define RT_REMREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt > 0, \ ("bogus refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt--; \ } while (0) #define RTFREE_LOCKED(_rt) do { \ if ((_rt)->rt_refcnt <= 1) \ rtfree(_rt); \ else { \ RT_REMREF(_rt); \ RT_UNLOCK(_rt); \ } \ /* guard against invalid refs */ \ _rt = 0; \ } while (0) #define RTFREE(_rt) do { \ RT_LOCK(_rt); \ RTFREE_LOCKED(_rt); \ } while (0) #define RO_RTFREE(_ro) do { \ if ((_ro)->ro_rt) { \ if ((_ro)->ro_flags & RT_NORTREF) { \ (_ro)->ro_flags &= ~RT_NORTREF; \ (_ro)->ro_rt = NULL; \ } else { \ RT_LOCK((_ro)->ro_rt); \ RTFREE_LOCKED((_ro)->ro_rt); \ } \ } \ } while (0) #endif Index: projects/routing/sys/net/rtsock.c =================================================================== --- projects/routing/sys/net/rtsock.c (revision 274334) +++ projects/routing/sys/net/rtsock.c (revision 274335) @@ -1,1922 +1,1921 @@ /*- * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_mpath.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef COMPAT_FREEBSD32 #include #include struct if_msghdr32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; struct if_data ifm_data; }; struct if_msghdrl32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; uint16_t _ifm_spare1; uint16_t ifm_len; uint16_t ifm_data_off; struct if_data ifm_data; }; struct ifa_msghdrl32 { uint16_t ifam_msglen; uint8_t ifam_version; uint8_t ifam_type; int32_t ifam_addrs; int32_t ifam_flags; uint16_t ifam_index; uint16_t _ifam_spare1; uint16_t ifam_len; uint16_t ifam_data_off; int32_t ifam_metric; struct if_data ifam_data; }; #endif /* COMPAT_FREEBSD32 */ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; /* These are external hooks for CARP. */ int (*carp_get_vhid_p)(struct ifaddr *); /* * Used by rtsock/raw_input callback code to decide whether to filter the update * notification to a socket bound to a particular FIB. */ #define RTS_FILTER_FIB M_PROTO8 typedef struct { int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int any_count; /* total attached */ } route_cb_t; static VNET_DEFINE(route_cb_t, route_cb); #define V_route_cb VNET(route_cb) struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static void rts_input(struct mbuf *m); static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo); static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static int route_output(struct mbuf *m, struct socket *so, ...); static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); static struct sockaddr *rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask, struct sockaddr_storage *dmask); static struct netisr_handler rtsock_nh = { .nh_name = "rtsock", .nh_handler = rts_input, .nh_proto = NETISR_ROUTE, .nh_policy = NETISR_POLICY_SOURCE, }; static int sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&rtsock_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&rtsock_nh, qlimit)); } SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_route_netisr_maxqlen, "I", "maximum routing socket dispatch queue length"); static void rts_init(void) { int tmp; if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) rtsock_nh.nh_qlimit = tmp; netisr_register(&rtsock_nh); } SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); static int raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src, struct rawcb *rp) { int fibnum; KASSERT(m != NULL, ("%s: m is NULL", __func__)); KASSERT(proto != NULL, ("%s: proto is NULL", __func__)); KASSERT(rp != NULL, ("%s: rp is NULL", __func__)); /* No filtering requested. */ if ((m->m_flags & RTS_FILTER_FIB) == 0) return (0); /* Check if it is a rts and the fib matches the one of the socket. */ fibnum = M_GETFIB(m); if (proto->sp_family != PF_ROUTE || rp->rcb_socket == NULL || rp->rcb_socket->so_fibnum == fibnum) return (0); /* Filtering requested and no match, the socket shall be skipped. */ return (1); } static void rts_input(struct mbuf *m) { struct sockproto route_proto; unsigned short *family; struct m_tag *tag; route_proto.sp_family = PF_ROUTE; tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); if (tag != NULL) { family = (unsigned short *)(tag + 1); route_proto.sp_protocol = *family; m_tag_delete(m, tag); } else route_proto.sp_protocol = 0; raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb); } /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. XXX */ static void rts_abort(struct socket *so) { raw_usrreqs.pru_abort(so); } static void rts_close(struct socket *so) { raw_usrreqs.pru_close(so); } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; int error; KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL")); /* XXX */ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); if (rp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)rp; so->so_fibnum = td->td_proc->p_fibnum; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { so->so_pcb = NULL; free(rp, M_PCB); return error; } RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: V_route_cb.ip_count++; break; case AF_INET6: V_route_cb.ip6_count++; break; } V_route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */ } static int rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */ } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static void rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); KASSERT(rp != NULL, ("rts_detach: rp == NULL")); RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: V_route_cb.ip_count--; break; case AF_INET6: V_route_cb.ip6_count--; break; } V_route_cb.any_count--; RTSOCK_UNLOCK(); raw_usrreqs.pru_detach(so); } static int rts_disconnect(struct socket *so) { return (raw_usrreqs.pru_disconnect(so)); } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_peeraddr(so, nam)); } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { return (raw_usrreqs.pru_send(so, flags, m, nam, control, td)); } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { return (raw_usrreqs.pru_shutdown(so)); } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_sockaddr(so, nam)); } static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_attach = rts_attach, .pru_bind = rts_bind, .pru_connect = rts_connect, .pru_detach = rts_detach, .pru_disconnect = rts_disconnect, .pru_peeraddr = rts_peeraddr, .pru_send = rts_send, .pru_shutdown = rts_shutdown, .pru_sockaddr = rts_sockaddr, .pru_close = rts_close, }; #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED /* * The union of all possible address formats we handle. */ union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ static int rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred) { /* First, see if the returned address is part of the jail. */ if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) { info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; return (0); } switch (info->rti_info[RTAX_DST]->sa_family) { #ifdef INET case AF_INET: { struct in_addr ia; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; ia = ((struct sockaddr_in *)sa)->sin_addr; if (prison_check_ip4(cred, &ia) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)-> sin_addr; if (prison_get_ip4(cred, &ia) != 0) return (ESRCH); } bzero(&saun->sin, sizeof(struct sockaddr_in)); saun->sin.sin_len = sizeof(struct sockaddr_in); saun->sin.sin_family = AF_INET; saun->sin.sin_addr.s_addr = ia.s_addr; info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin; break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr ia6; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET6) continue; bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr, &ia6, sizeof(struct in6_addr)); if (prison_check_ip6(cred, &ia6) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)-> sin6_addr; if (prison_get_ip6(cred, &ia6) != 0) return (ESRCH); } bzero(&saun->sin6, sizeof(struct sockaddr_in6)); saun->sin6.sin6_len = sizeof(struct sockaddr_in6); saun->sin6.sin6_family = AF_INET6; bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr)); if (sa6_recoverscope(&saun->sin6) != 0) return (ESRCH); info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6; break; } #endif default: return (ESRCH); } return (0); } /*ARGSUSED*/ static int route_output(struct mbuf *m, struct socket *so, ...) { struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct rib_head *rh; struct rt_addrinfo info; struct sockaddr_storage ss; #ifdef INET6 struct sockaddr_in6 *sin6; int i, rti_need_deembed = 0; #endif int alloc_len = 0, len, error = 0, fibnum; struct ifnet *ifp = NULL; union sockaddr_union saun; sa_family_t saf = AF_UNSPEC; struct rawcb *rp = NULL; struct walkarg w; fibnum = so->so_fibnum; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) senderr(EINVAL); /* * Most of current messages are in range 200-240 bytes, * minimize possible re-allocation on reply using larger size * buffer aligned on 1k boundaty. */ alloc_len = roundup2(len, 1024); if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL) senderr(ENOBUFS); m_copydata(m, 0, len, (caddr_t)rtm); bzero(&info, sizeof(info)); bzero(&w, sizeof(w)); if (rtm->rtm_version != RTM_VERSION) { /* Do not touch message since format is unknown */ free(rtm, M_TEMP); rtm = NULL; senderr(EPROTONOSUPPORT); } /* * Starting from here, it is possible * to alter original message and insert * caller PID and error value. */ rtm->rtm_pid = curproc->p_pid; info.rti_addrs = rtm->rtm_addrs; info.rti_mflags = rtm->rtm_inits; info.rti_rmx = &rtm->rtm_rmx; /* * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6 * link-local address because rtrequest requires addresses with * embedded scope id. */ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) senderr(EINVAL); info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); saf = info.rti_info[RTAX_DST]->sa_family; /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET) { error = priv_check(curthread, PRIV_NET_ROUTE); if (error) senderr(error); } /* * The given gateway address may be an interface address. * For example, issuing a "route change" command on a route * entry that was created from a tunnel, and the gateway * address given is the local end point. In this case the * RTF_GATEWAY flag must be cleared or the destination will * not be reachable even though there is no error message. */ if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { struct rtentry *rt; /* XXX-ME: Is this enough? */ struct sockaddr dst; bzero(&dst, sizeof(dst)); dst = *info.rti_info[RTAX_GATEWAY]; rt = rtalloc1_fib(&dst, 0, 0, fibnum); /* * A host route through the loopback interface is * installed for each interface adddress. In pre 8.0 * releases the interface address of a PPP link type * is not reachable locally. This behavior is fixed as * part of the new L2/L3 redesign and rewrite work. The * signature of this interface address route is the * AF_LINK sa_family type of the rt_gateway, and the * rt_ifp has the IFF_LOOPBACK flag set. */ if (rt != NULL && rt->rt_gateway->sa_family == AF_LINK && rt->rt_ifp->if_flags & IFF_LOOPBACK) { info.rti_flags &= ~RTF_GATEWAY; info.rti_flags |= RTF_GWFLAG_COMPAT; } if (rt != NULL) RTFREE_LOCKED(rt); } switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: case RTM_CHANGE: if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt, fibnum); if (error == 0 && saved_nrt != NULL) { #ifdef INET6 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif RT_LOCK(saved_nrt); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY] && (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } #ifdef INET6 /* rt_msg2() will not be used when RTM_DELETE fails. */ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; case RTM_GET: rh = rt_tables_get_rnh(fibnum, saf); if (rh == NULL) senderr(EAFNOSUPPORT); RIB_RLOCK(rh); if (info.rti_info[RTAX_NETMASK] == NULL && rtm->rtm_type == RTM_GET) { /* * Provide logest prefix match for * address lookup (no mask). * 'route -n get addr' */ rt = (struct rtentry *) rh->rnh_matchaddr( info.rti_info[RTAX_DST], &rh->head); } else rt = (struct rtentry *) rh->rnh_lookup( info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], &rh->head); if (rt == NULL) { RIB_RUNLOCK(rh); senderr(ESRCH); } #ifdef RADIX_MPATH /* * for RTM_CHANGE/LOCK, if we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. * * for RTM_GET, gate is optional even with multipath. * if gate == NULL the first match is returned. * (no need to call rt_mpath_matchgate if gate == NULL) */ if (rn_mpath_capable(rh) && (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); if (!rt) { RIB_RUNLOCK(rh); senderr(ESRCH); } } #endif /* * If performing proxied L2 entry insertion, and * the actual PPP host entry is found, perform * another search to retrieve the prefix route of * the local end point of the PPP link. */ if (rtm->rtm_flags & RTF_ANNOUNCE) { struct sockaddr laddr; if (rt->rt_ifp != NULL && rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { struct ifaddr *ifa; ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1, RT_ALL_FIBS); if (ifa != NULL) rt_maskedcopy(ifa->ifa_addr, &laddr, ifa->ifa_netmask); } else rt_maskedcopy(rt->rt_ifa->ifa_addr, &laddr, rt->rt_ifa->ifa_netmask); /* * refactor rt and no lock operation necessary */ rt = (struct rtentry *)rh->rnh_matchaddr(&laddr, &rh->head); if (rt == NULL) { RIB_RUNLOCK(rh); senderr(ESRCH); } } RT_LOCK(rt); RT_ADDREF(rt); RIB_RUNLOCK(rh); report: RT_LOCK_ASSERT(rt); if ((rt->rt_flags & RTF_HOST) == 0 ? jailed_without_vnet(curthread->td_ucred) : prison_if(curthread->td_ucred, rt_key(rt)) != 0) { RT_UNLOCK(rt); senderr(ESRCH); } info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss); info.rti_info[RTAX_GENMASK] = 0; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; error = rtm_get_jailed(&info, ifp, rt, &saun, curthread->td_ucred); if (error != 0) { RT_UNLOCK(rt); senderr(error); } if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info.rti_info[RTAX_IFP] = NULL; info.rti_info[RTAX_IFA] = NULL; } } else if ((ifp = rt->rt_ifp) != NULL) { rtm->rtm_index = ifp->if_index; } /* Check if we need to realloc storage */ rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len); if (len > alloc_len) { struct rt_msghdr *new_rtm; new_rtm = malloc(len, M_TEMP, M_NOWAIT); if (new_rtm == NULL) { RT_UNLOCK(rt); senderr(ENOBUFS); } bcopy(rtm, new_rtm, rtm->rtm_msglen); free(rtm, M_TEMP); rtm = new_rtm; alloc_len = len; } w.w_tmem = (caddr_t)rtm; w.w_tmemsize = alloc_len; rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len); if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; RT_UNLOCK(rt); break; default: senderr(EOPNOTSUPP); } flush: if (rt != NULL) RTFREE(rt); /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (V_route_cb.any_count <= 1) { if (rtm != NULL) free(rtm, M_TEMP); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm != NULL) { #ifdef INET6 if (rti_need_deembed) { /* sin6_scope_id is recovered before sending rtm. */ sin6 = (struct sockaddr_in6 *)&ss; for (i = 0; i < RTAX_MAX; i++) { if (info.rti_info[i] == NULL) continue; if (info.rti_info[i]->sa_family != AF_INET6) continue; bcopy(info.rti_info[i], sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) bcopy(sin6, info.rti_info[i], sizeof(*sin6)); } } #endif if (error != 0) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); free(rtm, M_TEMP); } if (m != NULL) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; if (rp) { /* * XXX insure we don't get a copy by * invalidating our protocol */ unsigned short family = rp->rcb_proto.sp_family; rp->rcb_proto.sp_family = 0; rt_dispatch(m, saf); rp->rcb_proto.sp_family = family; } else rt_dispatch(m, saf); } return (error); } static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { bzero(out, sizeof(*out)); out->rmx_mtu = rt->rt_mtu; out->rmx_weight = rt->rt_weight; - out->rmx_pksent = counter_u64_fetch(rt->rt_pksent); /* Kernel -> userland timebase conversion. */ out->rmx_expire = rt->rt_expire ? rt->rt_expire - time_uptime + time_second : 0; } /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) return (EINVAL); /* * there are no more.. quit now * If there are more bits, they are in error. * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * for compatibility, If we see this, point to a safe address. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ #ifdef INET6 if (sa->sa_family == AF_INET6) sa6_embedscope((struct sockaddr_in6 *)sa, V_ip6_use_defzone); #endif rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } return (0); } /* * Fill in @dmask with valid netmask leaving original @smask * intact. Mostly used with radix netmasks. */ static struct sockaddr * rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask, struct sockaddr_storage *dmask) { if (dst == NULL || smask == NULL) return (NULL); memset(dmask, 0, dst->sa_len); memcpy(dmask, smask, smask->sa_len); dmask->ss_len = dst->sa_len; dmask->ss_family = dst->sa_family; return ((struct sockaddr *)dmask); } /* * Writes information related to @rtinfo object to newly-allocated mbuf. * Assumes MCLBYTES is enough to construct any message. * Used for OS notifications of vaious events (if/ifa announces,etc) * * Returns allocated mbuf or NULL on failure. */ static struct mbuf * rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: case RTM_IEEE80211: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } /* XXXGL: can we use MJUMPAGESIZE cluster here? */ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__)); if (len > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } /* * Writes information related to @rtinfo object to preallocated buffer. * Stores needed size in @plen. If @w is NULL, calculates size without * writing. * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation. * * Returns 0 on success. * */ static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen) { int i; int len, buflen = 0, dlen; caddr_t cp = NULL; struct rt_msghdr *rtm = NULL; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif switch (type) { case RTM_DELADDR: case RTM_NEWADDR: if (w != NULL && w->w_op == NET_RT_IFLISTL) { #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) len = sizeof(struct ifa_msghdrl32); else #endif len = sizeof(struct ifa_msghdrl); } else len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: #ifdef COMPAT_FREEBSD32 if (w != NULL && w->w_req->flags & SCTL_MASK32) { if (w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl32); else len = sizeof(struct if_msghdr32); break; } #endif if (w != NULL && w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl); else len = sizeof(struct if_msghdr); break; case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; default: len = sizeof(struct rt_msghdr); } if (w != NULL) { rtm = (struct rt_msghdr *)w->w_tmem; buflen = w->w_tmemsize - len; cp = (caddr_t)w->w_tmem + len; } rtinfo->rti_addrs = 0; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); if (cp != NULL && buflen >= dlen) { #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; buflen -= dlen; } else if (cp != NULL) { /* * Buffer too small. Count needed size * and return with error. */ cp = NULL; } len += dlen; } if (cp != NULL) { dlen = ALIGN(len) - len; if (buflen < dlen) cp = NULL; else buflen -= dlen; } len = ALIGN(len); if (cp != NULL) { /* fill header iff buffer is large enough */ rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } *plen = len; if (w != NULL && cp == NULL) return (ENOBUFS); return (0); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occured, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, int fibnum) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (V_route_cb.any_count == 0) return; m = rtsock_msg_mbuf(type, rtinfo); if (m == NULL) return; if (fibnum != RT_ALL_FIBS) { KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out " "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs)); M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); } void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rtsock_msg_mbuf(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; if_data_copy(ifp, &ifm->ifm_data); ifm->ifm_addrs = 0; rt_dispatch(m, AF_UNSPEC); } /* * Announce interface address arrival/withdraw. * Please do not call directly, use rt_addrmsg(). * Assume input data to be valid. * Returns 0 on success. */ int rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; int ncmd; struct mbuf *m; struct ifa_msghdr *ifam; struct ifnet *ifp = ifa->ifa_ifp; struct sockaddr_storage ss; if (V_route_cb.any_count == 0) return (0); ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL) return (ENOBUFS); ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * Announce route addition/removal. * Please do not call directly, use rt_routemsg(). * Note that @rt data MAY be inconsistent/invalid: * if some userland app sends us "invalid" route message (invalid mask, * no dst, wrong address families, etc...) we need to pass it back * to app (and any other rtsock consumers) with rtm_errno field set to * non-zero value. * * Returns 0 on success. */ int rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; struct mbuf *m; struct rt_msghdr *rtm; struct sockaddr_storage ss; if (V_route_cb.any_count == 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = sa = rt_key(rt); info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL) return (ENOBUFS); rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * This is the analogue to the rt_newaddrmsg which performs the same * function but for multicast group memberhips. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo info; struct mbuf *m = NULL; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; m = rtsock_msg_mbuf(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n", __func__)); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC); } static struct mbuf * rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct rt_addrinfo *info) { struct if_announcemsghdr *ifan; struct mbuf *m; if (V_route_cb.any_count == 0) return NULL; bzero((caddr_t)info, sizeof(*info)); m = rtsock_msg_mbuf(type, info); if (m != NULL) { ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); ifan->ifan_what = what; } return m; } /* * This is called to generate routing socket messages indicating * IEEE80211 wireless events. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. */ void rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); if (m != NULL) { /* * Append the ieee80211 data. Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rt_dispatch(m, AF_UNSPEC); } } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); if (m != NULL) rt_dispatch(m, AF_UNSPEC); } static void rt_dispatch(struct mbuf *m, sa_family_t saf) { struct m_tag *tag; /* * Preserve the family from the sockaddr, if any, in an m_tag for * use when injecting the mbuf into the routing socket buffer from * the netisr. */ if (saf != AF_UNSPEC) { tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), M_NOWAIT); if (tag == NULL) { m_freem(m); return; } *(unsigned short *)(tag + 1) = saf; m_tag_prepend(m, tag); } #ifdef VIMAGE if (V_loif) m->m_pkthdr.rcvif = V_loif; else { m_freem(m); return; } #endif netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ } /* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; struct sockaddr_storage ss; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; if ((rt->rt_flags & RTF_HOST) == 0 ? jailed_without_vnet(w->w_req->td->td_ucred) : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss); info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0) return (error); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } static int sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdrl *ifm; struct if_data *ifd; ifm = (struct if_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdrl32 *ifm32; ifm32 = (struct if_msghdrl32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifm32->ifm_len = sizeof(*ifm32); ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data); ifd = &ifm32->ifm_data; } else #endif { ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifm->_ifm_spare1 = 0; ifm->ifm_len = sizeof(*ifm); ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); ifd = &ifm->ifm_data; } if_data_copy(ifp, ifd); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdr *ifm; struct if_data *ifd; ifm = (struct if_msghdr *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdr32 *ifm32; ifm32 = (struct if_msghdr32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifd = &ifm32->ifm_data; } else #endif { ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifd = &ifm->ifm_data; } if_data_copy(ifp, ifd); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdrl *ifam; struct if_data *ifd; ifam = (struct ifa_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct ifa_msghdrl32 *ifam32; ifam32 = (struct ifa_msghdrl32 *)ifam; ifam32->ifam_addrs = info->rti_addrs; ifam32->ifam_flags = ifa->ifa_flags; ifam32->ifam_index = ifa->ifa_ifp->if_index; ifam32->_ifam_spare1 = 0; ifam32->ifam_len = sizeof(*ifam32); ifam32->ifam_data_off = offsetof(struct ifa_msghdrl32, ifam_data); ifam32->ifam_metric = ifa->ifa_ifp->if_metric; ifd = &ifam32->ifam_data; } else #endif { ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->_ifam_spare1 = 0; ifam->ifam_len = sizeof(*ifam); ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifd = &ifam->ifam_data; } bzero(ifd, sizeof(*ifd)); ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifd->ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_metric = ifa->ifa_ifp->if_metric; return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; struct sockaddr_storage ss; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); if (error != 0) goto done; info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifml(ifp, &info, w, len); else error = sysctl_iflist_ifm(ifp, &info, w, len); if (error) goto done; } while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { if (af && af != ifa->ifa_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifa->ifa_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( ifa->ifa_addr, ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len); if (error != 0) goto done; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifaml(ifa, &info, w, len); else error = sysctl_iflist_ifam(ifa, &info, w, len); if (error) goto done; } } IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = NULL; info.rti_info[RTAX_NETMASK] = NULL; info.rti_info[RTAX_BRD] = NULL; } done: if (ifp != NULL) IF_ADDR_RUNLOCK(ifp); IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_ifmalist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct rt_addrinfo info; int len, error = 0; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (af && af != ifma->ifma_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifma->ifma_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? ifma->ifma_lladdr : NULL; error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len); if (error != 0) goto done; if (w->w_req && w->w_tmem) { struct ifma_msghdr *ifmam; ifmam = (struct ifma_msghdr *)w->w_tmem; ifmam->ifmam_index = ifma->ifma_ifp->if_index; ifmam->ifmam_flags = 0; ifmam->ifmam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) { IF_ADDR_RUNLOCK(ifp); goto done; } } } IF_ADDR_RUNLOCK(ifp); } done: IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct rib_head *rh = NULL; /* silence compiler. */ int i, lim, error = EINVAL; int fib = 0; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (name[1] == NET_RT_DUMP) { if (namelen == 3) fib = req->td->td_proc->p_fibnum; else if (namelen == 4) fib = (name[3] == RT_ALL_FIBS) ? req->td->td_proc->p_fibnum : name[3]; else return ((namelen < 3) ? EISDIR : ENOTDIR); if (fib < 0 || fib >= rt_numfibs) return (EINVAL); } else if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) return (EINVAL); bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); /* * Allocate reply buffer in advance. * All rtsock messages has maximum length of u_short. */ w.w_tmemsize = 65536; w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: if (af == 0) { /* dump all tables */ i = 1; lim = AF_MAX; } else /* dump only one table */ i = lim = af; /* * take care of llinfo entries, the caller must * specify an AF */ if (w.w_op == NET_RT_FLAGS && (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) { if (af != 0) error = lltable_sysctl_dumparp(af, w.w_req); else error = EINVAL; break; } /* * take care of routing entries */ for (error = 0; error == 0 && i <= lim; i++) { rh = rt_tables_get_rnh(fib, i); if (rh != NULL) { RIB_RLOCK(rh); error = rh->rnh_walktree(&rh->head, sysctl_dumpentry, &w); RIB_RUNLOCK(rh); } else if (af != 0) error = EAFNOSUPPORT; } break; case NET_RT_IFLIST: case NET_RT_IFLISTL: error = sysctl_iflist(af, &w); break; case NET_RT_IFMALIST: error = sysctl_ifmalist(af, &w); break; } free(w.w_tmem, M_TEMP); return (error); } static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ static struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { .pr_type = SOCK_RAW, .pr_domain = &routedomain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_output = route_output, .pr_ctlinput = raw_ctlinput, .pr_init = raw_init, .pr_usrreqs = &route_usrreqs } }; static struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_protosw = routesw, .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] }; VNET_DOMAIN_SET(route);