Index: head/sys/net/if_var.h =================================================================== --- head/sys/net/if_var.h (revision 361420) +++ head/sys/net/if_var.h (revision 361421) @@ -1,807 +1,807 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, ro) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct nhop_object; /* ifa_rtrequest */ struct rt_addrinfo; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; struct debugnet_methods; #ifdef _KERNEL #include #include /* ifqueue only? */ #include #include #endif /* _KERNEL */ #include #include #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include CK_STAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ CK_STAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head *, link_pfil_head); #define V_link_pfil_head VNET(link_pfil_head) #define PFIL_ETHER_NAME "ethernet" #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 #define HHOOK_IPSEC_COUNT 2 VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); #define V_ipsec_hhh_in VNET(ipsec_hhh_in) #define V_ipsec_hhh_out VNET(ipsec_hhh_out) #endif /* _KERNEL */ typedef enum { IFCOUNTER_IPACKETS = 0, IFCOUNTER_IERRORS, IFCOUNTER_OPACKETS, IFCOUNTER_OERRORS, IFCOUNTER_COLLISIONS, IFCOUNTER_IBYTES, IFCOUNTER_OBYTES, IFCOUNTER_IMCASTS, IFCOUNTER_OMCASTS, IFCOUNTER_IQDROPS, IFCOUNTER_OQDROPS, IFCOUNTER_NOPROTO, IFCOUNTERS /* Array size. */ } ift_counter; typedef struct ifnet * if_t; typedef void (*if_start_fn_t)(if_t); typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t); typedef void (*if_init_fn_t)(void *); typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ u_int tsomaxsegcount; /* TSO maximum segment count */ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; /* Interface encap request types */ typedef enum { IFENCAP_LL = 1 /* pre-calculate link-layer header */ } ife_type; /* * The structure below allows to request various pre-calculated L2/L3 headers * for different media. Requests varies by type (rtype field). * * IFENCAP_LL type: pre-calculates link header based on address family * and destination lladdr. * * Input data fields: * buf: pointer to destination buffer * bufsize: buffer size * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast * family: address family defined by AF_ constant. * lladdr: pointer to link-layer address * lladdr_len: length of link-layer address * hdata: pointer to L3 header (optional, used for ARP requests). * Output data fields: * buf: encap data is stored here * bufsize: resulting encap length is stored here * lladdr_off: offset of link-layer address from encap hdr start * hdata: L3 header may be altered if necessary */ struct if_encap_req { u_char *buf; /* Destination buffer (w) */ size_t bufsize; /* size of provided buffer (r) */ ife_type rtype; /* request type (r) */ uint32_t flags; /* Request flags (r) */ int family; /* Address family AF_* (r) */ int lladdr_off; /* offset from header start (w) */ int lladdr_len; /* lladdr length (r) */ char *lladdr; /* link-level address pointer (r) */ char *hdata; /* Upper layer header data (rw) */ }; #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ /* * Network interface send tag support. The storage of "struct * m_snd_tag" comes from the network driver and it is free to allocate * as much additional space as it wants for its own use. */ struct ktls_session; struct m_snd_tag; #define IF_SND_TAG_TYPE_RATE_LIMIT 0 #define IF_SND_TAG_TYPE_UNLIMITED 1 #define IF_SND_TAG_TYPE_TLS 2 #define IF_SND_TAG_TYPE_MAX 3 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ uint32_t flowid; /* mbuf hash value */ uint32_t flowtype; /* mbuf hash type */ uint8_t numa_domain; /* numa domain of associated inp */ }; struct if_snd_tag_alloc_rate_limit { struct if_snd_tag_alloc_header hdr; uint64_t max_rate; /* in bytes/s */ uint32_t flags; /* M_NOWAIT or M_WAITOK */ uint32_t reserved; /* alignment */ }; struct if_snd_tag_alloc_tls { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; }; struct if_snd_tag_rate_limit_params { uint64_t max_rate; /* in bytes/s */ uint32_t queue_level; /* 0 (empty) .. 65535 (full) */ #define IF_SND_QUEUE_LEVEL_MIN 0 #define IF_SND_QUEUE_LEVEL_MAX 65535 uint32_t flags; /* M_NOWAIT or M_WAITOK */ }; union if_snd_tag_alloc_params { struct if_snd_tag_alloc_header hdr; struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; struct if_snd_tag_alloc_tls tls; }; union if_snd_tag_modify_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; }; union if_snd_tag_query_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; }; /* Query return flags */ #define RT_NOSUPPORT 0x00000000 /* Not supported */ #define RT_IS_INDIRECT 0x00000001 /* * Interface like a lagg, select * the actual interface for * capabilities. */ #define RT_IS_SELECTABLE 0x00000002 /* * No rate table, you select * rates and the first * number_of_rates are created. */ #define RT_IS_FIXED_TABLE 0x00000004 /* A fixed table is attached */ #define RT_IS_UNUSABLE 0x00000008 /* It is not usable for this */ #define RT_IS_SETUP_REQ 0x00000010 /* The interface setup must be called before use */ struct if_ratelimit_query_results { const uint64_t *rate_table; /* Pointer to table if present */ uint32_t flags; /* Flags indicating results */ uint32_t max_flows; /* Max flows using, 0=unlimited */ uint32_t number_of_rates; /* How many unique rates can be created */ uint32_t min_segment_burst; /* The amount the adapter bursts at each send */ }; typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *); typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *); typedef void (if_snd_tag_free_t)(struct m_snd_tag *); typedef void (if_ratelimit_query_t)(struct ifnet *, struct if_ratelimit_query_results *); typedef int (if_ratelimit_setup_t)(struct ifnet *, uint64_t, uint32_t); /* * Structure defining a network interface. */ struct ifnet { /* General book keeping of interface lists. */ CK_STAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained (CK_) */ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */ /* protected by if_addr_lock */ u_char if_alloctype; /* if_type at time of allocation */ uint8_t if_numa_domain; /* NUMA domain of device */ /* Driver and protocol specific information that remains stable. */ void *if_softc; /* pointer to driver state */ void *if_llsoftc; /* link layer softc */ void *if_l2com; /* pointer to protocol bits */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ u_short if_index; /* numeric abbreviation for this if */ short if_index_reserved; /* spare space to grow if_index */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ char *if_description; /* interface description */ /* Variable fields that are touched by the stack and drivers. */ int if_flags; /* up/down, broadcast, etc. */ int if_drv_flags; /* driver-managed status flags */ int if_capabilities; /* interface features & capabilities */ int if_capenable; /* enabled features & capabilities */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ u_int if_refcount; /* reference count */ /* These fields are shared with struct if_data. */ uint8_t if_type; /* ethernet, tokenring, etc */ uint8_t if_addrlen; /* media address length */ uint8_t if_hdrlen; /* media header length */ uint8_t if_link_state; /* current link state */ uint32_t if_mtu; /* maximum transmission unit */ uint32_t if_metric; /* routing metric (external only) */ uint64_t if_baudrate; /* linespeed */ uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */ time_t if_epoch; /* uptime at attach or stat reset */ struct timeval if_lastchange; /* time of last administrative change */ struct ifaltq if_snd; /* output queue (includes altq) */ struct task if_linktask; /* task for link change events */ struct task if_addmultitask; /* task for SIOCADDMULTI */ /* Addresses of different protocol families assigned to this if. */ struct mtx if_addr_lock; /* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. * Some code in the kernel assumes that first element * of the list has type AF_LINK, and contains sockaddr_dl * addresses which store the link-level address and the name * of the interface. * However, access to the AF_LINK address through this * field is deprecated. Use if_addr or ifaddr_byindex() instead. */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ struct ifaddr *if_addr; /* pointer to link-level address */ void *if_hw_addr; /* hardware link-level address */ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ struct mtx if_afdata_lock; void *if_afdata[AF_MAX]; int if_afdata_initialized; /* Additional features hung off the interface. */ u_int if_fib; /* interface FIB */ struct vnet *if_vnet; /* pointer to network stack instance */ struct vnet *if_home_vnet; /* where this ifnet originates from */ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ struct bpf_if *if_bpf; /* packet filter structure */ int if_pcount; /* number of promiscuous listeners */ void *if_bridge; /* bridge glue */ void *if_lagg; /* lagg glue */ void *if_pf_kif; /* pf glue */ struct carp_if *if_carp; /* carp interface structure */ struct label *if_label; /* interface MAC label */ struct netmap_adapter *if_netmap; /* netmap(4) softc */ /* Various procedures of the layer2 encapsulation and drivers. */ int (*if_output) /* output routine (enqueue) */ (struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); struct mbuf *(*if_bridge_input)(struct ifnet *, struct mbuf *); int (*if_bridge_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); void (*if_bridge_linkstate)(struct ifnet *ifp); if_start_fn_t if_start; /* initiate output routine */ if_ioctl_fn_t if_ioctl; /* ioctl routine */ if_init_fn_t if_init; /* Init routine */ int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); if_qflush_fn_t if_qflush; /* flush any queue */ if_transmit_fn_t if_transmit; /* initiate output routine */ void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); if_get_counter_t if_get_counter; /* get counter values */ int (*if_requestencap) /* make link header from request */ (struct ifnet *, struct if_encap_req *); /* Statistics. */ counter_u64_t if_counters[IFCOUNTERS]; /* Stuff that's only temporary and doesn't belong here. */ /* * Network adapter TSO limits: * =========================== * * If the "if_hw_tsomax" field is zero the maximum segment * length limit does not apply. If the "if_hw_tsomaxsegcount" * or the "if_hw_tsomaxsegsize" field is zero the TSO segment * count limit does not apply. If all three fields are zero, * there is no TSO limit. * * NOTE: The TSO limits should reflect the values used in the * BUSDMA tag a network adapter is using to load a mbuf chain * for transmission. The TCP/IP network stack will subtract * space for all linklevel and protocol level headers and * ensure that the full mbuf chain passed to the network * adapter fits within the given limits. */ u_int if_hw_tsomax; /* TSO maximum size in bytes */ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ /* * Network adapter send tag support: */ if_snd_tag_alloc_t *if_snd_tag_alloc; if_snd_tag_modify_t *if_snd_tag_modify; if_snd_tag_query_t *if_snd_tag_query; if_snd_tag_free_t *if_snd_tag_free; if_ratelimit_query_t *if_ratelimit_query; if_ratelimit_setup_t *if_ratelimit_setup; /* Ethernet PCP */ uint8_t if_pcp; /* * Debugnet (Netdump) hooks to be called while in db/panic. */ struct debugnet_methods *if_debugnet_methods; struct epoch_context if_epoch_ctx; /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ int if_ispare[4]; /* general use */ }; /* for compatibility with other BSDs */ #define if_name(ifp) ((ifp)->if_xname) #define IF_NODOM 255 /* * Locks for address lists on the network interface. */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) #define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) #ifdef _KERNEL /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); typedef void (*ifaddr_event_ext_handler_t)(void *, struct ifnet *, struct ifaddr *, int); EVENTHANDLER_DECLARE(ifaddr_event_ext, ifaddr_event_ext_handler_t); #define IFADDR_EVENT_ADD 0 #define IFADDR_EVENT_DEL 1 /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); /* Interface up/down event */ #define IFNET_EVENT_UP 0 #define IFNET_EVENT_DOWN 1 #define IFNET_EVENT_PCP 2 /* priority code point, PCP */ typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event); EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); /* * interface groups */ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void *ifg_pf_kif; CK_STAILQ_HEAD(, ifg_member) ifg_members; /* (CK_) */ CK_STAILQ_ENTRY(ifg_group) ifg_next; /* (CK_) */ }; struct ifg_member { CK_STAILQ_ENTRY(ifg_member) ifgm_next; /* (CK_) */ struct ifnet *ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; CK_STAILQ_ENTRY(ifg_list) ifgl_next; /* (CK_) */ }; #ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); /* group detach event */ typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ #define IF_AFDATA_LOCK_INIT(ifp) \ mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF) #define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock) #define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock)) #define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED) /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #define TOEDEV(ifp) ((ifp)->if_llsoftc) /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here. */ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ CK_STAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct nhop_object *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; struct epoch_context ifa_epoch_ctx; }; struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ #define IFMA_F_ENQUEUED 0x1 struct ifmultiaddr { CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ int ifma_flags; void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ struct epoch_context ifma_epoch_ctx; }; extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ } while (0) #define IFNET_WUNLOCK() do { \ rw_wunlock(&ifnet_rwlock); \ sx_xunlock(&ifnet_sxlock); \ } while (0) /* * To assert the ifnet lock, you must know not only whether it's for read or * write, but also whether it was acquired with sleep support or not. */ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) /* * Look up an ifnet given its index; the _ref variant also acquires a * reference that must be freed using if_rele(). It is almost always a bug * to call ifnet_byindex() instead of ifnet_byindex_ref(). */ struct ifnet *ifnet_byindex(u_short idx); struct ifnet *ifnet_byindex_ref(u_short idx); /* * Given the index, ifaddr_byindex() returns the one and only * link-level ifaddr for the interface. You are not supposed to use * it to traverse the list of addresses associated to the interface. */ struct ifaddr *ifaddr_byindex(u_short idx); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(int, if_index); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_if_index VNET(if_index) #define V_loif VNET(loif) #ifdef MCAST_VERBOSE #define MCDPRINTF printf #else #define MCDPRINTF(...) #endif int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); struct ifnet* if_alloc_dev(u_char, device_t dev); struct ifnet* if_alloc_domain(u_char, int numa_domain); void if_attach(struct ifnet *); void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_delmulti_ifma_flags(struct ifmultiaddr *, int flags); void if_detach(struct ifnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * if_findmulti(struct ifnet *, const struct sockaddr *); void if_freemulti(struct ifmultiaddr *ifma); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); int if_tunnel_check_nesting(struct ifnet *, struct mbuf *, uint32_t, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); int ifa_ifwithaddr_check(const struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); -struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *, - u_int); +struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, + const struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); void if_data_copy(struct ifnet *, struct if_data *); uint64_t if_get_counter_default(struct ifnet *, ift_counter); void if_inc_counter(struct ifnet *, ift_counter, int64_t); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); uint64_t if_getbaudrate(if_t ifp); int if_setcapabilities(if_t ifp, int capabilities); int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); int if_getcapabilities(if_t ifp); int if_togglecapenable(if_t ifp, int togglecap); int if_setcapenable(if_t ifp, int capenable); int if_setcapenablebit(if_t ifp, int setcap, int clearcap); int if_getcapenable(if_t ifp); const char *if_getdname(if_t ifp); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(if_t ifp); int if_setdrvflags(if_t ifp, int flags); int if_clearhwassist(if_t ifp); int if_sethwassistbits(if_t ifp, int toset, int toclear); int if_sethwassist(if_t ifp, int hwassist_bit); int if_gethwassist(if_t ifp); int if_setsoftc(if_t ifp, void *softc); void *if_getsoftc(if_t ifp); int if_setflags(if_t ifp, int flags); int if_gethwaddr(if_t ifp, struct ifreq *); int if_setmtu(if_t ifp, int mtu); int if_getmtu(if_t ifp); int if_getmtu_family(if_t ifp, int family); int if_setflagbits(if_t ifp, int set, int clear); int if_getflags(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax); int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount); int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize); u_int if_gethwtsomax(if_t ifp); u_int if_gethwtsomaxsegcount(if_t ifp); u_int if_gethwtsomaxsegsize(if_t ifp); int if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); int if_setifheaderlen(if_t ifp, int len); void if_setrcvif(struct mbuf *m, if_t ifp); void if_setvtag(struct mbuf *m, u_int16_t tag); u_int16_t if_getvtag(struct mbuf *m); int if_vlantrunkinuse(if_t ifp); caddr_t if_getlladdr(if_t ifp); void *if_gethandle(u_char); void if_bpfmtap(if_t ifp, struct mbuf *m); void if_etherbpfmtap(if_t ifp, struct mbuf *m); void if_vlancap(if_t ifp); /* * Traversing through interface address lists. */ struct sockaddr_dl; typedef u_int iflladdr_cb_t(void *, struct sockaddr_dl *, u_int); u_int if_foreach_lladdr(if_t, iflladdr_cb_t, void *); u_int if_foreach_llmaddr(if_t, iflladdr_cb_t, void *); u_int if_lladdr_count(if_t); u_int if_llmaddr_count(if_t); int if_getamcount(if_t ifp); struct ifaddr * if_getifaddr(if_t ifp); /* Functions */ void if_setinitfn(if_t ifp, void (*)(void *)); void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br); int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br); int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m); /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); /* accessors for struct ifreq */ void *ifr_data_get_ptr(void *ifrp); void *ifr_buffer_get_buffer(void *data); size_t ifr_buffer_get_length(void *data); int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); int ether_poll_register(poll_handler_t *h, if_t ifp); int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #include /* XXXAO: temporary unconditional include */ #endif /* !_NET_IF_VAR_H_ */ Index: head/sys/net/route/route_ctl.c =================================================================== --- head/sys/net/route/route_ctl.c (revision 361420) +++ head/sys/net/route/route_ctl.c (revision 361421) @@ -1,65 +1,584 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef RADIX_MPATH +#include +#endif + #include /* * This file contains control plane routing tables functions. * * All functions assumes they are called in net epoch. */ + +static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); + +int +add_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + struct sockaddr *dst, *ndst, *gateway, *netmask; + struct rtentry *rt, *rt_old; + struct nhop_object *nh; + struct radix_node *rn; + struct ifaddr *ifa; + int error, flags; + struct epoch_tracker et; + + dst = info->rti_info[RTAX_DST]; + gateway = info->rti_info[RTAX_GATEWAY]; + netmask = info->rti_info[RTAX_NETMASK]; + flags = info->rti_flags; + + if ((flags & RTF_GATEWAY) && !gateway) + return (EINVAL); + if (dst && gateway && (dst->sa_family != gateway->sa_family) && + (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) + return (EINVAL); + + if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) + return (EINVAL); + + if (info->rti_ifa == NULL) { + error = rt_getifa_fib(info, rnh->rib_fibnum); + if (error) + return (error); + } else { + ifa_ref(info->rti_ifa); + } + + NET_EPOCH_ENTER(et); + error = nhop_create_from_info(rnh, info, &nh); + NET_EPOCH_EXIT(et); + if (error != 0) { + ifa_free(info->rti_ifa); + return (error); + } + + rt = uma_zalloc(V_rtzone, M_NOWAIT); + if (rt == NULL) { + ifa_free(info->rti_ifa); + nhop_free(nh); + return (ENOBUFS); + } + rt->rt_flags = RTF_UP | flags; + rt->rt_nhop = nh; + + /* Fill in dst */ + memcpy(&rt->rt_dst, dst, dst->sa_len); + rt_key(rt) = &rt->rt_dst; + + /* + * point to the (possibly newly malloc'd) dest address. + */ + ndst = (struct sockaddr *)rt_key(rt); + + /* + * make sure it contains the value we want (masked if needed). + */ + if (netmask) { + rt_maskedcopy(dst, ndst, netmask); + } else + bcopy(dst, ndst, dst->sa_len); + + /* + * We use the ifa reference returned by rt_getifa_fib(). + * This moved from below so that rnh->rnh_addaddr() can + * examine the ifa and ifa->ifa_ifp if it so desires. + */ + ifa = info->rti_ifa; + rt->rt_weight = 1; + + rt_setmetrics(info, rt); + + RIB_WLOCK(rnh); + RT_LOCK(rt); +#ifdef RADIX_MPATH + /* do not permit exactly the same dst/mask/gw pair */ + if (rt_mpath_capable(rnh) && + rt_mpath_conflict(rnh, rt, netmask)) { + RIB_WUNLOCK(rnh); + + nhop_free(nh); + uma_zfree(V_rtzone, rt); + return (EEXIST); + } +#endif + + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); + + if (rn != NULL && rt->rt_expire > 0) + tmproutes_update(rnh, rt); + + rt_old = NULL; + if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { + + /* + * Force removal and re-try addition + * TODO: better multipath&pinned support + */ + struct sockaddr *info_dst = info->rti_info[RTAX_DST]; + info->rti_info[RTAX_DST] = ndst; + /* Do not delete existing PINNED(interface) routes */ + info->rti_flags &= ~RTF_PINNED; + rt_old = rt_unlinkrte(rnh, info, &error); + info->rti_flags |= RTF_PINNED; + info->rti_info[RTAX_DST] = info_dst; + if (rt_old != NULL) + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, + rt->rt_nodes); + } + RIB_WUNLOCK(rnh); + + if (rt_old != NULL) { + rt_notifydelete(rt_old, info); + rtfree(rt_old); + } + + /* + * If it still failed to go into the tree, + * then un-make it (this should be a function) + */ + if (rn == NULL) { + nhop_free(nh); + uma_zfree(V_rtzone, rt); + return (EEXIST); + } + + /* + * If this protocol has something to add to this then + * allow it to do that as well. + */ + if (ifa->ifa_rtrequest) + ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); + + /* + * actually return a resultant rtentry + */ + if (ret_nrt) + *ret_nrt = rt; + rnh->rnh_gen++; /* Routing table updated */ + RT_UNLOCK(rt); + + return (0); +} + + +/* + * Conditionally unlinks rtentry matching data inside @info from @rnh. + * Returns unlinked, locked and referenced @rtentry on success, + * Returns NULL and sets @perror to: + * ESRCH - if prefix was not found, + * EADDRINUSE - if trying to delete PINNED route without appropriate flag. + * ENOENT - if supplied filter function returned 0 (not matched). + */ +struct rtentry * +rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) +{ + struct sockaddr *dst, *netmask; + struct rtentry *rt; + struct radix_node *rn; + + dst = info->rti_info[RTAX_DST]; + netmask = info->rti_info[RTAX_NETMASK]; + + rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); + if (rt == NULL) { + *perror = ESRCH; + return (NULL); + } + + if ((info->rti_flags & RTF_PINNED) == 0) { + /* Check if target route can be deleted */ + if (rt->rt_flags & RTF_PINNED) { + *perror = EADDRINUSE; + return (NULL); + } + } + + if (info->rti_filter != NULL) { + if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){ + /* Not matched */ + *perror = ENOENT; + return (NULL); + } + + /* + * Filter function requested rte deletion. + * Ease the caller work by filling in remaining info + * from that particular entry. + */ + info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; + } + + /* + * Remove the item from the tree and return it. + * Complain if it is not there and do no more processing. + */ + *perror = ESRCH; +#ifdef RADIX_MPATH + if (rt_mpath_capable(rnh)) + rn = rt_mpath_unlink(rnh, info, rt, perror); + else +#endif + rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); + if (rn == NULL) + return (NULL); + + if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + panic ("rtrequest delete"); + + rt = RNTORT(rn); + RT_LOCK(rt); + rt->rt_flags &= ~RTF_UP; + + *perror = 0; + + return (rt); +} + +int +del_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + struct sockaddr *dst, *netmask; + struct sockaddr_storage mdst; + struct rtentry *rt; + int error; + + dst = info->rti_info[RTAX_DST]; + netmask = info->rti_info[RTAX_NETMASK]; + + if (netmask) { + if (dst->sa_len > sizeof(mdst)) + return (EINVAL); + rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); + dst = (struct sockaddr *)&mdst; + } + + RIB_WLOCK(rnh); + rt = rt_unlinkrte(rnh, info, &error); + RIB_WUNLOCK(rnh); + if (error != 0) + return (error); + + rt_notifydelete(rt, info); + + /* + * If the caller wants it, then it can have it, + * the entry will be deleted after the end of the current epoch. + */ + if (ret_nrt) + *ret_nrt = rt; + + rtfree(rt); + + return (0); +} + +static int +change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + RIB_RLOCK_TRACKER; + struct rtentry *rt = NULL; + int error = 0; + int free_ifa = 0; + struct nhop_object *nh, *nh_orig; + + RIB_RLOCK(rnh); + rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt == NULL) { + RIB_RUNLOCK(rnh); + return (ESRCH); + } + +#ifdef RADIX_MPATH + /* + * If we got multipath routes, + * we require users to specify a matching RTAX_GATEWAY. + */ + if (rt_mpath_capable(rnh)) { + rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); + if (rt == NULL) { + RIB_RUNLOCK(rnh); + return (ESRCH); + } + } +#endif + nh_orig = rt->rt_nhop; + + RIB_RUNLOCK(rnh); + + rt = NULL; + nh = NULL; + + /* + * New gateway could require new ifaddr, ifp; + * flags may also be different; ifp may be specified + * by ll sockaddr when protocol address is ambiguous + */ + if (((nh_orig->nh_flags & NHF_GATEWAY) && + info->rti_info[RTAX_GATEWAY] != NULL) || + info->rti_info[RTAX_IFP] != NULL || + (info->rti_info[RTAX_IFA] != NULL && + !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { + error = rt_getifa_fib(info, rnh->rib_fibnum); + if (info->rti_ifa != NULL) + free_ifa = 1; + + if (error != 0) { + if (free_ifa) { + ifa_free(info->rti_ifa); + info->rti_ifa = NULL; + } + + return (error); + } + } + + error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); + if (free_ifa) { + ifa_free(info->rti_ifa); + info->rti_ifa = NULL; + } + if (error != 0) + return (error); + + RIB_WLOCK(rnh); + + /* Lookup rtentry once again and check if nexthop is still the same */ + rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt == NULL) { + RIB_WUNLOCK(rnh); + nhop_free(nh); + return (ESRCH); + } + + if (rt->rt_nhop != nh_orig) { + RIB_WUNLOCK(rnh); + nhop_free(nh); + return (EAGAIN); + } + + /* Proceed with the update */ + RT_LOCK(rt); + + /* Provide notification to the protocols.*/ + if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) + nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); + + rt->rt_nhop = nh; + rt_setmetrics(info, rt); + + if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) + nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); + + if (ret_nrt != NULL) + *ret_nrt = rt; + + RT_UNLOCK(rt); + + /* Update generation id to reflect rtable change */ + rnh->rnh_gen++; + + RIB_WUNLOCK(rnh); + + nhop_free(nh_orig); + + return (0); +} + +int +change_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + int error; + + /* Check if updated gateway exists */ + if ((info->rti_flags & RTF_GATEWAY) && + (info->rti_info[RTAX_GATEWAY] == NULL)) + return (EINVAL); + + /* + * route change is done in multiple steps, with dropping and + * reacquiring lock. In the situations with multiple processes + * changes the same route in can lead to the case when route + * is changed between the steps. Address it by retrying the operation + * multiple times before failing. + */ + for (int i = 0; i < RIB_MAX_RETRIES; i++) { + error = change_route_one(rnh, info, ret_nrt); + if (error != EAGAIN) + break; + } + + return (error); +} + +static void +rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) +{ + struct ifaddr *ifa; + + /* + * give the protocol a chance to keep things in sync. + */ + ifa = rt->rt_nhop->nh_ifa; + if (ifa != NULL && ifa->ifa_rtrequest != NULL) + ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); +} + +struct rt_delinfo +{ + struct rt_addrinfo info; + struct rib_head *rnh; + struct rtentry *head; +}; + +/* + * Conditionally unlinks @rn from radix tree based + * on info data passed in @arg. + */ +static int +rt_checkdelroute(struct radix_node *rn, void *arg) +{ + struct rt_delinfo *di; + struct rt_addrinfo *info; + struct rtentry *rt; + int error; + + di = (struct rt_delinfo *)arg; + rt = (struct rtentry *)rn; + info = &di->info; + error = 0; + + info->rti_info[RTAX_DST] = rt_key(rt); + info->rti_info[RTAX_NETMASK] = rt_mask(rt); + info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; + + rt = rt_unlinkrte(di->rnh, info, &error); + if (rt == NULL) { + /* Either not allowed or not matched. Skip entry */ + return (0); + } + + /* Entry was unlinked. Add to the list and return */ + rt->rt_chain = di->head; + di->head = rt; + + return (0); +} + +/* + * Iterates over a routing table specified by @fibnum and @family and + * deletes elements marked by @filter_f. + * @fibnum: rtable id + * @family: AF_ address family + * @filter_f: function returning non-zero value for items to delete + * @arg: data to pass to the @filter_f function + * @report: true if rtsock notification is needed. + */ +void +rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) +{ + struct rib_head *rnh; + struct rt_delinfo di; + struct rtentry *rt; + + rnh = rt_tables_get_rnh(fibnum, family); + if (rnh == NULL) + return; + + bzero(&di, sizeof(di)); + di.info.rti_filter = filter_f; + di.info.rti_filterdata = arg; + di.rnh = rnh; + + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); + RIB_WUNLOCK(rnh); + + if (di.head == NULL) + return; + + /* We might have something to reclaim. */ + while (di.head != NULL) { + rt = di.head; + di.head = rt->rt_chain; + rt->rt_chain = NULL; + + /* TODO std rt -> rt_addrinfo export */ + di.info.rti_info[RTAX_DST] = rt_key(rt); + di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); + + rt_notifydelete(rt, &di.info); + + if (report) + rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, + fibnum); + rtfree(rt); + } +} Index: head/sys/net/route/route_var.h =================================================================== --- head/sys/net/route/route_var.h (revision 361420) +++ head/sys/net/route/route_var.h (revision 361421) @@ -1,218 +1,243 @@ /*- * Copyright (c) 2015-2016 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NET_ROUTE_VAR_H_ #define _NET_ROUTE_VAR_H_ #ifndef RNF_NORMAL #include #endif #include #include /* struct sockaddr_in */ #include struct nh_control; typedef int rnh_preadd_entry_f_t(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask, struct nhop_object *nh); struct rib_head { struct radix_head head; rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */ rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/ rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */ rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */ rn_walktree_t *rnh_walktree; /* traverse tree */ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */ rnh_preadd_entry_f_t *rnh_preadd; /* hook to alter record prior to insertion */ rt_gen_t rnh_gen; /* generation counter */ int rnh_multipath; /* multipath capable ? */ struct radix_node rnh_nodes[3]; /* empty tree for common case */ struct rmlock rib_lock; /* config/data path lock */ struct radix_mask_head rmhead; /* masks radix head */ struct vnet *rib_vnet; /* vnet pointer */ int rib_family; /* AF of the rtable */ u_int rib_fibnum; /* fib number */ struct callout expire_callout; /* Callout for expiring dynamic routes */ time_t next_expire; /* Next expire run ts */ struct nh_control *nh_control; /* nexthop subsystem data */ }; #define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker #define RIB_LOCK_INIT(rh) rm_init(&(rh)->rib_lock, "rib head lock") #define RIB_LOCK_DESTROY(rh) rm_destroy(&(rh)->rib_lock) #define RIB_RLOCK(rh) rm_rlock(&(rh)->rib_lock, &_rib_tracker) #define RIB_RUNLOCK(rh) rm_runlock(&(rh)->rib_lock, &_rib_tracker) #define RIB_WLOCK(rh) rm_wlock(&(rh)->rib_lock) #define RIB_WUNLOCK(rh) rm_wunlock(&(rh)->rib_lock) #define RIB_LOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_LOCKED) #define RIB_WLOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_WLOCKED) /* Constants */ #define RIB_MAX_RETRIES 3 /* Macro for verifying fields in af-specific 'struct route' structures */ #define CHK_STRUCT_FIELD_GENERIC(_s1, _f1, _s2, _f2) \ _Static_assert(sizeof(((_s1 *)0)->_f1) == sizeof(((_s2 *)0)->_f2), \ "Fields " #_f1 " and " #_f2 " size differs"); \ _Static_assert(__offsetof(_s1, _f1) == __offsetof(_s2, _f2), \ "Fields " #_f1 " and " #_f2 " offset differs"); #define _CHK_ROUTE_FIELD(_route_new, _field) \ CHK_STRUCT_FIELD_GENERIC(struct route, _field, _route_new, _field) #define CHK_STRUCT_ROUTE_FIELDS(_route_new) \ _CHK_ROUTE_FIELD(_route_new, ro_nh) \ _CHK_ROUTE_FIELD(_route_new, ro_lle) \ _CHK_ROUTE_FIELD(_route_new, ro_prepend)\ _CHK_ROUTE_FIELD(_route_new, ro_plen) \ _CHK_ROUTE_FIELD(_route_new, ro_flags) \ _CHK_ROUTE_FIELD(_route_new, ro_mtu) \ _CHK_ROUTE_FIELD(_route_new, spare) #define CHK_STRUCT_ROUTE_COMPAT(_ro_new, _dst_new) \ CHK_STRUCT_ROUTE_FIELDS(_ro_new); \ _Static_assert(__offsetof(struct route, ro_dst) == __offsetof(_ro_new, _dst_new),\ "ro_dst and " #_dst_new " are at different offset") struct rib_head *rt_tables_get_rnh(int fib, int family); void rt_mpath_init_rnh(struct rib_head *rnh); +int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum); +void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt); +#ifdef RADIX_MPATH +struct radix_node *rt_mpath_unlink(struct rib_head *rnh, + struct rt_addrinfo *info, struct rtentry *rto, int *perror); +#endif +int add_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt); +int del_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt); +int change_route(struct rib_head *, struct rt_addrinfo *, + struct rtentry **); VNET_PCPUSTAT_DECLARE(struct rtstat, rtstat); #define RTSTAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct rtstat, rtstat, name, (val)) #define RTSTAT_INC(name) RTSTAT_ADD(name, 1) + + +/* + * Convert a 'struct radix_node *' to a 'struct rtentry *'. + * The operation can be done safely (in this code) because a + * 'struct rtentry' starts with two 'struct radix_node''s, the first + * one representing leaf nodes in the routing tree, which is + * what the code in radix.c passes us as a 'struct radix_node'. + * + * But because there are a lot of assumptions in this conversion, + * do not cast explicitly, but always use the macro below. + */ +#define RNTORT(p) ((struct rtentry *)(p)) struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* * XXX struct rtentry must begin with a struct radix_node (or two!) * because the code does some casts of a 'struct radix_node *' * to a 'struct rtentry *' */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) #define rt_key_const(r) (*((const struct sockaddr * const *)(&(r)->rt_nodes->rn_key))) #define rt_mask_const(r) (*((const struct sockaddr * const *)(&(r)->rt_nodes->rn_mask))) /* * 2 radix_node structurs above consists of 2x6 pointers, leaving * 4 pointers (32 bytes) of the second cache line on amd64. * */ struct nhop_object *rt_nhop; /* nexthop data */ union { /* * Destination address storage. * sizeof(struct sockaddr_in6) == 28, however * the dataplane-relevant part (e.g. address) lies * at offset 8..24, making the address not crossing * cacheline boundary. */ struct sockaddr_in rt_dst4; struct sockaddr_in6 rt_dst6; struct sockaddr rt_dst; char rt_dstb[28]; }; int rt_flags; /* up/down?, host/net */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ #define rt_endzero rt_mtx struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ struct epoch_context rt_epoch_ctx; /* net epoch tracker */ }; #define RT_LOCK_INIT(_rt) \ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW) #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) #define RT_UNLOCK_COND(_rt) do { \ if (mtx_owned(&(_rt)->rt_mtx)) \ mtx_unlock(&(_rt)->rt_mtx); \ } while (0) /* * With the split between the routing entry and the nexthop, * rt_flags has to be split between these 2 entries. As rtentry * mostly contains prefix data and is thought to be generic enough * so one can transparently change the nexthop pointer w/o requiring * any other rtentry changes, most of rt_flags shifts to the particular nexthop. * / * * RTF_UP: rtentry, as an indication that it is linked. * RTF_HOST: rtentry, nhop. The latter indication is needed for the datapath * RTF_DYNAMIC: nhop, to make rtentry generic. * RTF_MODIFIED: nhop, to make rtentry generic. (legacy) * -- "native" path (nhop) properties: * RTF_GATEWAY, RTF_STATIC, RTF_PROTO1, RTF_PROTO2, RTF_PROTO3, RTF_FIXEDMTU, * RTF_PINNED, RTF_REJECT, RTF_BLACKHOLE, RTF_BROADCAST */ /* Nexthop rt flags mask */ #define NHOP_RT_FLAG_MASK (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_DYNAMIC | \ RTF_MODIFIED | RTF_STATIC | RTF_BLACKHOLE | RTF_PROTO1 | RTF_PROTO2 | \ RTF_PROTO3 | RTF_FIXEDMTU | RTF_PINNED | RTF_BROADCAST) /* rtentry rt flag mask */ #define RTE_RT_FLAG_MASK (RTF_UP | RTF_HOST) /* Nexthop selection */ #define _NH2MP(_nh) ((struct nhgrp_object *)(_nh)) #define _SELECT_NHOP(_nh, _flowid) \ (_NH2MP(_nh))->nhops[(_flowid) % (_NH2MP(_nh))->mp_size] #define _RT_SELECT_NHOP(_nh, _flowid) \ ((!NH_IS_MULTIPATH(_nh)) ? (_nh) : _SELECT_NHOP(_nh, _flowid)) #define RT_SELECT_NHOP(_rt, _flowid) _RT_SELECT_NHOP((_rt)->rt_nhop, _flowid) /* rte<>nhop translation */ static inline uint16_t fib_rte_to_nh_flags(int rt_flags) { uint16_t res; res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0; res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0; res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0; return (res); } void tmproutes_update(struct rib_head *rnh, struct rtentry *rt); void tmproutes_init(struct rib_head *rh); void tmproutes_destroy(struct rib_head *rh); #endif Index: head/sys/net/route/shared.h =================================================================== --- head/sys/net/route/shared.h (revision 361420) +++ head/sys/net/route/shared.h (revision 361421) @@ -1,68 +1,77 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Contains various definitions shared between the parts of a routing subsystem. * * Header is not intended to be included by the code external to the * routing subsystem. */ #ifndef _NET_ROUTE_SHARED_H_ #define _NET_ROUTE_SHARED_H_ +#include + #ifdef RTDEBUG #define DPRINTF(_fmt, ...) printf("%s: " _fmt "\n", __func__ , ## __VA_ARGS__) #else #define DPRINTF(_fmt, ...) #endif struct rib_head; /* Nexhops */ void nhops_init(void); int nhops_init_rib(struct rib_head *rh); void nhops_destroy_rib(struct rib_head *rh); int nhop_ref_object(struct nhop_object *nh); int nhop_ref_any(struct nhop_object *nh); void nhop_free_any(struct nhop_object *nh); void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type); void nhop_set_rtflags(struct nhop_object *nh, int rt_flags); int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info, struct nhop_object **nh_ret); int nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig, struct rt_addrinfo *info, struct nhop_object **pnh_priv); void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu); int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w); + +/* route */ +VNET_DECLARE(uma_zone_t, rtzone); /* Routing table UMA zone. */ +#define V_rtzone VNET(rtzone) + +struct rtentry *rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, + int *perror); #endif Index: head/sys/net/route.c =================================================================== --- head/sys/net/route.c (revision 361420) +++ head/sys/net/route.c (revision 361421) @@ -1,2077 +1,1527 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ /************************************************************************ * Note: In this file a 'fib' is a "forwarding information base" * * Which is the new name for an in kernel routing (next hop) table. * ***********************************************************************/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_mrouting.h" #include "opt_mpath.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #define RT_MAXFIBS UINT16_MAX /* Kernel config default option. */ #ifdef ROUTETABLES #if ROUTETABLES <= 0 #error "ROUTETABLES defined too low" #endif #if ROUTETABLES > RT_MAXFIBS #error "ROUTETABLES defined too big" #endif #define RT_NUMFIBS ROUTETABLES #endif /* ROUTETABLES */ /* Initialize to default if not otherwise set. */ #ifndef RT_NUMFIBS #define RT_NUMFIBS 1 #endif /* This is read-only.. */ u_int rt_numfibs = RT_NUMFIBS; SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); /* * By default add routes to all fibs for new interfaces. * Once this is set to 0 then only allocate routes on interface * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun aproach to a problem that needs * a more fine grained solution.. that will come. * XXX also has the problems getting the FIB from curthread which will not * always work given the fib can be overridden and prefixes can be added * from the network stack context. */ VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(rt_add_addr_allfibs), 0, ""); VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat); VNET_PCPUSTAT_SYSINIT(rtstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rtstat); #endif VNET_DEFINE(struct rib_head *, rt_tables); #define V_rt_tables VNET(rt_tables) -/* - * Convert a 'struct radix_node *' to a 'struct rtentry *'. - * The operation can be done safely (in this code) because a - * 'struct rtentry' starts with two 'struct radix_node''s, the first - * one representing leaf nodes in the routing tree, which is - * what the code in radix.c passes us as a 'struct radix_node'. - * - * But because there are a lot of assumptions in this conversion, - * do not cast explicitly, but always use the macro below. - */ -#define RNTORT(p) ((struct rtentry *)(p)) - -VNET_DEFINE_STATIC(uma_zone_t, rtzone); /* Routing table UMA zone. */ +VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ #define V_rtzone VNET(rtzone) EVENTHANDLER_LIST_DEFINE(rt_addrmsg); -static int rt_getifa_fib(struct rt_addrinfo *, u_int); -static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, void *arg); -static struct rtentry *rt_unlinkrte(struct rib_head *rnh, - struct rt_addrinfo *info, int *perror); -static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); static void destroy_rtentry_epoch(epoch_context_t ctx); -#ifdef RADIX_MPATH -static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, - struct rt_addrinfo *info, struct rtentry *rto, int *perror); -#endif static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags); -static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt); -static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt); -static int change_route(struct rib_head *, struct rt_addrinfo *, - struct rtentry **); - /* * handler for net.my_fibnum */ static int sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) { int fibnum; int error; fibnum = curthread->td_proc->p_fibnum; error = sysctl_handle_int(oidp, &fibnum, 0, req); return (error); } SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); static __inline struct rib_head ** rt_tables_get_rnh_ptr(int table, int fam) { struct rib_head **rnh; KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds (0 <= %d < %d)", __func__, table, rt_numfibs)); KASSERT(fam >= 0 && fam < (AF_MAX + 1), ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1)); /* rnh is [fib=0][af=0]. */ rnh = (struct rib_head **)V_rt_tables; /* Get the offset to the requested table and fam. */ rnh += table * (AF_MAX+1) + fam; return (rnh); } struct rib_head * rt_tables_get_rnh(int table, int fam) { return (*rt_tables_get_rnh_ptr(table, fam)); } u_int rt_tables_get_gen(int table, int fam) { struct rib_head *rnh; rnh = *rt_tables_get_rnh_ptr(table, fam); KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d", __func__, table, fam)); return (rnh->rnh_gen); } /* * route initialization must occur before ip6_init2(), which happenas at * SI_ORDER_MIDDLE. */ static void route_init(void) { /* whack the tunable ints into line. */ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; nhops_init(); } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL); static int rtentry_zinit(void *mem, int size, int how) { struct rtentry *rt = mem; RT_LOCK_INIT(rt); return (0); } static void rtentry_zfini(void *mem, int size) { struct rtentry *rt = mem; RT_LOCK_DESTROY(rt); } static int rtentry_ctor(void *mem, int size, void *arg, int how) { struct rtentry *rt = mem; bzero(rt, offsetof(struct rtentry, rt_endzero)); rt->rt_chain = NULL; return (0); } static void rtentry_dtor(void *mem, int size, void *arg) { struct rtentry *rt = mem; RT_UNLOCK_COND(rt); } static void vnet_route_init(const void *unused __unused) { struct domain *dom; struct rib_head **rnh; int table; int fam; V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO); V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), rtentry_ctor, rtentry_dtor, rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtattach((void **)rnh, 0, table); } } } VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, vnet_route_init, 0); #ifdef VIMAGE static void vnet_route_uninit(const void *unused __unused) { int table; int fam; struct domain *dom; struct rib_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtdetach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtdetach((void **)rnh, 0); } } /* * dom_rtdetach calls rt_table_destroy(), which * schedules deletion for all rtentries, nexthops and control * structures. Wait for the destruction callbacks to fire. * Note that this should result in freeing all rtentries, but * nexthops deletions will be scheduled for the next epoch run * and will be completed after vnet teardown. */ epoch_drain_callbacks(net_epoch_preempt); free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, vnet_route_uninit, 0); #endif struct rib_head * rt_table_init(int offset, int family, u_int fibnum) { struct rib_head *rh; rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO); /* TODO: These details should be hidded inside radix.c */ /* Init masks tree */ rn_inithead_internal(&rh->head, rh->rnh_nodes, offset); rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); rh->head.rnh_masks = &rh->rmhead; /* Save metadata associated with this routing table. */ rh->rib_family = family; rh->rib_fibnum = fibnum; #ifdef VIMAGE rh->rib_vnet = curvnet; #endif tmproutes_init(rh); /* Init locks */ RIB_LOCK_INIT(rh); nhops_init_rib(rh); /* Finally, set base callbacks */ rh->rnh_addaddr = rn_addroute; rh->rnh_deladdr = rn_delete; rh->rnh_matchaddr = rn_match; rh->rnh_lookup = rn_lookup; rh->rnh_walktree = rn_walktree; rh->rnh_walktree_from = rn_walktree_from; return (rh); } static int rt_freeentry(struct radix_node *rn, void *arg) { struct radix_head * const rnh = arg; struct radix_node *x; x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh); if (x != NULL) R_Free(x); return (0); } void rt_table_destroy(struct rib_head *rh) { tmproutes_destroy(rh); rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head); nhops_destroy_rib(rh); /* Assume table is already empty */ RIB_LOCK_DESTROY(rh); free(rh, M_RTABLE); } #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; }; #endif int sys_setfib(struct thread *td, struct setfib_args *uap) { if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) return EINVAL; td->td_proc->p_fibnum = uap->fibnum; return (0); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void rtfree(struct rtentry *rt) { KASSERT(rt != NULL,("%s: NULL rt", __func__)); RT_LOCK_ASSERT(rt); epoch_call(net_epoch_preempt, destroy_rtentry_epoch, &rt->rt_epoch_ctx); RT_UNLOCK(rt); } static void destroy_rtentry(struct rtentry *rt) { /* * At this moment rnh, nh_control may be already freed. * nhop interface may have been migrated to a different vnet. * Use vnet stored in the nexthop to delete the entry. */ CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); /* Unreference nexthop */ nhop_free(rt->rt_nhop); uma_zfree(V_rtzone, rt); CURVNET_RESTORE(); } /* * Epoch callback indicating rtentry is safe to destroy */ static void destroy_rtentry_epoch(epoch_context_t ctx) { struct rtentry *rt; rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); destroy_rtentry(rt); } /* * Adds a temporal redirect entry to the routing table. * @fibnum: fib number * @dst: destination to install redirect to * @gateway: gateway to go via * @author: sockaddr of originating router, can be NULL * @ifp: interface to use for the redirected route * @flags: set of flags to add. Allowed: RTF_GATEWAY * @lifetime_sec: time in seconds to expire this redirect. * * Retuns 0 on success, errno otherwise. */ int rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec) { struct rtentry *rt; int error; struct rt_addrinfo info; struct rt_metrics rti_rmx; struct ifaddr *ifa; NET_EPOCH_ASSERT(); if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL) return (EAFNOSUPPORT); /* Verify the allowed flag mask. */ KASSERT(((flags & ~(RTF_GATEWAY)) == 0), ("invalid redirect flags: %x", flags)); /* Get the best ifa for the given interface and gateway. */ if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL) return (ENETUNREACH); ifa_ref(ifa); bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_ifa = ifa; info.rti_ifp = ifp; info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC; /* Setup route metrics to define expire time. */ bzero(&rti_rmx, sizeof(rti_rmx)); /* Set expire time as absolute. */ rti_rmx.rmx_expire = lifetime_sec + time_second; info.rti_mflags |= RTV_EXPIRE; info.rti_rmx = &rti_rmx; error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); ifa_free(ifa); if (error != 0) { /* TODO: add per-fib redirect stats. */ return (error); } RT_LOCK(rt); flags = rt->rt_flags; RT_UNLOCK(rt); RTSTAT_INC(rts_dynamic); /* Send notification of a route addition to userland. */ bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_AUTHOR] = author; rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); return (0); } /* * Routing table ioctl interface. */ int rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* * If more ioctl commands are added here, make sure the proper * super-user checks are being performed because it is possible for * prison-root to make it this far if raw sockets have been enabled * in jails. */ #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * -ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, - u_int fibnum) +ifa_ifwithroute(int flags, const struct sockaddr *dst, + const struct sockaddr *gateway, u_int fibnum) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst, fibnum); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway, fibnum); } if (ifa == NULL) ifa = ifa_ifwithnet(gateway, 0, fibnum); if (ifa == NULL) { struct nhop_object *nh; nh = rib_lookup(fibnum, gateway, NHR_NONE, 0); /* * dismiss a gateway that is reachable only * through the default router */ if ((nh == NULL) || (nh->nh_flags & NHF_DEFAULT)) return (NULL); ifa = nh->nh_ifa; } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; } return (ifa); } /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, u_int fibnum) { struct rt_addrinfo info; if (dst->sa_len == 0) return(EINVAL); bzero((caddr_t)&info, sizeof(info)); info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; return rtrequest1_fib(req, &info, ret_nrt, fibnum); } /* * Copy most of @rt data into @info. * * If @flags contains NHR_COPY, copies dst,netmask and gw to the * pointers specified by @info structure. Assume such pointers * are zeroed sockaddr-like structures with sa_len field initialized * to reflect size of the provided buffer. if no NHR_COPY is specified, * point dst,netmask and gw @info fields to appropriate @rt values. * * if @flags contains NHR_REF, do refcouting on rt_ifp and rt_ifa. * * Returns 0 on success. */ int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags) { struct rt_metrics *rmx; struct sockaddr *src, *dst; struct nhop_object *nh; int sa_len; if (flags & NHR_COPY) { /* Copy destination if dst is non-zero */ src = rt_key(rt); dst = info->rti_info[RTAX_DST]; sa_len = src->sa_len; if (dst != NULL) { if (src->sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); info->rti_addrs |= RTA_DST; } /* Copy mask if set && dst is non-zero */ src = rt_mask(rt); dst = info->rti_info[RTAX_NETMASK]; if (src != NULL && dst != NULL) { /* * Radix stores different value in sa_len, * assume rt_mask() to have the same length * as rt_key() */ if (sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); info->rti_addrs |= RTA_NETMASK; } /* Copy gateway is set && dst is non-zero */ src = &rt->rt_nhop->gw_sa; dst = info->rti_info[RTAX_GATEWAY]; if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){ if (src->sa_len > dst->sa_len) return (ENOMEM); memcpy(dst, src, src->sa_len); info->rti_addrs |= RTA_GATEWAY; } } else { info->rti_info[RTAX_DST] = rt_key(rt); info->rti_addrs |= RTA_DST; if (rt_mask(rt) != NULL) { info->rti_info[RTAX_NETMASK] = rt_mask(rt); info->rti_addrs |= RTA_NETMASK; } if (rt->rt_flags & RTF_GATEWAY) { info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; info->rti_addrs |= RTA_GATEWAY; } } nh = rt->rt_nhop; rmx = info->rti_rmx; if (rmx != NULL) { info->rti_mflags |= RTV_MTU; rmx->rmx_mtu = nh->nh_mtu; } info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh); info->rti_ifp = nh->nh_ifp; info->rti_ifa = nh->nh_ifa; if (flags & NHR_REF) { if_ref(info->rti_ifp); ifa_ref(info->rti_ifa); } return (0); } /* * Lookups up route entry for @dst in RIB database for fib @fibnum. * Exports entry data to @info using rt_exportinfo(). * * If @flags contains NHR_REF, refcouting is performed on rt_ifp and rt_ifa. * All references can be released later by calling rib_free_info(). * * Returns 0 on success. * Returns ENOENT for lookup failure, ENOMEM for export failure. */ int rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, uint32_t flowid, struct rt_addrinfo *info) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; int error; KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum")); rh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rh == NULL) return (ENOENT); RIB_RLOCK(rh); rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rt->rt_nhop->nh_ifp)) { flags = (flags & NHR_REF) | NHR_COPY; error = rt_exportinfo(rt, info, flags); RIB_RUNLOCK(rh); return (error); } } RIB_RUNLOCK(rh); return (ENOENT); } /* * Releases all references acquired by rib_lookup_info() when * called with NHR_REF flags. */ void rib_free_info(struct rt_addrinfo *info) { ifa_free(info->rti_ifa); if_rele(info->rti_ifp); } /* * Iterates over all existing fibs in system calling * @setwa_f function prior to traversing each fib. * Calls @wa_f function for each element in current fib. * If af is not AF_UNSPEC, iterates over fibs in particular * address family. */ void rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, void *arg) { struct rib_head *rnh; uint32_t fibnum; int i; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (af != AF_UNSPEC) { rnh = rt_tables_get_rnh(fibnum, af); if (rnh == NULL) continue; if (setwa_f != NULL) setwa_f(rnh, fibnum, af, arg); RIB_WLOCK(rnh); rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); RIB_WUNLOCK(rnh); continue; } for (i = 1; i <= AF_MAX; i++) { rnh = rt_tables_get_rnh(fibnum, i); if (rnh == NULL) continue; if (setwa_f != NULL) setwa_f(rnh, fibnum, i, arg); RIB_WLOCK(rnh); rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); RIB_WUNLOCK(rnh); } } } -struct rt_delinfo -{ - struct rt_addrinfo info; - struct rib_head *rnh; - struct rtentry *head; -}; - /* - * Conditionally unlinks @rn from radix tree based - * on info data passed in @arg. - */ -static int -rt_checkdelroute(struct radix_node *rn, void *arg) -{ - struct rt_delinfo *di; - struct rt_addrinfo *info; - struct rtentry *rt; - int error; - - di = (struct rt_delinfo *)arg; - rt = (struct rtentry *)rn; - info = &di->info; - error = 0; - - info->rti_info[RTAX_DST] = rt_key(rt); - info->rti_info[RTAX_NETMASK] = rt_mask(rt); - info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; - - rt = rt_unlinkrte(di->rnh, info, &error); - if (rt == NULL) { - /* Either not allowed or not matched. Skip entry */ - return (0); - } - - /* Entry was unlinked. Add to the list and return */ - rt->rt_chain = di->head; - di->head = rt; - - return (0); -} - -/* - * Iterates over a routing table specified by @fibnum and @family and - * deletes elements marked by @filter_f. - * @fibnum: rtable id - * @family: AF_ address family - * @filter_f: function returning non-zero value for items to delete - * @arg: data to pass to the @filter_f function - * @report: true if rtsock notification is needed. - */ -void -rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) -{ - struct rib_head *rnh; - struct rt_delinfo di; - struct rtentry *rt; - - rnh = rt_tables_get_rnh(fibnum, family); - if (rnh == NULL) - return; - - bzero(&di, sizeof(di)); - di.info.rti_filter = filter_f; - di.info.rti_filterdata = arg; - di.rnh = rnh; - - RIB_WLOCK(rnh); - rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); - RIB_WUNLOCK(rnh); - - if (di.head == NULL) - return; - - /* We might have something to reclaim. */ - while (di.head != NULL) { - rt = di.head; - di.head = rt->rt_chain; - rt->rt_chain = NULL; - - /* TODO std rt -> rt_addrinfo export */ - di.info.rti_info[RTAX_DST] = rt_key(rt); - di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); - - rt_notifydelete(rt, &di.info); - - if (report) - rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, - fibnum); - rtfree(rt); - } -} - -/* * Iterates over all existing fibs in system and deletes each element * for which @filter_f function returns non-zero value. * If @family is not AF_UNSPEC, iterates over fibs in particular * address family. */ void rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg) { u_int fibnum; int i, start, end; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (family != AF_UNSPEC) { start = family; end = family; } else { start = 1; end = AF_MAX; } for (i = start; i <= end; i++) { if (rt_tables_get_rnh(fibnum, i) == NULL) continue; rib_walk_del(fibnum, i, filter_f, arg, 0); } } } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. * * Arguments: * rt pointer to rtentry * nh pointer to nhop * arg argument passed to rnh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated */ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg) { struct ifnet *ifp = arg; if (nh->nh_ifp != ifp) return (0); /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rt_flags & RTF_UP) == 0) return (0); return (1); } /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ void rt_flushifroutes_af(struct ifnet *ifp, int af) { KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d", __func__, af, AF_MAX)); rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp); } void rt_flushifroutes(struct ifnet *ifp) { rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); } /* - * Conditionally unlinks rtentry matching data inside @info from @rnh. - * Returns unlinked, locked and referenced @rtentry on success, - * Returns NULL and sets @perror to: - * ESRCH - if prefix was not found, - * EADDRINUSE - if trying to delete PINNED route without appropriate flag. - * ENOENT - if supplied filter function returned 0 (not matched). - */ -static struct rtentry * -rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) -{ - struct sockaddr *dst, *netmask; - struct rtentry *rt; - struct radix_node *rn; - - dst = info->rti_info[RTAX_DST]; - netmask = info->rti_info[RTAX_NETMASK]; - - rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); - if (rt == NULL) { - *perror = ESRCH; - return (NULL); - } - - if ((info->rti_flags & RTF_PINNED) == 0) { - /* Check if target route can be deleted */ - if (rt->rt_flags & RTF_PINNED) { - *perror = EADDRINUSE; - return (NULL); - } - } - - if (info->rti_filter != NULL) { - if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){ - /* Not matched */ - *perror = ENOENT; - return (NULL); - } - - /* - * Filter function requested rte deletion. - * Ease the caller work by filling in remaining info - * from that particular entry. - */ - info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; - } - - /* - * Remove the item from the tree and return it. - * Complain if it is not there and do no more processing. - */ - *perror = ESRCH; -#ifdef RADIX_MPATH - if (rt_mpath_capable(rnh)) - rn = rt_mpath_unlink(rnh, info, rt, perror); - else -#endif - rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); - if (rn == NULL) - return (NULL); - - if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtrequest delete"); - - rt = RNTORT(rn); - RT_LOCK(rt); - rt->rt_flags &= ~RTF_UP; - - *perror = 0; - - return (rt); -} - -static void -rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) -{ - struct ifaddr *ifa; - - /* - * give the protocol a chance to keep things in sync. - */ - ifa = rt->rt_nhop->nh_ifa; - if (ifa != NULL && ifa->ifa_rtrequest != NULL) - ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); -} - - -/* - * These (questionable) definitions of apparent local variables apply - * to the next two functions. XXXXXX!!! - */ -#define dst info->rti_info[RTAX_DST] -#define gateway info->rti_info[RTAX_GATEWAY] -#define netmask info->rti_info[RTAX_NETMASK] -#define ifaaddr info->rti_info[RTAX_IFA] -#define ifpaddr info->rti_info[RTAX_IFP] -#define flags info->rti_flags - -/* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. * * Assume basic consistency checks are executed by callers: * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well. */ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { + const struct sockaddr *dst, *gateway, *ifpaddr, *ifaaddr; struct epoch_tracker et; - int needref, error; + int needref, error, flags; + dst = info->rti_info[RTAX_DST]; + gateway = info->rti_info[RTAX_GATEWAY]; + ifpaddr = info->rti_info[RTAX_IFP]; + ifaaddr = info->rti_info[RTAX_IFA]; + flags = info->rti_flags; + /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. */ error = 0; needref = (info->rti_ifa == NULL); NET_EPOCH_ENTER(et); /* If we have interface specified by the ifindex in the address, use it */ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr; if (sdl->sdl_index != 0) info->rti_ifp = ifnet_byindex(sdl->sdl_index); } /* * If we have source address specified, try to find it * TODO: avoid enumerating all ifas on all interfaces. */ if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { - struct sockaddr *sa; + const struct sockaddr *sa; /* * Most common use case for the userland-supplied routes. * * Choose sockaddr to select ifa. * -- if ifp is set -- * Order of preference: * 1) IFA address * 2) gateway address * Note: for interface routes link-level gateway address * is specified to indicate the interface index without * specifying RTF_GATEWAY. In this case, ignore gateway * Note: gateway AF may be different from dst AF. In this case, * ignore gateway * 3) final destination. * 4) if all of these fails, try to get at least link-level ifa. * -- else -- * try to lookup gateway or dst in the routing table to get ifa */ if (info->rti_info[RTAX_IFA] != NULL) sa = info->rti_info[RTAX_IFA]; else if ((info->rti_flags & RTF_GATEWAY) != 0 && gateway->sa_family == dst->sa_family) sa = gateway; else sa = dst; if (info->rti_ifp != NULL) { info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); /* Case 4 */ if (info->rti_ifa == NULL && gateway != NULL) info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp); } else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, fibnum); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(flags, sa, sa, fibnum); } if (needref && info->rti_ifa != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = info->rti_ifa->ifa_ifp; ifa_ref(info->rti_ifa); } else error = ENETUNREACH; NET_EPOCH_EXIT(et); return (error); } void rt_updatemtu(struct ifnet *ifp) { struct rib_head *rnh; int mtu; int i, j; /* * Try to update rt_mtu for all routes using this interface * Unfortunately the only way to do this is to traverse all * routing tables in all fibs/domains. */ for (i = 1; i <= AF_MAX; i++) { mtu = if_getmtu_family(ifp, i); for (j = 0; j < rt_numfibs; j++) { rnh = rt_tables_get_rnh(j, i); if (rnh == NULL) continue; nhops_update_ifmtu(rnh, ifp, mtu); } } } #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); int p_sockaddr(char *buf, int buflen, struct sockaddr *s) { void *paddr = NULL; switch (s->sa_family) { case AF_INET: paddr = &((struct sockaddr_in *)s)->sin_addr; break; case AF_INET6: paddr = &((struct sockaddr_in6 *)s)->sin6_addr; break; } if (paddr == NULL) return (0); if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) return (0); return (strlen(buf)); } int rt_print(char *buf, int buflen, struct rtentry *rt) { struct sockaddr *addr, *mask; int i = 0; addr = rt_key(rt); mask = rt_mask(rt); i = p_sockaddr(buf, buflen, addr); if (!(rt->rt_flags & RTF_HOST)) { buf[i++] = '/'; i += p_sockaddr(buf + i, buflen - i, mask); } if (rt->rt_flags & RTF_GATEWAY) { buf[i++] = '>'; i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa); } return (i); } #endif #ifdef RADIX_MPATH /* * Deletes key for single-path routes, unlinks rtentry with * gateway specified in @info from multi-path routes. * * Returnes unlinked entry. In case of failure, returns NULL * and sets @perror to ESRCH. */ -static struct radix_node * +struct radix_node * rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror) { /* * if we got multipath routes, we require users to specify * a matching RTAX_GATEWAY. */ struct rtentry *rt; // *rto = NULL; struct radix_node *rn; struct sockaddr *gw; gw = info->rti_info[RTAX_GATEWAY]; rt = rt_mpath_matchgate(rto, gw); if (rt == NULL) { *perror = ESRCH; return (NULL); } /* * this is the first entry in the chain */ if (rto == rt) { rn = rn_mpath_next((struct radix_node *)rt); /* * there is another entry, now it's active */ if (rn) { rto = RNTORT(rn); RT_LOCK(rto); rto->rt_flags |= RTF_UP; RT_UNLOCK(rto); } else if (rt->rt_flags & RTF_GATEWAY) { /* * For gateway routes, we need to * make sure that we we are deleting * the correct gateway. * rt_mpath_matchgate() does not * check the case when there is only * one route in the chain. */ if (gw && (rt->rt_nhop->gw_sa.sa_len != gw->sa_len || memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len))) { *perror = ESRCH; return (NULL); } } /* * use the normal delete code to remove * the first entry */ - rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); + rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], + &rnh->head); *perror = 0; return (rn); } /* * if the entry is 2nd and on up */ if (rt_mpath_deldup(rto, rt) == 0) panic ("rtrequest1: rt_mpath_deldup"); *perror = 0; rn = (struct radix_node *)rt; return (rn); } #endif -#undef dst -#undef gateway -#undef netmask -#undef ifaaddr -#undef ifpaddr -#undef flags - int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { const struct sockaddr *dst; struct rib_head *rnh; int error; KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); NET_EPOCH_ASSERT(); - + dst = info->rti_info[RTAX_DST]; switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } /* * Find the correct routing tree to use for this Address Family */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) return (EAFNOSUPPORT); /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. */ if (info->rti_flags & RTF_HOST) info->rti_info[RTAX_NETMASK] = NULL; error = 0; switch (req) { case RTM_DELETE: error = del_route(rnh, info, ret_nrt); break; case RTM_RESOLVE: /* * resolve was only used for route cloning * here for compat */ break; case RTM_ADD: error = add_route(rnh, info, ret_nrt); break; case RTM_CHANGE: error = change_route(rnh, info, ret_nrt); break; default: error = EOPNOTSUPP; } return (error); } -static int -add_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - struct sockaddr *dst, *ndst, *gateway, *netmask; - struct rtentry *rt, *rt_old; - struct nhop_object *nh; - struct radix_node *rn; - struct ifaddr *ifa; - int error, flags; - struct epoch_tracker et; - - dst = info->rti_info[RTAX_DST]; - gateway = info->rti_info[RTAX_GATEWAY]; - netmask = info->rti_info[RTAX_NETMASK]; - flags = info->rti_flags; - - if ((flags & RTF_GATEWAY) && !gateway) - return (EINVAL); - if (dst && gateway && (dst->sa_family != gateway->sa_family) && - (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) - return (EINVAL); - - if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) - return (EINVAL); - - if (info->rti_ifa == NULL) { - error = rt_getifa_fib(info, rnh->rib_fibnum); - if (error) - return (error); - } else { - ifa_ref(info->rti_ifa); - } - - NET_EPOCH_ENTER(et); - error = nhop_create_from_info(rnh, info, &nh); - NET_EPOCH_EXIT(et); - if (error != 0) { - ifa_free(info->rti_ifa); - return (error); - } - - rt = uma_zalloc(V_rtzone, M_NOWAIT); - if (rt == NULL) { - ifa_free(info->rti_ifa); - nhop_free(nh); - return (ENOBUFS); - } - rt->rt_flags = RTF_UP | flags; - rt->rt_nhop = nh; - - /* Fill in dst */ - memcpy(&rt->rt_dst, dst, dst->sa_len); - rt_key(rt) = &rt->rt_dst; - - /* - * point to the (possibly newly malloc'd) dest address. - */ - ndst = (struct sockaddr *)rt_key(rt); - - /* - * make sure it contains the value we want (masked if needed). - */ - if (netmask) { - rt_maskedcopy(dst, ndst, netmask); - } else - bcopy(dst, ndst, dst->sa_len); - - /* - * We use the ifa reference returned by rt_getifa_fib(). - * This moved from below so that rnh->rnh_addaddr() can - * examine the ifa and ifa->ifa_ifp if it so desires. - */ - ifa = info->rti_ifa; - rt->rt_weight = 1; - - rt_setmetrics(info, rt); - - RIB_WLOCK(rnh); - RT_LOCK(rt); -#ifdef RADIX_MPATH - /* do not permit exactly the same dst/mask/gw pair */ - if (rt_mpath_capable(rnh) && - rt_mpath_conflict(rnh, rt, netmask)) { - RIB_WUNLOCK(rnh); - - nhop_free(nh); - uma_zfree(V_rtzone, rt); - return (EEXIST); - } -#endif - - rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); - - if (rn != NULL && rt->rt_expire > 0) - tmproutes_update(rnh, rt); - - rt_old = NULL; - if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { - - /* - * Force removal and re-try addition - * TODO: better multipath&pinned support - */ - struct sockaddr *info_dst = info->rti_info[RTAX_DST]; - info->rti_info[RTAX_DST] = ndst; - /* Do not delete existing PINNED(interface) routes */ - info->rti_flags &= ~RTF_PINNED; - rt_old = rt_unlinkrte(rnh, info, &error); - info->rti_flags |= RTF_PINNED; - info->rti_info[RTAX_DST] = info_dst; - if (rt_old != NULL) - rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, - rt->rt_nodes); - } - RIB_WUNLOCK(rnh); - - if (rt_old != NULL) { - rt_notifydelete(rt_old, info); - rtfree(rt_old); - } - - /* - * If it still failed to go into the tree, - * then un-make it (this should be a function) - */ - if (rn == NULL) { - nhop_free(nh); - uma_zfree(V_rtzone, rt); - return (EEXIST); - } - - /* - * If this protocol has something to add to this then - * allow it to do that as well. - */ - if (ifa->ifa_rtrequest) - ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); - - /* - * actually return a resultant rtentry - */ - if (ret_nrt) - *ret_nrt = rt; - rnh->rnh_gen++; /* Routing table updated */ - RT_UNLOCK(rt); - - return (0); -} - -static int -del_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - struct sockaddr *dst, *netmask; - struct sockaddr_storage mdst; - struct rtentry *rt; - int error; - - dst = info->rti_info[RTAX_DST]; - netmask = info->rti_info[RTAX_NETMASK]; - - if (netmask) { - if (dst->sa_len > sizeof(mdst)) - return (EINVAL); - rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); - dst = (struct sockaddr *)&mdst; - } - - RIB_WLOCK(rnh); - rt = rt_unlinkrte(rnh, info, &error); - RIB_WUNLOCK(rnh); - if (error != 0) - return (error); - - rt_notifydelete(rt, info); - - /* - * If the caller wants it, then it can have it, - * the entry will be deleted after the end of the current epoch. - */ - if (ret_nrt) - *ret_nrt = rt; - - rtfree(rt); - - return (0); -} - -static int -change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - RIB_RLOCK_TRACKER; - struct rtentry *rt = NULL; - int error = 0; - int free_ifa = 0; - struct nhop_object *nh, *nh_orig; - - RIB_RLOCK(rnh); - rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], - info->rti_info[RTAX_NETMASK], &rnh->head); - - if (rt == NULL) { - RIB_RUNLOCK(rnh); - return (ESRCH); - } - -#ifdef RADIX_MPATH - /* - * If we got multipath routes, - * we require users to specify a matching RTAX_GATEWAY. - */ - if (rt_mpath_capable(rnh)) { - rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); - if (rt == NULL) { - RIB_RUNLOCK(rnh); - return (ESRCH); - } - } -#endif - nh_orig = rt->rt_nhop; - - RIB_RUNLOCK(rnh); - - rt = NULL; - nh = NULL; - - /* - * New gateway could require new ifaddr, ifp; - * flags may also be different; ifp may be specified - * by ll sockaddr when protocol address is ambiguous - */ - if (((nh_orig->nh_flags & NHF_GATEWAY) && - info->rti_info[RTAX_GATEWAY] != NULL) || - info->rti_info[RTAX_IFP] != NULL || - (info->rti_info[RTAX_IFA] != NULL && - !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { - error = rt_getifa_fib(info, rnh->rib_fibnum); - if (info->rti_ifa != NULL) - free_ifa = 1; - - if (error != 0) { - if (free_ifa) { - ifa_free(info->rti_ifa); - info->rti_ifa = NULL; - } - - return (error); - } - } - - error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); - if (free_ifa) { - ifa_free(info->rti_ifa); - info->rti_ifa = NULL; - } - if (error != 0) - return (error); - - RIB_WLOCK(rnh); - - /* Lookup rtentry once again and check if nexthop is still the same */ - rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], - info->rti_info[RTAX_NETMASK], &rnh->head); - - if (rt == NULL) { - RIB_WUNLOCK(rnh); - nhop_free(nh); - return (ESRCH); - } - - if (rt->rt_nhop != nh_orig) { - RIB_WUNLOCK(rnh); - nhop_free(nh); - return (EAGAIN); - } - - /* Proceed with the update */ - RT_LOCK(rt); - - /* Provide notification to the protocols.*/ - if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) - nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - - rt->rt_nhop = nh; - rt_setmetrics(info, rt); - - if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) - nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - - if (ret_nrt != NULL) - *ret_nrt = rt; - - RT_UNLOCK(rt); - - /* Update generation id to reflect rtable change */ - rnh->rnh_gen++; - - RIB_WUNLOCK(rnh); - - nhop_free(nh_orig); - - return (0); -} - -static int -change_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - int error; - - /* Check if updated gateway exists */ - if ((info->rti_flags & RTF_GATEWAY) && - (info->rti_info[RTAX_GATEWAY] == NULL)) - return (EINVAL); - - /* - * route change is done in multiple steps, with dropping and - * reacquiring lock. In the situations with multiple processes - * changes the same route in can lead to the case when route - * is changed between the steps. Address it by retrying the operation - * multiple times before failing. - */ - for (int i = 0; i < RIB_MAX_RETRIES; i++) { - error = change_route_one(rnh, info, ret_nrt); - if (error != EAGAIN) - break; - } - - return (error); -} - - -static void +void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) { if (info->rti_mflags & RTV_WEIGHT) rt->rt_weight = info->rti_rmx->rmx_weight; /* Kernel -> userland timebase conversion. */ if (info->rti_mflags & RTV_EXPIRE) rt->rt_expire = info->rti_rmx->rmx_expire ? info->rti_rmx->rmx_expire - time_second + time_uptime : 0; } void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { u_char *cp1 = (u_char *)src; u_char *cp2 = (u_char *)dst; u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { RIB_RLOCK_TRACKER; struct epoch_tracker et; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; struct rt_addrinfo info; int error = 0; int startfib, endfib; char tempbuf[_SOCKADDR_TMPSIZE]; int didwork = 0; int a_failure = 0; struct sockaddr_dl_short *sdl = NULL; struct rib_head *rnh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; netmask = NULL; } else { dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } if (dst->sa_len == 0) return(EINVAL); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } if (fibnum == RT_ALL_FIBS) { if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) startfib = endfib = ifa->ifa_ifp->if_fib; else { startfib = 0; endfib = rt_numfibs - 1; } } else { KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); startfib = fibnum; endfib = fibnum; } /* * If it's a delete, check that if it exists, * it's on the correct interface or we might scrub * a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) * XXX this is kinda inet specific.. */ if (netmask != NULL) { rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); dst = (struct sockaddr *)tempbuf; } } else if (cmd == RTM_ADD) { sdl = (struct sockaddr_dl_short *)tempbuf; bzero(sdl, sizeof(struct sockaddr_dl_short)); sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(struct sockaddr_dl_short); sdl->sdl_type = ifa->ifa_ifp->if_type; sdl->sdl_index = ifa->ifa_ifp->if_index; } /* * Now go through all the requested tables (fibs) and do the * requested action. Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { struct radix_node *rn; /* * Look up an rtentry that is in the routing tree and * contains the correct info. */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) /* this table doesn't exist but others might */ continue; RIB_RLOCK(rnh); rn = rnh->rnh_lookup(dst, netmask, &rnh->head); #ifdef RADIX_MPATH if (rt_mpath_capable(rnh)) { if (rn == NULL) error = ESRCH; else { rt = RNTORT(rn); /* * for interface route the gateway * gateway is sockaddr_dl, so * rt_mpath_matchgate must use the * interface address */ rt = rt_mpath_matchgate(rt, ifa->ifa_addr); if (rt == NULL) error = ESRCH; } } #endif error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_nhop->nh_ifa != ifa); RIB_RUNLOCK(rnh); if (error) { /* this is only an error if bad on ALL tables */ continue; } } /* * Do the actual request */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; info.rti_info[RTAX_DST] = dst; /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl; else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; NET_EPOCH_ENTER(et); error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ /* TODO: interface routes/aliases */ rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); didwork = 1; } NET_EPOCH_EXIT(et); if (error) a_failure = error; } if (cmd == RTM_DELETE) { if (didwork) { error = 0; } else { /* we only give an error if it wasn't in any table */ error = ((flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } } else { if (a_failure) { /* return an error if any of them failed */ error = a_failure; } } return (error); } /* * Set up a routing table entry, normally * for an interface. */ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; } else { dst = ifa->ifa_addr; } switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We do support multiple FIBs. */ fib = RT_ALL_FIBS; break; } return (rtinit1(ifa, cmd, flags, fib)); } /* * Announce interface address arrival/withdraw * Returns 0 on success. */ int rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd); return (rtsock_addrmsg(cmd, ifa, fibnum)); } /* * Announce kernel-originated route addition/removal to rtsock based on @rt data. * cmd: RTM_ cmd * @rt: valid rtentry * @ifp: target route interface * @fibnum: fib id or RT_ALL_FIBS * * Returns 0 on success. */ int rt_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); return (rtsock_routemsg(cmd, rt, ifp, 0, fibnum)); } /* * Announce kernel-originated route addition/removal to rtsock based on @rt data. * cmd: RTM_ cmd * @info: addrinfo structure with valid data. * @fibnum: fib id or RT_ALL_FIBS * * Returns 0 on success. */ int rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__)); return (rtsock_routemsg_info(cmd, info, fibnum)); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. */ void rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %u", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); if (cmd == RTM_ADD) { rt_addrmsg(cmd, ifa, fibnum); if (rt != NULL) rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum); } else { if (rt != NULL) rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum); rt_addrmsg(cmd, ifa, fibnum); } } Index: head/sys/netinet/in_fib.c =================================================================== --- head/sys/netinet/in_fib.c (revision 361420) +++ head/sys/netinet/in_fib.c (revision 361421) @@ -1,406 +1,405 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #ifdef INET /* Verify struct route compatiblity */ /* Assert 'struct route_in' is compatible with 'struct route' */ CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4); static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4); static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4); -#define RNTORT(p) ((struct rtentry *)(p)) static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4) { if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = nh->nh_ifa->ifa_ifp; else pnh4->nh_ifp = nh->nh_ifp; pnh4->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) pnh4->nh_addr = nh->gw4_sa.sin_addr; else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = nh->nh_flags; /* TODO: Handle RTF_BROADCAST here */ } static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4) { if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = nh->nh_ifa->ifa_ifp; else pnh4->nh_ifp = nh->nh_ifp; pnh4->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) pnh4->nh_addr = nh->gw4_sa.sin_addr; else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = nh->nh_flags; pnh4->nh_ia = ifatoia(nh->nh_ifa); pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr; } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores nexthop info provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up address on interface "ix0" pointer to "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - howewer mtu from "transmit" interface will be returned. */ int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { nh = RNTORT(rn)->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib4_rte_to_nh_basic(nh, dst, flags, pnh4); RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores extende nexthop info provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. * - in that case you need to call fib4_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up address of interface "ix0" pointer to "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - howewer mtu from "transmit" interface will be returned. */ int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct rtentry *rte; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); #ifdef RADIX_MPATH rte = rt_mpath_select(rte, flowid); if (rte == NULL) { RIB_RUNLOCK(rh); return (ENOENT); } #endif nh = rte->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib4_rte_to_nh_extended(nh, dst, flags, pnh4); if ((flags & NHR_REF) != 0) { /* TODO: lwref on egress ifp's ? */ } RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4) { } /* * Looks up path in fib @fibnum specified by @dst. * Returns path nexthop on success. Nexthop is safe to use * within the current network epoch. If longer lifetime is required, * one needs to pass NHR_REF as a flag. This will return referenced * nexthop. */ struct nhop_object * fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (NULL); /* Prepare lookup key */ struct sockaddr_in sin4; memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(struct sockaddr_in); sin4.sin_addr = dst; nh = NULL; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH if (rt_mpath_next(rt) != NULL) rt = rt_mpath_selectrte(rt, flowid); #endif nh = rt->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { if (flags & NHR_REF) nhop_ref_object(nh); RIB_RUNLOCK(rh); return (nh); } } RIB_RUNLOCK(rh); RTSTAT_INC(rts_unreach); return (NULL); } inline static int check_urpf(const struct nhop_object *nh, uint32_t flags, const struct ifnet *src_if) { if (src_if != NULL && nh->nh_aifp == src_if) { return (1); } if (src_if == NULL) { if ((flags & NHR_NODEFAULT) == 0) return (1); else if ((nh->nh_flags & NHF_DEFAULT) == 0) return (1); } return (0); } #ifdef RADIX_MPATH inline static int check_urpf_mpath(struct rtentry *rt, uint32_t flags, const struct ifnet *src_if) { while (rt != NULL) { if (check_urpf(rt->rt_nhop, flags, src_if) != 0) return (1); rt = rt_mpath_next(rt); } return (0); } #endif /* * Performs reverse path forwarding lookup. * If @src_if is non-zero, verifies that at least 1 path goes via * this interface. * If @src_if is zero, verifies that route exist. * if @flags contains NHR_NOTDEFAULT, do not consider default route. * * Returns 1 if route matching conditions is found, 0 otherwise. */ int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; int ret; KASSERT((fibnum < rt_numfibs), ("fib4_check_urpf: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (0); /* Prepare lookup key */ struct sockaddr_in sin4; memset(&sin4, 0, sizeof(sin4)); sin4.sin_len = sizeof(struct sockaddr_in); sin4.sin_addr = dst; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH ret = check_urpf_mpath(rt, flags, src_if); #else ret = check_urpf(rt->rt_nhop, flags, src_if); #endif RIB_RUNLOCK(rh); return (ret); } RIB_RUNLOCK(rh); return (0); } struct nhop_object * fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags) { struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_debugnet: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (NULL); /* Prepare lookup key */ struct sockaddr_in sin4; memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(struct sockaddr_in); sin4.sin_addr = dst; nh = NULL; /* unlocked lookup */ rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH if (rt_mpath_next(rt) != NULL) rt = rt_mpath_selectrte(rt, 0); #endif nh = rt->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { if (flags & NHR_REF) nhop_ref_object(nh); return (nh); } } return (NULL); } #endif Index: head/sys/netinet6/in6_fib.c =================================================================== --- head/sys/netinet6/in6_fib.c (revision 361420) +++ head/sys/netinet6/in6_fib.c (revision 361421) @@ -1,442 +1,441 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #ifdef INET6 static void fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6); static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6); -#define RNTORT(p) ((struct rtentry *)(p)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst); static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6) { /* Do explicit nexthop zero unless we're copying it */ memset(pnh6, 0, sizeof(*pnh6)); if ((flags & NHR_IFAIF) != 0) pnh6->nh_ifp = nh->nh_aifp; else pnh6->nh_ifp = nh->nh_ifp; pnh6->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) { /* Return address with embedded scope. */ pnh6->nh_addr = nh->gw6_sa.sin6_addr; } else pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = nh->nh_flags; } static void fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6) { /* Do explicit nexthop zero unless we're copying it */ memset(pnh6, 0, sizeof(*pnh6)); if ((flags & NHR_IFAIF) != 0) pnh6->nh_ifp = nh->nh_aifp; else pnh6->nh_ifp = nh->nh_ifp; pnh6->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) { /* Return address with embedded scope. */ pnh6->nh_addr = nh->gw6_sa.sin6_addr; } else pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = nh->nh_flags; pnh6->nh_ia = ifatoia6(nh->nh_ifa); } /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. * Stores basic nexthop info into provided @pnh6 structure. * Note that * - nh_ifp represents logical transmit interface (rt_ifp) by default * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - mtu from logical transmit interface will be returned. * - nh_ifp cannot be safely dereferenced * - nh_ifp represents rt_ifp (e.g. if looking up address on * interface "ix0" pointer to "ix0" interface will be returned instead * of "lo0") * - howewer mtu from "transmit" interface will be returned. * - scope will be embedded in nh_addr */ int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_addr = *dst; sin6.sin6_len = sizeof(struct sockaddr_in6); /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { nh = RNTORT(rn)->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib6_rte_to_nh_basic(nh, &sin6.sin6_addr, flags, pnh6); RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. * Stores extended nexthop info into provided @pnh6 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. * - in that case you need to call fib6_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) by default * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - mtu from logical transmit interface will be returned. * - scope will be embedded in nh_addr */ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; struct rtentry *rte; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); #ifdef RADIX_MPATH rte = rt_mpath_select(rte, flowid); if (rte == NULL) { RIB_RUNLOCK(rh); return (ENOENT); } #endif nh = rte->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib6_rte_to_nh_extended(nh, &sin6.sin6_addr, flags, pnh6); if ((flags & NHR_REF) != 0) { /* TODO: Do lwref on egress ifp's */ } RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6) { } /* * Looks up path in fib @fibnum specified by @dst. * Assumes scope is deembedded and provided in @scopeid. * * Returns path nexthop on success. Nexthop is safe to use * within the current network epoch. If longer lifetime is required, * one needs to pass NHR_REF as a flag. This will return referenced * nexthop. */ struct nhop_object * fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, uint32_t flowid) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct nhop_object *nh; struct sockaddr_in6 sin6; KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (NULL); /* TODO: radix changes */ //addr = *dst6; /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst6; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst6)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH if (rt_mpath_next(rt) != NULL) rt = rt_mpath_selectrte(rt, flowid); #endif nh = rt->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { if (flags & NHR_REF) nhop_ref_object(nh); RIB_RUNLOCK(rh); return (nh); } } RIB_RUNLOCK(rh); RTSTAT_INC(rts_unreach); return (NULL); } inline static int check_urpf(const struct nhop_object *nh, uint32_t flags, const struct ifnet *src_if) { if (src_if != NULL && nh->nh_aifp == src_if) { return (1); } if (src_if == NULL) { if ((flags & NHR_NODEFAULT) == 0) return (1); else if ((nh->nh_flags & NHF_DEFAULT) == 0) return (1); } return (0); } #ifdef RADIX_MPATH inline static int check_urpf_mpath(struct rtentry *rt, uint32_t flags, const struct ifnet *src_if) { while (rt != NULL) { if (check_urpf(rt->rt_nhop, flags, src_if) != 0) return (1); rt = rt_mpath_next(rt); } return (0); } #endif /* * Performs reverse path forwarding lookup. * If @src_if is non-zero, verifies that at least 1 path goes via * this interface. * If @src_if is zero, verifies that route exist. * if @flags contains NHR_NOTDEFAULT, do not consider default route. * * Returns 1 if route matching conditions is found, 0 otherwise. */ int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct sockaddr_in6 sin6; int ret; KASSERT((fibnum < rt_numfibs), ("fib6_check_urpf: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (0); /* TODO: radix changes */ /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst6; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst6)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH ret = check_urpf_mpath(rt, flags, src_if); #else ret = check_urpf(rt->rt_nhop, flags, src_if); #endif RIB_RUNLOCK(rh); return (ret); } RIB_RUNLOCK(rh); return (0); } struct nhop_object * fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags) { struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct nhop_object *nh; struct sockaddr_in6 sin6; KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (NULL); /* TODO: radix changes */ //addr = *dst6; /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst6; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst6)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); nh = rt->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { if (flags & NHR_REF) nhop_ref_object(nh); return (nh); } } return (NULL); } #endif