diff --git a/sys/net/route/nhop.h b/sys/net/route/nhop.h index 3944d8946b07..8b5ac7ada072 100644 --- a/sys/net/route/nhop.h +++ b/sys/net/route/nhop.h @@ -1,247 +1,248 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This header file contains public definitions for the nexthop routing subsystem. */ #ifndef _NET_ROUTE_NHOP_H_ #define _NET_ROUTE_NHOP_H_ #include /* sockaddr_in && sockaddr_in6 */ #include enum nhop_type { NH_TYPE_IPV4_ETHER_RSLV = 1, /* IPv4 ethernet without GW */ NH_TYPE_IPV4_ETHER_NHOP = 2, /* IPv4 with pre-calculated ethernet encap */ NH_TYPE_IPV6_ETHER_RSLV = 3, /* IPv6 ethernet, without GW */ NH_TYPE_IPV6_ETHER_NHOP = 4 /* IPv6 with pre-calculated ethernet encap*/ }; #ifdef _KERNEL /* * Define shorter version of AF_LINK sockaddr. * * Currently the only use case of AF_LINK gateway is storing * interface index of the interface of the source IPv6 address. * This is used by the IPv6 code for the connections over loopback * interface. * * The structure below copies 'struct sockaddr_dl', reducing the * size of sdl_data buffer, as it is not used. This change * allows to store the AF_LINK gateways in the nhop gateway itself, * simplifying control plane handling. */ struct sockaddr_dl_short { u_char sdl_len; /* Total length of sockaddr */ u_char sdl_family; /* AF_LINK */ u_short sdl_index; /* if != 0, system given index for interface */ u_char sdl_type; /* interface type */ u_char sdl_nlen; /* interface name length, no trailing 0 reqd. */ u_char sdl_alen; /* link level address length */ u_char sdl_slen; /* link layer selector length */ char sdl_data[8]; /* unused */ }; #define NHOP_RELATED_FLAGS \ (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_BLACKHOLE | \ RTF_FIXEDMTU | RTF_LOCAL | RTF_BROADCAST | RTF_MULTICAST) struct nh_control; struct nhop_priv; /* * Struct 'nhop_object' field description: * * nh_flags: NHF_ flags used in the dataplane code. NHF_GATEWAY or NHF_BLACKHOLE * can be examples of such flags. * nh_mtu: ready-to-use nexthop mtu. Already accounts for the link-level header, * interface MTU and protocol-specific limitations. * nh_prepend_len: link-level prepend length. Currently unused. * nh_ifp: logical transmit interface. The one from which if_transmit() will be * called. Guaranteed to be non-NULL. * nh_aifp: ifnet of the source address. Same as nh_ifp except IPv6 loopback * routes. See the example below. * nh_ifa: interface address to use. Guaranteed to be non-NULL. * nh_pksent: counter(9) reflecting the number of packets transmitted. * * gw_: storage suitable to hold AF_INET, AF_INET6 or AF_LINK gateway. More * details ara available in the examples below. * * Examples: * * Direct routes (routes w/o gateway): * NHF_GATEWAY is NOT set. * nh_ifp denotes the logical transmit interface (). * nh_aifp is the same as nh_ifp * gw_sa contains AF_LINK sa with nh_aifp ifindex (compat) * Loopback routes: * NHF_GATEWAY is NOT set. * nh_ifp points to the loopback interface (lo0). * nh_aifp points to the interface where the destination address belongs to. * This is useful in IPv6 link-local-over-loopback communications. * gw_sa contains AF_LINK sa with nh_aifp ifindex (compat) * GW routes: * NHF_GATEWAY is set. * nh_ifp denotes the logical transmit interface. * nh_aifp is the same as nh_ifp * gw_sa contains L3 address (either AF_INET or AF_INET6). * * * Note: struct nhop_object fields are ordered in a way that * supports memcmp-based comparisons. * */ #define NHOP_END_CMP (__offsetof(struct nhop_object, nh_pksent)) struct nhop_object { uint16_t nh_flags; /* nhop flags */ uint16_t nh_mtu; /* nexthop mtu */ union { struct sockaddr_in gw4_sa; /* GW accessor as IPv4 */ struct sockaddr_in6 gw6_sa; /* GW accessor as IPv6 */ struct sockaddr gw_sa; struct sockaddr_dl_short gwl_sa; /* AF_LINK gw (compat) */ char gw_buf[28]; }; struct ifnet *nh_ifp; /* Logical egress interface. Always != NULL */ struct ifaddr *nh_ifa; /* interface address to use. Always != NULL */ struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */ counter_u64_t nh_pksent; /* packets sent using this nhop */ /* 32 bytes + 4xPTR == 64(amd64) / 48(i386) */ uint8_t nh_prepend_len; /* length of prepend data */ uint8_t spare[3]; uint32_t spare1; /* alignment */ char nh_prepend[48]; /* L2 prepend */ struct nhop_priv *nh_priv; /* control plane data */ /* -- 128 bytes -- */ }; /* * Nhop validness. * * Currently we verify whether link is up or not on every packet, which can be * quite costy. * TODO: subscribe for the interface notifications and update the nexthops * with NHF_INVALID flag. */ #define NH_IS_VALID(_nh) RT_LINK_IS_UP((_nh)->nh_ifp) #define NH_IS_NHGRP(_nh) ((_nh)->nh_flags & NHF_MULTIPATH) #define RT_GATEWAY(_rt) ((struct sockaddr *)&(_rt)->rt_nhop->gw4_sa) #define RT_GATEWAY_CONST(_rt) ((const struct sockaddr *)&(_rt)->rt_nhop->gw4_sa) #define NH_FREE(_nh) do { \ nhop_free(_nh); \ /* guard against invalid refs */ \ _nh = NULL; \ } while (0) struct weightened_nhop { struct nhop_object *nh; uint32_t weight; }; void nhop_free(struct nhop_object *nh); struct sysctl_req; struct sockaddr_dl; struct rib_head; uint32_t nhop_get_idx(const struct nhop_object *nh); enum nhop_type nhop_get_type(const struct nhop_object *nh); int nhop_get_rtflags(const struct nhop_object *nh); struct vnet *nhop_get_vnet(const struct nhop_object *nh); +struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid); #endif /* _KERNEL */ /* Kernel <> userland structures */ /* Structure usage and layout are described in dump_nhop_entry() */ struct nhop_external { uint32_t nh_len; /* length of the datastructure */ uint32_t nh_idx; /* Nexthop index */ uint32_t nh_fib; /* Fib nexhop is attached to */ uint32_t ifindex; /* transmit interface ifindex */ uint32_t aifindex; /* address ifindex */ uint8_t prepend_len; /* length of the prepend */ uint8_t nh_family; /* address family */ uint16_t nh_type; /* nexthop type */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ struct in_addr nh_addr; /* GW/DST IPv4 address */ struct in_addr nh_src; /* default source IPv4 address */ uint64_t nh_pksent; /* control plane */ /* lookup key: address, family, type */ char nh_prepend[64]; /* L2 prepend */ uint64_t nh_refcount; /* number of references */ }; struct nhop_addrs { uint32_t na_len; /* length of the datastructure */ uint16_t gw_sa_off; /* offset of gateway SA */ uint16_t src_sa_off; /* offset of src address SA */ }; #define NHG_C_TYPE_CNHOPS 0x1 /* Control plane nhops list */ #define NHG_C_TYPE_DNHOPS 0x2 /* Dataplane nhops list */ struct nhgrp_container { uint32_t nhgc_len; /* container length */ uint16_t nhgc_count; /* number of items */ uint8_t nhgc_type; /* container type */ uint8_t nhgc_subtype; /* container subtype */ }; struct nhgrp_nhop_external { uint32_t nh_idx; uint32_t nh_weight; }; /* * Layout: * - nhgrp_external * - nhgrp_container (control plane nhops list) * - nhgrp_nhop_external * - nhgrp_nhop_external * .. * - nhgrp_container (dataplane nhops list) * - nhgrp_nhop_external * - nhgrp_nhop_external */ struct nhgrp_external { uint32_t nhg_idx; /* Nexthop group index */ uint32_t nhg_refcount; /* number of references */ }; #endif diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c index 2b676663a782..542380afd64b 100644 --- a/sys/net/route/nhop_ctl.c +++ b/sys/net/route/nhop_ctl.c @@ -1,904 +1,911 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This file contains core functionality for the nexthop ("nhop") route subsystem. * The business logic needed to create nexhop objects is implemented here. * * Nexthops in the original sense are the objects containing all the necessary * information to forward the packet to the selected destination. * In particular, nexthop is defined by a combination of * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and * NHF_DEFAULT * * Additionally, each nexthop gets assigned its unique index (nexthop index). * It serves two purposes: first one is to ease the ability of userland programs to * reference nexthops by their index. The second one allows lookup algorithms to * to store index instead of pointer (2 bytes vs 8) as a lookup result. * All nexthops are stored in the resizable hash table. * * Basically, this file revolves around supporting 3 functions: * 1) nhop_create_from_info / nhop_create_from_nhop, which contains all * business logic on filling the nexthop fields based on the provided request. * 2) nhop_get(), which gets a usable referenced nexthops. * * Conventions: * 1) non-exported functions start with verb * 2) exported function starts with the subsystem prefix: "nhop" */ static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w); static struct nhop_priv *alloc_nhop_structure(void); static int get_nhop(struct rib_head *rnh, struct rt_addrinfo *info, struct nhop_priv **pnh_priv); static int finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, struct nhop_priv *nh_priv); static struct ifnet *get_aifp(const struct nhop_object *nh, int reference); static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp); static void destroy_nhop_epoch(epoch_context_t ctx); static void destroy_nhop(struct nhop_priv *nh_priv); static void print_nhop(const char *prefix, const struct nhop_object *nh); _Static_assert(__offsetof(struct nhop_object, nh_ifp) == 32, "nhop_object: wrong nh_ifp offset"); _Static_assert(sizeof(struct nhop_object) <= 128, "nhop_object: size exceeds 128 bytes"); static uma_zone_t nhops_zone; /* Global zone for each and every nexthop */ #define NHOP_OBJECT_ALIGNED_SIZE roundup2(sizeof(struct nhop_object), \ 2 * CACHE_LINE_SIZE) #define NHOP_PRIV_ALIGNED_SIZE roundup2(sizeof(struct nhop_priv), \ 2 * CACHE_LINE_SIZE) void nhops_init(void) { nhops_zone = uma_zcreate("routing nhops", NHOP_OBJECT_ALIGNED_SIZE + NHOP_PRIV_ALIGNED_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } /* * Fetches the interface of source address used by the route. * In all cases except interface-address-route it would be the * same as the transmit interfaces. * However, for the interface address this function will return * this interface ifp instead of loopback. This is needed to support * link-local IPv6 loopback communications. * * If @reference is non-zero, found ifp is referenced. * * Returns found ifp. */ static struct ifnet * get_aifp(const struct nhop_object *nh, int reference) { struct ifnet *aifp = NULL; /* * Adjust the "outgoing" interface. If we're going to loop * the packet back to ourselves, the ifp would be the loopback * interface. However, we'd rather know the interface associated * to the destination address (which should probably be one of * our own addresses). */ if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) && nh->gw_sa.sa_family == AF_LINK) { if (reference) aifp = ifnet_byindex_ref(nh->gwl_sa.sdl_index); else aifp = ifnet_byindex(nh->gwl_sa.sdl_index); if (aifp == NULL) { DPRINTF("unable to get aifp for %s index %d", if_name(nh->nh_ifp), nh->gwl_sa.sdl_index); } } if (aifp == NULL) { aifp = nh->nh_ifp; if (reference) if_ref(aifp); } return (aifp); } int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two) { if (memcmp(_one->nh, _two->nh, NHOP_END_CMP) != 0) return (0); if (memcmp(_one, _two, NH_PRIV_END_CMP) != 0) return (0); return (1); } /* * Conditionally sets @nh mtu data based on the @info data. */ static void set_nhop_mtu_from_info(struct nhop_object *nh, const struct rt_addrinfo *info) { if (info->rti_mflags & RTV_MTU) { if (info->rti_rmx->rmx_mtu != 0) { /* * MTU was explicitly provided by user. * Keep it. */ nh->nh_priv->rt_flags |= RTF_FIXEDMTU; } else { /* * User explicitly sets MTU to 0. * Assume rollback to default. */ nh->nh_priv->rt_flags &= ~RTF_FIXEDMTU; } nh->nh_mtu = info->rti_rmx->rmx_mtu; } } /* * Fills in shorted link-level sockadd version suitable to be stored inside the * nexthop gateway buffer. */ static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp) { bzero(sdl, sizeof(struct sockaddr_dl_short)); sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(struct sockaddr_dl_short); sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; } static int set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info) { struct sockaddr *gw; gw = info->rti_info[RTAX_GATEWAY]; KASSERT(gw != NULL, ("gw is NULL")); if (info->rti_flags & RTF_GATEWAY) { if (gw->sa_len > sizeof(struct sockaddr_in6)) { DPRINTF("nhop SA size too big: AF %d len %u", gw->sa_family, gw->sa_len); return (ENOMEM); } memcpy(&nh->gw_sa, gw, gw->sa_len); } else { /* * Interface route. Currently the route.c code adds * sa of type AF_LINK, which is 56 bytes long. The only * meaningful data there is the interface index. It is used * used is the IPv6 loopback output, where we need to preserve * the original interface to maintain proper scoping. * Despite the fact that nexthop code stores original interface * in the separate field (nh_aifp, see below), write AF_LINK * compatible sa with shorter total length. */ struct sockaddr_dl *sdl; struct ifnet *ifp; /* Fetch and validate interface index */ sdl = (struct sockaddr_dl *)gw; if (sdl->sdl_family != AF_LINK) { DPRINTF("unsupported AF: %d", sdl->sdl_family); return (ENOTSUP); } ifp = ifnet_byindex(sdl->sdl_index); if (ifp == NULL) { DPRINTF("invalid ifindex %d", sdl->sdl_index); return (EINVAL); } fill_sdl_from_ifp(&nh->gwl_sa, ifp); } return (0); } static uint16_t convert_rt_to_nh_flags(int rt_flags) { uint16_t res; res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0; res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0; res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0; return (res); } static int fill_nhop_from_info(struct nhop_priv *nh_priv, struct rt_addrinfo *info) { int error, rt_flags; struct nhop_object *nh; nh = nh_priv->nh; rt_flags = info->rti_flags & NHOP_RT_FLAG_MASK; nh->nh_priv->rt_flags = rt_flags; nh_priv->nh_family = info->rti_info[RTAX_DST]->sa_family; nh_priv->nh_type = 0; // hook responsibility to set nhop type nh->nh_flags = convert_rt_to_nh_flags(rt_flags); set_nhop_mtu_from_info(nh, info); if ((error = set_nhop_gw_from_info(nh, info)) != 0) return (error); nh->nh_ifp = info->rti_ifa->ifa_ifp; nh->nh_ifa = info->rti_ifa; /* depends on the gateway */ nh->nh_aifp = get_aifp(nh, 0); /* * Note some of the remaining data is set by the * per-address-family pre-add hook. */ return (0); } /* * Creates a new nexthop based on the information in @info. * * Returns: * 0 on success, filling @nh_ret with the desired nexthop object ptr * errno otherwise */ int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info, struct nhop_object **nh_ret) { struct nhop_priv *nh_priv; int error; NET_EPOCH_ASSERT(); if (info->rti_info[RTAX_GATEWAY] == NULL) return (EINVAL); nh_priv = alloc_nhop_structure(); error = fill_nhop_from_info(nh_priv, info); if (error != 0) { uma_zfree(nhops_zone, nh_priv->nh); return (error); } error = get_nhop(rnh, info, &nh_priv); if (error == 0) *nh_ret = nh_priv->nh; return (error); } /* * Gets linked nhop using the provided @pnh_priv nexhop data. * If linked nhop is found, returns it, freeing the provided one. * If there is no such nexthop, attaches the remaining data to the * provided nexthop and links it. * * Returns 0 on success, storing referenced nexthop in @pnh_priv. * Otherwise, errno is returned. */ static int get_nhop(struct rib_head *rnh, struct rt_addrinfo *info, struct nhop_priv **pnh_priv) { const struct sockaddr *dst, *gateway, *netmask; struct nhop_priv *nh_priv, *tmp_priv; int error; nh_priv = *pnh_priv; /* Give the protocols chance to augment the request data */ dst = info->rti_info[RTAX_DST]; netmask = info->rti_info[RTAX_NETMASK]; gateway = info->rti_info[RTAX_GATEWAY]; error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, nh_priv->nh); if (error != 0) { uma_zfree(nhops_zone, nh_priv->nh); return (error); } tmp_priv = find_nhop(rnh->nh_control, nh_priv); if (tmp_priv != NULL) { uma_zfree(nhops_zone, nh_priv->nh); *pnh_priv = tmp_priv; return (0); } /* * Existing nexthop not found, need to create new one. * Note: multiple simultaneous get_nhop() requests * can result in multiple equal nexhops existing in the * nexthop table. This is not a not a problem until the * relative number of such nexthops is significant, which * is extremely unlikely. */ error = finalize_nhop(rnh->nh_control, info, nh_priv); if (error != 0) return (error); return (0); } /* * Update @nh with data supplied in @info. * This is a helper function to support route changes. * * It limits the changes that can be done to the route to the following: * 1) all combination of gateway changes (gw, interface, blackhole/reject) * 2) route flags (FLAG[123],STATIC,BLACKHOLE,REJECT) * 3) route MTU * * Returns: * 0 on success */ static int alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info) { struct sockaddr *info_gw; int error; /* Update MTU if set in the request*/ set_nhop_mtu_from_info(nh, info); /* XXX: allow only one of BLACKHOLE,REJECT,GATEWAY */ /* Allow some flags (FLAG1,STATIC,BLACKHOLE,REJECT) to be toggled on change. */ nh->nh_priv->rt_flags &= ~RTF_FMASK; nh->nh_priv->rt_flags |= info->rti_flags & RTF_FMASK; /* Consider gateway change */ info_gw = info->rti_info[RTAX_GATEWAY]; if (info_gw != NULL) { error = set_nhop_gw_from_info(nh, info); if (error != 0) return (error); /* Update RTF_GATEWAY flag status */ nh->nh_priv->rt_flags &= ~RTF_GATEWAY; nh->nh_priv->rt_flags |= (RTF_GATEWAY & info->rti_flags); } /* Update datapath flags */ nh->nh_flags = convert_rt_to_nh_flags(nh->nh_priv->rt_flags); if (info->rti_ifa != NULL) nh->nh_ifa = info->rti_ifa; if (info->rti_ifp != NULL) nh->nh_ifp = info->rti_ifp; nh->nh_aifp = get_aifp(nh, 0); return (0); } /* * Creates new nexthop based on @nh_orig and augmentation data from @info. * Helper function used in the route changes, please see * alter_nhop_from_info() comments for more details. * * Returns: * 0 on success, filling @nh_ret with the desired nexthop object * errno otherwise */ int nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig, struct rt_addrinfo *info, struct nhop_object **pnh) { struct nhop_priv *nh_priv; struct nhop_object *nh; int error; NET_EPOCH_ASSERT(); nh_priv = alloc_nhop_structure(); nh = nh_priv->nh; /* Start with copying data from original nexthop */ nh_priv->nh_family = nh_orig->nh_priv->nh_family; nh_priv->rt_flags = nh_orig->nh_priv->rt_flags; nh_priv->nh_type = nh_orig->nh_priv->nh_type; nh->nh_ifp = nh_orig->nh_ifp; nh->nh_ifa = nh_orig->nh_ifa; nh->nh_aifp = nh_orig->nh_aifp; nh->nh_mtu = nh_orig->nh_mtu; nh->nh_flags = nh_orig->nh_flags; memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len); error = alter_nhop_from_info(nh, info); if (error != 0) { uma_zfree(nhops_zone, nh_priv->nh); return (error); } error = get_nhop(rnh, info, &nh_priv); if (error == 0) *pnh = nh_priv->nh; return (error); } /* * Allocates memory for public/private nexthop structures. * * Returns pointer to nhop_priv or NULL. */ static struct nhop_priv * alloc_nhop_structure() { struct nhop_object *nh; struct nhop_priv *nh_priv; nh = (struct nhop_object *)uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO); if (nh == NULL) return (NULL); nh_priv = (struct nhop_priv *)((char *)nh + NHOP_OBJECT_ALIGNED_SIZE); nh->nh_priv = nh_priv; nh_priv->nh = nh; return (nh_priv); } /* * Alocates/references the remaining bits of nexthop data and links * it to the hash table. * Returns 0 if successful, * errno otherwise. @nh_priv is freed in case of error. */ static int finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, struct nhop_priv *nh_priv) { struct nhop_object *nh; nh = nh_priv->nh; /* Allocate per-cpu packet counter */ nh->nh_pksent = counter_u64_alloc(M_NOWAIT); if (nh->nh_pksent == NULL) { uma_zfree(nhops_zone, nh); RTSTAT_INC(rts_nh_alloc_failure); DPRINTF("nh_alloc_finalize failed"); return (ENOMEM); } /* Save vnet to ease destruction */ nh_priv->nh_vnet = curvnet; /* Reference external objects and calculate (referenced) ifa */ if_ref(nh->nh_ifp); ifa_ref(nh->nh_ifa); nh->nh_aifp = get_aifp(nh, 1); DPRINTF("AIFP: %p nh_ifp %p", nh->nh_aifp, nh->nh_ifp); refcount_init(&nh_priv->nh_refcnt, 1); /* Please see nhop_free() comments on the initial value */ refcount_init(&nh_priv->nh_linked, 2); print_nhop("FINALIZE", nh); if (link_nhop(ctl, nh_priv) == 0) { /* * Adding nexthop to the datastructures * failed. Call destructor w/o waiting for * the epoch end, as nexthop is not used * and return. */ DPRINTF("link_nhop failed!"); destroy_nhop(nh_priv); return (ENOBUFS); } return (0); } static void print_nhop_sa(char *buf, size_t buflen, const struct sockaddr *sa) { if (sa->sa_family == AF_INET) { const struct sockaddr_in *sin4; sin4 = (const struct sockaddr_in *)sa; inet_ntop(AF_INET, &sin4->sin_addr, buf, buflen); } else if (sa->sa_family == AF_INET6) { const struct sockaddr_in6 *sin6; sin6 = (const struct sockaddr_in6 *)sa; inet_ntop(AF_INET6, &sin6->sin6_addr, buf, buflen); } else if (sa->sa_family == AF_LINK) { const struct sockaddr_dl *sdl; sdl = (const struct sockaddr_dl *)sa; snprintf(buf, buflen, "if#%d", sdl->sdl_index); } else snprintf(buf, buflen, "af:%d", sa->sa_family); } static void print_nhop(const char *prefix, const struct nhop_object *nh) { char src_buf[INET6_ADDRSTRLEN], addr_buf[INET6_ADDRSTRLEN]; print_nhop_sa(src_buf, sizeof(src_buf), nh->nh_ifa->ifa_addr); print_nhop_sa(addr_buf, sizeof(addr_buf), &nh->gw_sa); DPRINTF("%s nhop priv %p: AF %d ifp %p %s addr %s src %p %s aifp %p %s mtu %d nh_flags %X", prefix, nh->nh_priv, nh->nh_priv->nh_family, nh->nh_ifp, if_name(nh->nh_ifp), addr_buf, nh->nh_ifa, src_buf, nh->nh_aifp, if_name(nh->nh_aifp), nh->nh_mtu, nh->nh_flags); } static void destroy_nhop(struct nhop_priv *nh_priv) { struct nhop_object *nh; nh = nh_priv->nh; print_nhop("DEL", nh); if_rele(nh->nh_ifp); if_rele(nh->nh_aifp); ifa_free(nh->nh_ifa); counter_u64_free(nh->nh_pksent); uma_zfree(nhops_zone, nh); } /* * Epoch callback indicating nhop is safe to destroy */ static void destroy_nhop_epoch(epoch_context_t ctx) { struct nhop_priv *nh_priv; nh_priv = __containerof(ctx, struct nhop_priv, nh_epoch_ctx); destroy_nhop(nh_priv); } void nhop_ref_object(struct nhop_object *nh) { u_int old; old = refcount_acquire(&nh->nh_priv->nh_refcnt); KASSERT(old > 0, ("%s: nhop object %p has 0 refs", __func__, nh)); } int nhop_try_ref_object(struct nhop_object *nh) { return (refcount_acquire_if_not_zero(&nh->nh_priv->nh_refcnt)); } void nhop_free(struct nhop_object *nh) { struct nh_control *ctl; struct nhop_priv *nh_priv = nh->nh_priv; struct epoch_tracker et; if (!refcount_release(&nh_priv->nh_refcnt)) return; /* * There are only 2 places, where nh_linked can be decreased: * rib destroy (nhops_destroy_rib) and this function. * nh_link can never be increased. * * Hence, use initial value of 2 to make use of * refcount_release_if_not_last(). * * There can be two scenarious when calling this function: * * 1) nh_linked value is 2. This means that either * nhops_destroy_rib() has not been called OR it is running, * but we are guaranteed that nh_control won't be freed in * this epoch. Hence, nexthop can be safely unlinked. * * 2) nh_linked value is 1. In that case, nhops_destroy_rib() * has been called and nhop unlink can be skipped. */ NET_EPOCH_ENTER(et); if (refcount_release_if_not_last(&nh_priv->nh_linked)) { ctl = nh_priv->nh_control; if (unlink_nhop(ctl, nh_priv) == NULL) { /* Do not try to reclaim */ DPRINTF("Failed to unlink nexhop %p", nh_priv); NET_EPOCH_EXIT(et); return; } } NET_EPOCH_EXIT(et); epoch_call(net_epoch_preempt, destroy_nhop_epoch, &nh_priv->nh_epoch_ctx); } void nhop_ref_any(struct nhop_object *nh) { #ifdef ROUTE_MPATH if (!NH_IS_NHGRP(nh)) nhop_ref_object(nh); else nhgrp_ref_object((struct nhgrp_object *)nh); #else nhop_ref_object(nh); #endif } void nhop_free_any(struct nhop_object *nh) { #ifdef ROUTE_MPATH if (!NH_IS_NHGRP(nh)) nhop_free(nh); else nhgrp_free((struct nhgrp_object *)nh); #else nhop_free(nh); #endif } /* Helper functions */ uint32_t nhop_get_idx(const struct nhop_object *nh) { return (nh->nh_priv->nh_idx); } enum nhop_type nhop_get_type(const struct nhop_object *nh) { return (nh->nh_priv->nh_type); } void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type) { nh->nh_priv->nh_type = nh_type; } int nhop_get_rtflags(const struct nhop_object *nh) { return (nh->nh_priv->rt_flags); } void nhop_set_rtflags(struct nhop_object *nh, int rt_flags) { nh->nh_priv->rt_flags = rt_flags; } struct vnet * nhop_get_vnet(const struct nhop_object *nh) { return (nh->nh_priv->nh_vnet); } +struct nhop_object * +nhop_select_func(struct nhop_object *nh, uint32_t flowid) +{ + + return (nhop_select(nh, flowid)); +} + void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu) { struct nh_control *ctl; struct nhop_priv *nh_priv; struct nhop_object *nh; ctl = rh->nh_control; NHOPS_WLOCK(ctl); CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) { nh = nh_priv->nh; if (nh->nh_ifp == ifp) { if ((nh_priv->rt_flags & RTF_FIXEDMTU) == 0 || nh->nh_mtu > mtu) { /* Update MTU directly */ nh->nh_mtu = mtu; } } } CHT_SLIST_FOREACH_END; NHOPS_WUNLOCK(ctl); } /* * Dumps a single entry to sysctl buffer. * * Layout: * rt_msghdr - generic RTM header to allow users to skip non-understood messages * nhop_external - nexhop description structure (with length) * nhop_addrs - structure encapsulating GW/SRC sockaddrs */ static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w) { struct { struct rt_msghdr rtm; struct nhop_external nhe; struct nhop_addrs na; } arpc; struct nhop_external *pnhe; struct sockaddr *gw_sa, *src_sa; struct sockaddr_storage ss; size_t addrs_len; int error; //DPRINTF("Dumping: head %p nh %p flags %X req %p\n", rh, nh, nh->nh_flags, w); memset(&arpc, 0, sizeof(arpc)); arpc.rtm.rtm_msglen = sizeof(arpc); arpc.rtm.rtm_version = RTM_VERSION; arpc.rtm.rtm_type = RTM_GET; //arpc.rtm.rtm_flags = RTF_UP; arpc.rtm.rtm_flags = nh->nh_priv->rt_flags; /* nhop_external */ pnhe = &arpc.nhe; pnhe->nh_len = sizeof(struct nhop_external); pnhe->nh_idx = nh->nh_priv->nh_idx; pnhe->nh_fib = rh->rib_fibnum; pnhe->ifindex = nh->nh_ifp->if_index; pnhe->aifindex = nh->nh_aifp->if_index; pnhe->nh_family = nh->nh_priv->nh_family; pnhe->nh_type = nh->nh_priv->nh_type; pnhe->nh_mtu = nh->nh_mtu; pnhe->nh_flags = nh->nh_flags; memcpy(pnhe->nh_prepend, nh->nh_prepend, sizeof(nh->nh_prepend)); pnhe->prepend_len = nh->nh_prepend_len; pnhe->nh_refcount = nh->nh_priv->nh_refcnt; pnhe->nh_pksent = counter_u64_fetch(nh->nh_pksent); /* sockaddr container */ addrs_len = sizeof(struct nhop_addrs); arpc.na.gw_sa_off = addrs_len; gw_sa = (struct sockaddr *)&nh->gw4_sa; addrs_len += gw_sa->sa_len; src_sa = nh->nh_ifa->ifa_addr; if (src_sa->sa_family == AF_LINK) { /* Shorten structure */ memset(&ss, 0, sizeof(struct sockaddr_storage)); fill_sdl_from_ifp((struct sockaddr_dl_short *)&ss, nh->nh_ifa->ifa_ifp); src_sa = (struct sockaddr *)&ss; } arpc.na.src_sa_off = addrs_len; addrs_len += src_sa->sa_len; /* Write total container length */ arpc.na.na_len = addrs_len; arpc.rtm.rtm_msglen += arpc.na.na_len - sizeof(struct nhop_addrs); error = SYSCTL_OUT(w, &arpc, sizeof(arpc)); if (error == 0) error = SYSCTL_OUT(w, gw_sa, gw_sa->sa_len); if (error == 0) error = SYSCTL_OUT(w, src_sa, src_sa->sa_len); return (error); } uint32_t nhops_get_count(struct rib_head *rh) { struct nh_control *ctl; uint32_t count; ctl = rh->nh_control; NHOPS_RLOCK(ctl); count = ctl->nh_head.items_count; NHOPS_RUNLOCK(ctl); return (count); } int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) { struct nh_control *ctl; struct nhop_priv *nh_priv; int error; ctl = rh->nh_control; NHOPS_RLOCK(ctl); DPRINTF("NHDUMP: count=%u", ctl->nh_head.items_count); CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) { error = dump_nhop_entry(rh, nh_priv->nh, w); if (error != 0) { NHOPS_RUNLOCK(ctl); return (error); } } CHT_SLIST_FOREACH_END; NHOPS_RUNLOCK(ctl); return (0); } diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 8e1bcddd6895..e9fb83773a4b 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -1,1214 +1,1176 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010-2011 Alexander V. Chernikov * Copyright (c) 2004-2005 Gleb Smirnoff * Copyright (c) 2001-2003 Roman V. Palagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $ */ #include __FBSDID("$FreeBSD$"); +#include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include +#include #include #include #include #include #include +#include + #include #include #include #include #include #define NBUCKETS (65536) /* must be power of 2 */ /* This hash is for TCP or UDP packets. */ #define FULL_HASH(addr1, addr2, port1, port2) \ (((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) ^ \ port1 ^ htons(port2)) & \ (NBUCKETS - 1)) /* This hash is for all other IP packets. */ #define ADDR_HASH(addr1, addr2) \ ((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) & \ (NBUCKETS - 1)) /* Macros to shorten logical constructions */ /* XXX: priv must exist in namespace */ #define INACTIVE(fle) (time_uptime - fle->f.last > priv->nfinfo_inact_t) #define AGED(fle) (time_uptime - fle->f.first > priv->nfinfo_act_t) #define ISFREE(fle) (fle->f.packets == 0) /* * 4 is a magical number: statistically number of 4-packet flows is * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case * of reachable host and 4-packet otherwise. */ #define SMALL(fle) (fle->f.packets <= 4) MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash"); static int export_add(item_p, struct flow_entry *); static int export_send(priv_p, fib_export_p, item_p, int); +#ifdef INET static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *, int, uint8_t, uint8_t); +#endif #ifdef INET6 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *, int, uint8_t, uint8_t); #endif static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int); /* * Generate hash for a given flow record. * * FIB is not used here, because: * most VRFS will carry public IPv4 addresses which are unique even * without FIB private addresses can overlap, but this is worked out * via flow_rec bcmp() containing fib id. In IPv6 world addresses are * all globally unique (it's not fully true, there is FC00::/7 for example, * but chances of address overlap are MUCH smaller) */ static inline uint32_t ip_hash(struct flow_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr, r->r_sport, r->r_dport); default: return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr); } } #ifdef INET6 /* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */ static inline uint32_t ip6_hash(struct flow6_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport, r->r_dport); default: return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3]); } } -static inline int -ip6_masklen(struct in6_addr *saddr, struct rt_addrinfo *info) -{ - const int nbits = sizeof(*saddr) * NBBY; - int mlen; - - if (info->rti_addrs & RTA_NETMASK) - bit_count((bitstr_t *)saddr, 0, nbits, &mlen); - else - mlen = nbits; - return (mlen); -} #endif /* * Detach export datagram from priv, if there is any. * If there is no, allocate a new one. */ static item_p get_export_dgram(priv_p priv, fib_export_p fe) { item_p item = NULL; mtx_lock(&fe->export_mtx); if (fe->exp.item != NULL) { item = fe->exp.item; fe->exp.item = NULL; } mtx_unlock(&fe->export_mtx); if (item == NULL) { struct netflow_v5_export_dgram *dgram; struct mbuf *m; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (NULL); item = ng_package_data(m, NG_NOFLAGS); if (item == NULL) return (NULL); dgram = mtod(m, struct netflow_v5_export_dgram *); dgram->header.count = 0; dgram->header.version = htons(NETFLOW_V5); dgram->header.pad = 0; } return (item); } /* * Re-attach incomplete datagram back to priv. * If there is already another one, then send incomplete. */ static void return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags) { /* * It may happen on SMP, that some thread has already * put its item there, in this case we bail out and * send what we have to collector. */ mtx_lock(&fe->export_mtx); if (fe->exp.item == NULL) { fe->exp.item = item; mtx_unlock(&fe->export_mtx); } else { mtx_unlock(&fe->export_mtx); export_send(priv, fe, item, flags); } } /* * The flow is over. Call export_add() and free it. If datagram is * full, then call export_send(). */ static void expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags) { struct netflow_export_item exp; uint16_t version = fle->f.version; if ((priv->export != NULL) && (version == IPVERSION)) { exp.item = get_export_dgram(priv, fe); if (exp.item == NULL) { priv->nfinfo_export_failed++; if (priv->export9 != NULL) priv->nfinfo_export9_failed++; /* fle definitely contains IPv4 flow. */ uma_zfree_arg(priv->zone, fle, priv); return; } if (export_add(exp.item, fle) > 0) export_send(priv, fe, exp.item, flags); else return_export_dgram(priv, fe, exp.item, NG_QUEUE); } if (priv->export9 != NULL) { exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt); if (exp.item9 == NULL) { priv->nfinfo_export9_failed++; if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif else panic("ng_netflow: Unknown IP proto: %d", version); return; } if (export9_add(exp.item9, exp.item9_opt, fle) > 0) export9_send(priv, fe, exp.item9, exp.item9_opt, flags); else return_export9_dgram(priv, fe, exp.item9, exp.item9_opt, NG_QUEUE); } if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif } /* Get a snapshot of node statistics */ void ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i) { i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes); i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets); i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6); i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6); i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes); i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets); i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6); i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6); i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp); i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp); i->nfinfo_used = uma_zone_get_cur(priv->zone); #ifdef INET6 i->nfinfo_used6 = uma_zone_get_cur(priv->zone6); #endif i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed; i->nfinfo_export_failed = priv->nfinfo_export_failed; i->nfinfo_export9_failed = priv->nfinfo_export9_failed; i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf; i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs; i->nfinfo_inact_t = priv->nfinfo_inact_t; i->nfinfo_act_t = priv->nfinfo_act_t; } /* * Insert a record into defined slot. * * First we get for us a free flow entry, then fill in all * possible fields in it. * * TODO: consider dropping hash mutex while filling in datagram, * as this was done in previous version. Need to test & profile * to be sure. */ +#ifdef INET static int hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow_entry *fle; - struct sockaddr_in sin, sin_mask; - struct sockaddr_dl rt_gateway; - struct rt_addrinfo info; mtx_assert(&hsh->mtx, MA_OWNED); fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT); if (fle == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * we can safely edit it. */ fle->f.version = IPVERSION; bcopy(r, &fle->f.r, sizeof(struct flow_rec)); fle->f.bytes = plen; fle->f.packets = 1; fle->f.tcp_flags = tcp_flags; fle->f.first = fle->f.last = time_uptime; /* * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr = fle->f.r.r_dst; - - rt_gateway.sdl_len = sizeof(rt_gateway); - sin_mask.sin_len = sizeof(struct sockaddr_in); - bzero(&info, sizeof(info)); - - info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; - info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask; - - if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, NHR_REF, 0, - &info) == 0) { - fle->f.fle_o_ifx = info.rti_ifp->if_index; - - if (info.rti_flags & RTF_GATEWAY && - rt_gateway.sdl_family == AF_INET) - fle->f.next_hop = - ((struct sockaddr_in *)&rt_gateway)->sin_addr; - - if (info.rti_addrs & RTA_NETMASK) - fle->f.dst_mask = bitcount32(sin_mask.sin_addr.s_addr); - else if (info.rti_flags & RTF_HOST) - /* Give up. We can't determine mask :( */ - fle->f.dst_mask = 32; - - rib_free_info(&info); + struct rtentry *rt; + struct route_nhop_data rnd; + + rt = fib4_lookup_rt(r->fib, fle->f.r.r_dst, 0, NHR_NONE, &rnd); + if (rt != NULL) { + struct in_addr addr; + uint32_t scopeid; + struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0); + int plen; + + rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); + fle->f.fle_o_ifx = nh->nh_ifp->if_index; + if (nh->gw_sa.sa_len == AF_INET) + fle->f.next_hop = nh->gw4_sa.sin_addr; + fle->f.dst_mask = plen; } } /* Do route lookup on source address, to fill in src_mask. */ if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr = fle->f.r.r_src; - - sin_mask.sin_len = sizeof(struct sockaddr_in); - bzero(&info, sizeof(info)); - - info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask; - - if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, 0, 0, - &info) == 0) { - if (info.rti_addrs & RTA_NETMASK) - fle->f.src_mask = - bitcount32(sin_mask.sin_addr.s_addr); - else if (info.rti_flags & RTF_HOST) - /* Give up. We can't determine mask :( */ - fle->f.src_mask = 32; + struct rtentry *rt; + struct route_nhop_data rnd; + + rt = fib4_lookup_rt(r->fib, fle->f.r.r_src, 0, NHR_NONE, &rnd); + if (rt != NULL) { + struct in_addr addr; + uint32_t scopeid; + int plen; + + rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); + fle->f.src_mask = plen; } } /* Push new flow at the and of hash. */ TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); return (0); } +#endif #ifdef INET6 static int hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow6_entry *fle6; - struct sockaddr_in6 sin6, sin6_mask; - struct sockaddr_dl rt_gateway; - struct rt_addrinfo info; mtx_assert(&hsh6->mtx, MA_OWNED); fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT); if (fle6 == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * we can safely edit it. */ fle6->f.version = IP6VERSION; bcopy(r, &fle6->f.r, sizeof(struct flow6_rec)); fle6->f.bytes = plen; fle6->f.packets = 1; fle6->f.tcp_flags = tcp_flags; fle6->f.first = fle6->f.last = time_uptime; /* * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { - bzero(&sin6, sizeof(struct sockaddr_in6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = r->dst.r_dst6; - - rt_gateway.sdl_len = sizeof(rt_gateway); - sin6_mask.sin6_len = sizeof(struct sockaddr_in6); - bzero(&info, sizeof(info)); - - info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; - info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask; - - if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, NHR_REF, - 0, &info) == 0) { - fle6->f.fle_o_ifx = info.rti_ifp->if_index; - - if (info.rti_flags & RTF_GATEWAY && - rt_gateway.sdl_family == AF_INET6) - fle6->f.n.next_hop6 = - ((struct sockaddr_in6 *)&rt_gateway)->sin6_addr; - - fle6->f.dst_mask = - ip6_masklen(&sin6_mask.sin6_addr, &info); - - rib_free_info(&info); + struct rtentry *rt; + struct route_nhop_data rnd; + + rt = fib6_lookup_rt(r->fib, &fle6->f.r.dst.r_dst6, 0, NHR_NONE, &rnd); + if (rt != NULL) { + struct in6_addr addr; + uint32_t scopeid; + struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0); + int plen; + + rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); + fle6->f.fle_o_ifx = nh->nh_ifp->if_index; + if (nh->gw_sa.sa_len == AF_INET6) + fle6->f.n.next_hop6 = nh->gw6_sa.sin6_addr; + fle6->f.dst_mask = plen; } } if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { /* Do route lookup on source address, to fill in src_mask. */ - bzero(&sin6, sizeof(struct sockaddr_in6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = r->src.r_src6; - - sin6_mask.sin6_len = sizeof(struct sockaddr_in6); - bzero(&info, sizeof(info)); + struct rtentry *rt; + struct route_nhop_data rnd; - info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask; + rt = fib6_lookup_rt(r->fib, &fle6->f.r.src.r_src6, 0, NHR_NONE, &rnd); + if (rt != NULL) { + struct in6_addr addr; + uint32_t scopeid; + int plen; - if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, 0, 0, - &info) == 0) - fle6->f.src_mask = - ip6_masklen(&sin6_mask.sin6_addr, &info); + rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); + fle6->f.src_mask = plen; + } } /* Push new flow at the and of hash. */ TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash); return (0); } #endif /* * Non-static functions called from ng_netflow.c */ /* Allocate memory and set up flow cache */ void ng_netflow_cache_init(priv_p priv) { struct flow_hash_entry *hsh; int i; /* Initialize cache UMA zone. */ priv->zone = uma_zcreate("NetFlow IPv4 cache", sizeof(struct flow_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone, CACHESIZE); #ifdef INET6 priv->zone6 = uma_zcreate("NetFlow IPv6 cache", sizeof(struct flow6_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone6, CACHESIZE); #endif /* Allocate hash. */ priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #ifdef INET6 /* Allocate hash. */ priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #endif priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets = counter_u64_alloc(M_WAITOK); priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK); priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK); ng_netflow_v9_cache_init(priv); CTR0(KTR_NET, "ng_netflow startup()"); } /* Initialize new FIB table for v5 and v9 */ int ng_netflow_fib_init(priv_p priv, int fib) { fib_export_p fe = priv_to_fib(priv, fib); CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib); if (fe != NULL) return (0); if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF); mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF); fe->fib = fib; fe->domain_id = fib; if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib], (uintptr_t)NULL, (uintptr_t)fe) == 0) { /* FIB already set up by other ISR */ CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p", fib, fe, priv_to_fib(priv, fib)); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } else { /* Increase counter for statistics */ CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)", fib, fe, priv_to_fib(priv, fib)); priv->nfinfo_alloc_fibs++; } return (0); } /* Free all flow cache memory. Called from node close method. */ void ng_netflow_cache_flush(priv_p priv) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct netflow_export_item exp; fib_export_p fe; int i; bzero(&exp, sizeof(exp)); /* * We are going to free probably billable data. * Expire everything before freeing it. * No locking is required since callout is already drained. */ for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #ifdef INET6 for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #endif uma_zdestroy(priv->zone); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash != NULL) free(priv->hash, M_NETFLOW_HASH); #ifdef INET6 uma_zdestroy(priv->zone6); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash6 != NULL) free(priv->hash6, M_NETFLOW_HASH); #endif for (i = 0; i < priv->maxfibs; i++) { if ((fe = priv_to_fib(priv, i)) == NULL) continue; if (fe->exp.item != NULL) export_send(priv, fe, fe->exp.item, NG_QUEUE); if (fe->exp.item9 != NULL) export9_send(priv, fe, fe->exp.item9, fe->exp.item9_opt, NG_QUEUE); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } counter_u64_free(priv->nfinfo_bytes); counter_u64_free(priv->nfinfo_packets); counter_u64_free(priv->nfinfo_bytes6); counter_u64_free(priv->nfinfo_packets6); counter_u64_free(priv->nfinfo_sbytes); counter_u64_free(priv->nfinfo_spackets); counter_u64_free(priv->nfinfo_sbytes6); counter_u64_free(priv->nfinfo_spackets6); counter_u64_free(priv->nfinfo_act_exp); counter_u64_free(priv->nfinfo_inact_exp); ng_netflow_v9_cache_flush(priv); } +#ifdef INET /* Insert packet from into flow cache. */ int ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct flow_rec r; int hlen, plen; int error = 0; uint16_t eproto; uint8_t tcp_flags = 0; bzero(&r, sizeof(r)); if (ip->ip_v != IPVERSION) return (EINVAL); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) return (EINVAL); eproto = ETHERTYPE_IP; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V4_L4; r.r_src = ip->ip_src; r.r_dst = ip->ip_dst; r.fib = fe->fib; plen = ntohs(ip->ip_len); r.r_ip_p = ip->ip_p; r.r_tos = ip->ip_tos; r.r_i_ifx = src_if_index; /* * XXX NOTE: only first fragment of fragmented TCP, UDP and * ICMP packet will be recorded with proper s_port and d_port. * Following fragments will be recorded simply as IP packet with * ip_proto = ip->ip_p and s_port, d_port set to zero. * I know, it looks like bug. But I don't want to re-implement * ip packet assebmling here. Anyway, (in)famous trafd works this way - * and nobody complains yet :) */ if ((ip->ip_off & htons(IP_OFFMASK)) == 0) switch(r.r_ip_p) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)((caddr_t )ip + hlen); r.r_sport = tcp->th_sport; r.r_dport = tcp->th_dport; tcp_flags = tcp->th_flags; break; } case IPPROTO_UDP: r.r_ports = *(uint32_t *)((caddr_t )ip + hlen); break; } counter_u64_add(priv->nfinfo_packets, 1); counter_u64_add(priv->nfinfo_bytes, plen); /* Find hash slot. */ hsh = &priv->hash[ip_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry, that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0) break; if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle) { /* An existent entry. */ fle->f.bytes += plen; fle->f.packets ++; fle->f.tcp_flags |= tcp_flags; fle->f.last = time_uptime; /* * We have the following reasons to expire flow in active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) || (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } +#endif #ifdef INET6 /* Insert IPv6 packet from into flow cache. */ int ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle = NULL, *fle1; struct flow6_entry *fle6; struct flow_hash_entry *hsh; struct flow6_rec r; int plen; int error = 0; uint8_t tcp_flags = 0; /* check version */ if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) return (EINVAL); bzero(&r, sizeof(r)); r.src.r_src6 = ip6->ip6_src; r.dst.r_dst6 = ip6->ip6_dst; r.fib = fe->fib; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V6_L4; plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); #if 0 /* XXX: set DSCP/CoS value */ r.r_tos = ip->ip_tos; #endif if ((flags & NG_NETFLOW_IS_FRAG) == 0) { switch(upper_proto) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)upper_ptr; r.r_ports = *(uint32_t *)upper_ptr; tcp_flags = tcp->th_flags; break; } case IPPROTO_UDP: case IPPROTO_SCTP: r.r_ports = *(uint32_t *)upper_ptr; break; } } r.r_ip_p = upper_proto; r.r_i_ifx = src_if_index; counter_u64_add(priv->nfinfo_packets6, 1); counter_u64_add(priv->nfinfo_bytes6, plen); /* Find hash slot. */ hsh = &priv->hash6[ip6_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry, that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (fle->f.version != IP6VERSION) continue; fle6 = (struct flow6_entry *)fle; if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0) break; if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle != NULL) { /* An existent entry. */ fle6 = (struct flow6_entry *)fle; fle6->f.bytes += plen; fle6->f.packets ++; fle6->f.tcp_flags |= tcp_flags; fle6->f.last = time_uptime; /* * We have the following reasons to expire flow in active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) || (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #endif /* * Return records from cache to userland. * * TODO: matching particular IP should be done in kernel, here. */ int ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req, struct ngnf_show_header *resp) { struct flow_hash_entry *hsh; struct flow_entry *fle; struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1); #ifdef INET6 struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1); #endif int i, max; i = req->hash_id; if (i > NBUCKETS-1) return (EINVAL); #ifdef INET6 if (req->version == 6) { resp->version = 6; hsh = priv->hash6 + i; max = NREC6_AT_ONCE; } else #endif if (req->version == 4) { resp->version = 4; hsh = priv->hash + i; max = NREC_AT_ONCE; } else return (EINVAL); /* * We will transfer not more than NREC_AT_ONCE. More data * will come in next message. * We send current hash index and current record number in list * to userland, and userland should return it back to us. * Then, we will restart with new entry. * * The resulting cache snapshot can be inaccurate if flow expiration * is taking place on hash item between userland data requests for * this hash item id. */ resp->nentries = 0; for (; i < NBUCKETS; hsh++, i++) { int list_id; if (mtx_trylock(&hsh->mtx) == 0) { /* * Requested hash index is not available, * relay decision to skip or re-request data * to userland. */ resp->hash_id = i; resp->list_id = 0; return (0); } list_id = 0; TAILQ_FOREACH(fle, &hsh->head, fle_hash) { if (hsh->mtx.mtx_lock & MTX_CONTESTED) { resp->hash_id = i; resp->list_id = list_id; mtx_unlock(&hsh->mtx); return (0); } list_id++; /* Search for particular record in list. */ if (req->list_id > 0) { if (list_id < req->list_id) continue; /* Requested list position found. */ req->list_id = 0; } #ifdef INET6 if (req->version == 6) { struct flow6_entry *fle6; fle6 = (struct flow6_entry *)fle; bcopy(&fle6->f, data6 + resp->nentries, sizeof(fle6->f)); } else #endif bcopy(&fle->f, data + resp->nentries, sizeof(fle->f)); resp->nentries++; if (resp->nentries == max) { resp->hash_id = i; /* * If it was the last item in list * we simply skip to next hash_id. */ resp->list_id = list_id + 1; mtx_unlock(&hsh->mtx); return (0); } } mtx_unlock(&hsh->mtx); } resp->hash_id = resp->list_id = 0; return (0); } /* We have full datagram in privdata. Send it to export hook. */ static int export_send(priv_p priv, fib_export_p fe, item_p item, int flags) { struct mbuf *m = NGI_M(item); struct netflow_v5_export_dgram *dgram = mtod(m, struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct timespec ts; int error = 0; /* Fill mbuf header. */ m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) * header->count + sizeof(struct netflow_v5_header); /* Fill export header. */ header->sys_uptime = htonl(MILLIUPTIME(time_uptime)); getnanotime(&ts); header->unix_secs = htonl(ts.tv_sec); header->unix_nsecs = htonl(ts.tv_nsec); header->engine_type = 0; header->engine_id = fe->domain_id; header->pad = 0; header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq, header->count)); header->count = htons(header->count); if (priv->export != NULL) NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags); else NG_FREE_ITEM(item); return (error); } /* Add export record to dgram. */ static int export_add(item_p item, struct flow_entry *fle) { struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct netflow_v5_record *rec; rec = &dgram->r[header->count]; header->count ++; KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS, ("ng_netflow: export too big")); /* Fill in export record. */ rec->src_addr = fle->f.r.r_src.s_addr; rec->dst_addr = fle->f.r.r_dst.s_addr; rec->next_hop = fle->f.next_hop.s_addr; rec->i_ifx = htons(fle->f.fle_i_ifx); rec->o_ifx = htons(fle->f.fle_o_ifx); rec->packets = htonl(fle->f.packets); rec->octets = htonl(fle->f.bytes); rec->first = htonl(MILLIUPTIME(fle->f.first)); rec->last = htonl(MILLIUPTIME(fle->f.last)); rec->s_port = fle->f.r.r_sport; rec->d_port = fle->f.r.r_dport; rec->flags = fle->f.tcp_flags; rec->prot = fle->f.r.r_ip_p; rec->tos = fle->f.r.r_tos; rec->dst_mask = fle->f.dst_mask; rec->src_mask = fle->f.src_mask; rec->pad1 = 0; rec->pad2 = 0; /* Not supported fields. */ rec->src_as = rec->dst_as = 0; if (header->count == NETFLOW_V5_MAX_RECORDS) return (1); /* end of datagram */ else return (0); } /* Periodic flow expiry run. */ void ng_netflow_expire(void *arg) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; priv_p priv = (priv_p )arg; int used, i; /* * Going through all the cache. */ used = uma_zone_get_cur(priv->zone); for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) { /* * Skip entries, that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ if (hsh->mtx.mtx_lock & MTX_CONTESTED) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle)) break; if ((INACTIVE(fle) && (SMALL(fle) || (used > (NBUCKETS*2)))) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #ifdef INET6 used = uma_zone_get_cur(priv->zone6); for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) { struct flow6_entry *fle6; /* * Skip entries, that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { fle6 = (struct flow6_entry *)fle; /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ if (hsh->mtx.mtx_lock & MTX_CONTESTED) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle6)) break; if ((INACTIVE(fle6) && (SMALL(fle6) || (used > (NBUCKETS*2)))) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #endif /* Schedule next expire. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } diff --git a/sys/netgraph/netflow/ng_netflow.c b/sys/netgraph/netflow/ng_netflow.c index 6180fe573b60..187e18a3130e 100644 --- a/sys/netgraph/netflow/ng_netflow.c +++ b/sys/netgraph/netflow/ng_netflow.c @@ -1,1038 +1,1051 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010-2011 Alexander V. Chernikov * Copyright (c) 2004-2005 Gleb Smirnoff * Copyright (c) 2001-2003 Roman V. Palagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $SourceForge: ng_netflow.c,v 1.30 2004/09/05 11:37:43 glebius Exp $ */ #include __FBSDID("$FreeBSD$"); +#include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Netgraph methods */ static ng_constructor_t ng_netflow_constructor; static ng_rcvmsg_t ng_netflow_rcvmsg; static ng_close_t ng_netflow_close; static ng_shutdown_t ng_netflow_rmnode; static ng_newhook_t ng_netflow_newhook; static ng_rcvdata_t ng_netflow_rcvdata; static ng_disconnect_t ng_netflow_disconnect; /* Parse type for struct ng_netflow_info */ static const struct ng_parse_struct_field ng_netflow_info_type_fields[] = NG_NETFLOW_INFO_TYPE; static const struct ng_parse_type ng_netflow_info_type = { &ng_parse_struct_type, &ng_netflow_info_type_fields }; /* Parse type for struct ng_netflow_ifinfo */ static const struct ng_parse_struct_field ng_netflow_ifinfo_type_fields[] = NG_NETFLOW_IFINFO_TYPE; static const struct ng_parse_type ng_netflow_ifinfo_type = { &ng_parse_struct_type, &ng_netflow_ifinfo_type_fields }; /* Parse type for struct ng_netflow_setdlt */ static const struct ng_parse_struct_field ng_netflow_setdlt_type_fields[] = NG_NETFLOW_SETDLT_TYPE; static const struct ng_parse_type ng_netflow_setdlt_type = { &ng_parse_struct_type, &ng_netflow_setdlt_type_fields }; /* Parse type for ng_netflow_setifindex */ static const struct ng_parse_struct_field ng_netflow_setifindex_type_fields[] = NG_NETFLOW_SETIFINDEX_TYPE; static const struct ng_parse_type ng_netflow_setifindex_type = { &ng_parse_struct_type, &ng_netflow_setifindex_type_fields }; /* Parse type for ng_netflow_settimeouts */ static const struct ng_parse_struct_field ng_netflow_settimeouts_type_fields[] = NG_NETFLOW_SETTIMEOUTS_TYPE; static const struct ng_parse_type ng_netflow_settimeouts_type = { &ng_parse_struct_type, &ng_netflow_settimeouts_type_fields }; /* Parse type for ng_netflow_setconfig */ static const struct ng_parse_struct_field ng_netflow_setconfig_type_fields[] = NG_NETFLOW_SETCONFIG_TYPE; static const struct ng_parse_type ng_netflow_setconfig_type = { &ng_parse_struct_type, &ng_netflow_setconfig_type_fields }; /* Parse type for ng_netflow_settemplate */ static const struct ng_parse_struct_field ng_netflow_settemplate_type_fields[] = NG_NETFLOW_SETTEMPLATE_TYPE; static const struct ng_parse_type ng_netflow_settemplate_type = { &ng_parse_struct_type, &ng_netflow_settemplate_type_fields }; /* Parse type for ng_netflow_setmtu */ static const struct ng_parse_struct_field ng_netflow_setmtu_type_fields[] = NG_NETFLOW_SETMTU_TYPE; static const struct ng_parse_type ng_netflow_setmtu_type = { &ng_parse_struct_type, &ng_netflow_setmtu_type_fields }; /* Parse type for struct ng_netflow_v9info */ static const struct ng_parse_struct_field ng_netflow_v9info_type_fields[] = NG_NETFLOW_V9INFO_TYPE; static const struct ng_parse_type ng_netflow_v9info_type = { &ng_parse_struct_type, &ng_netflow_v9info_type_fields }; /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_netflow_cmds[] = { { NGM_NETFLOW_COOKIE, NGM_NETFLOW_INFO, "info", NULL, &ng_netflow_info_type }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_IFINFO, "ifinfo", &ng_parse_uint16_type, &ng_netflow_ifinfo_type }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETDLT, "setdlt", &ng_netflow_setdlt_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETIFINDEX, "setifindex", &ng_netflow_setifindex_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETTIMEOUTS, "settimeouts", &ng_netflow_settimeouts_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETCONFIG, "setconfig", &ng_netflow_setconfig_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETTEMPLATE, "settemplate", &ng_netflow_settemplate_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_SETMTU, "setmtu", &ng_netflow_setmtu_type, NULL }, { NGM_NETFLOW_COOKIE, NGM_NETFLOW_V9INFO, "v9info", NULL, &ng_netflow_v9info_type }, { 0 } }; /* Netgraph node type descriptor */ static struct ng_type ng_netflow_typestruct = { .version = NG_ABI_VERSION, .name = NG_NETFLOW_NODE_TYPE, .constructor = ng_netflow_constructor, .rcvmsg = ng_netflow_rcvmsg, .close = ng_netflow_close, .shutdown = ng_netflow_rmnode, .newhook = ng_netflow_newhook, .rcvdata = ng_netflow_rcvdata, .disconnect = ng_netflow_disconnect, .cmdlist = ng_netflow_cmds, }; NETGRAPH_INIT(netflow, &ng_netflow_typestruct); /* Called at node creation */ static int ng_netflow_constructor(node_p node) { priv_p priv; int i; /* Initialize private data */ priv = malloc(sizeof(*priv), M_NETGRAPH, M_WAITOK | M_ZERO); /* Initialize fib data */ priv->maxfibs = rt_numfibs; priv->fib_data = malloc(sizeof(fib_export_p) * priv->maxfibs, M_NETGRAPH, M_WAITOK | M_ZERO); /* Make node and its data point at each other */ NG_NODE_SET_PRIVATE(node, priv); priv->node = node; /* Initialize timeouts to default values */ priv->nfinfo_inact_t = INACTIVE_TIMEOUT; priv->nfinfo_act_t = ACTIVE_TIMEOUT; /* Set default config */ for (i = 0; i < NG_NETFLOW_MAXIFACES; i++) priv->ifaces[i].info.conf = NG_NETFLOW_CONF_INGRESS; /* Initialize callout handle */ callout_init(&priv->exp_callout, 1); /* Allocate memory and set up flow cache */ ng_netflow_cache_init(priv); return (0); } /* * ng_netflow supports two hooks: data and export. * Incoming traffic is expected on data, and expired * netflow datagrams are sent to export. */ static int ng_netflow_newhook(node_p node, hook_p hook, const char *name) { const priv_p priv = NG_NODE_PRIVATE(node); if (strncmp(name, NG_NETFLOW_HOOK_DATA, /* an iface hook? */ strlen(NG_NETFLOW_HOOK_DATA)) == 0) { iface_p iface; int ifnum = -1; const char *cp; char *eptr; cp = name + strlen(NG_NETFLOW_HOOK_DATA); if (!isdigit(*cp) || (cp[0] == '0' && cp[1] != '\0')) return (EINVAL); ifnum = (int)strtoul(cp, &eptr, 10); if (*eptr != '\0' || ifnum < 0 || ifnum >= NG_NETFLOW_MAXIFACES) return (EINVAL); /* See if hook is already connected */ if (priv->ifaces[ifnum].hook != NULL) return (EISCONN); iface = &priv->ifaces[ifnum]; /* Link private info and hook together */ NG_HOOK_SET_PRIVATE(hook, iface); iface->hook = hook; /* * In most cases traffic accounting is done on an * Ethernet interface, so default data link type * will be DLT_EN10MB. */ iface->info.ifinfo_dlt = DLT_EN10MB; } else if (strncmp(name, NG_NETFLOW_HOOK_OUT, strlen(NG_NETFLOW_HOOK_OUT)) == 0) { iface_p iface; int ifnum = -1; const char *cp; char *eptr; cp = name + strlen(NG_NETFLOW_HOOK_OUT); if (!isdigit(*cp) || (cp[0] == '0' && cp[1] != '\0')) return (EINVAL); ifnum = (int)strtoul(cp, &eptr, 10); if (*eptr != '\0' || ifnum < 0 || ifnum >= NG_NETFLOW_MAXIFACES) return (EINVAL); /* See if hook is already connected */ if (priv->ifaces[ifnum].out != NULL) return (EISCONN); iface = &priv->ifaces[ifnum]; /* Link private info and hook together */ NG_HOOK_SET_PRIVATE(hook, iface); iface->out = hook; } else if (strcmp(name, NG_NETFLOW_HOOK_EXPORT) == 0) { if (priv->export != NULL) return (EISCONN); /* Netflow version 5 supports 32-bit counters only */ if (CNTR_MAX == UINT64_MAX) return (EINVAL); priv->export = hook; /* Exporter is ready. Let's schedule expiry. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } else if (strcmp(name, NG_NETFLOW_HOOK_EXPORT9) == 0) { if (priv->export9 != NULL) return (EISCONN); priv->export9 = hook; /* Exporter is ready. Let's schedule expiry. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } else return (EINVAL); return (0); } /* Get a netgraph control message. */ static int ng_netflow_rcvmsg (node_p node, item_p item, hook_p lasthook) { const priv_p priv = NG_NODE_PRIVATE(node); struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; NGI_GET_MSG(item, msg); /* Deal with message according to cookie and command */ switch (msg->header.typecookie) { case NGM_NETFLOW_COOKIE: switch (msg->header.cmd) { case NGM_NETFLOW_INFO: { struct ng_netflow_info *i; NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_info), M_NOWAIT); i = (struct ng_netflow_info *)resp->data; ng_netflow_copyinfo(priv, i); break; } case NGM_NETFLOW_IFINFO: { struct ng_netflow_ifinfo *i; const uint16_t *index; if (msg->header.arglen != sizeof(uint16_t)) ERROUT(EINVAL); index = (uint16_t *)msg->data; if (*index >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); /* connected iface? */ if (priv->ifaces[*index].hook == NULL) ERROUT(EINVAL); NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_ifinfo), M_NOWAIT); i = (struct ng_netflow_ifinfo *)resp->data; memcpy((void *)i, (void *)&priv->ifaces[*index].info, sizeof(priv->ifaces[*index].info)); break; } case NGM_NETFLOW_SETDLT: { struct ng_netflow_setdlt *set; struct ng_netflow_iface *iface; if (msg->header.arglen != sizeof(struct ng_netflow_setdlt)) ERROUT(EINVAL); set = (struct ng_netflow_setdlt *)msg->data; if (set->iface >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); iface = &priv->ifaces[set->iface]; /* connected iface? */ if (iface->hook == NULL) ERROUT(EINVAL); switch (set->dlt) { case DLT_EN10MB: iface->info.ifinfo_dlt = DLT_EN10MB; break; case DLT_RAW: iface->info.ifinfo_dlt = DLT_RAW; break; default: ERROUT(EINVAL); } break; } case NGM_NETFLOW_SETIFINDEX: { struct ng_netflow_setifindex *set; struct ng_netflow_iface *iface; if (msg->header.arglen != sizeof(struct ng_netflow_setifindex)) ERROUT(EINVAL); set = (struct ng_netflow_setifindex *)msg->data; if (set->iface >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); iface = &priv->ifaces[set->iface]; /* connected iface? */ if (iface->hook == NULL) ERROUT(EINVAL); iface->info.ifinfo_index = set->index; break; } case NGM_NETFLOW_SETTIMEOUTS: { struct ng_netflow_settimeouts *set; if (msg->header.arglen != sizeof(struct ng_netflow_settimeouts)) ERROUT(EINVAL); set = (struct ng_netflow_settimeouts *)msg->data; priv->nfinfo_inact_t = set->inactive_timeout; priv->nfinfo_act_t = set->active_timeout; break; } case NGM_NETFLOW_SETCONFIG: { struct ng_netflow_setconfig *set; if (msg->header.arglen != sizeof(struct ng_netflow_setconfig)) ERROUT(EINVAL); set = (struct ng_netflow_setconfig *)msg->data; if (set->iface >= NG_NETFLOW_MAXIFACES) ERROUT(EINVAL); priv->ifaces[set->iface].info.conf = set->conf; break; } case NGM_NETFLOW_SETTEMPLATE: { struct ng_netflow_settemplate *set; if (msg->header.arglen != sizeof(struct ng_netflow_settemplate)) ERROUT(EINVAL); set = (struct ng_netflow_settemplate *)msg->data; priv->templ_packets = set->packets; priv->templ_time = set->time; break; } case NGM_NETFLOW_SETMTU: { struct ng_netflow_setmtu *set; if (msg->header.arglen != sizeof(struct ng_netflow_setmtu)) ERROUT(EINVAL); set = (struct ng_netflow_setmtu *)msg->data; if ((set->mtu < MIN_MTU) || (set->mtu > MAX_MTU)) ERROUT(EINVAL); priv->mtu = set->mtu; break; } case NGM_NETFLOW_SHOW: if (msg->header.arglen != sizeof(struct ngnf_show_header)) ERROUT(EINVAL); NG_MKRESPONSE(resp, msg, NGRESP_SIZE, M_NOWAIT); if (!resp) ERROUT(ENOMEM); error = ng_netflow_flow_show(priv, (struct ngnf_show_header *)msg->data, (struct ngnf_show_header *)resp->data); if (error) NG_FREE_MSG(resp); break; case NGM_NETFLOW_V9INFO: { struct ng_netflow_v9info *i; NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_v9info), M_NOWAIT); i = (struct ng_netflow_v9info *)resp->data; ng_netflow_copyv9info(priv, i); break; } default: ERROUT(EINVAL); /* unknown command */ break; } break; default: ERROUT(EINVAL); /* incorrect cookie */ break; } /* * Take care of synchronous response, if any. * Free memory and return. */ done: NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return (error); } /* Receive data on hook. */ static int ng_netflow_rcvdata (hook_p hook, item_p item) { const node_p node = NG_HOOK_NODE(hook); const priv_p priv = NG_NODE_PRIVATE(node); const iface_p iface = NG_HOOK_PRIVATE(hook); hook_p out; struct mbuf *m = NULL, *m_old = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct m_tag *mtag; int pullup_len = 0, off; uint8_t acct = 0, bypass = 0, flags = 0, upper_proto = 0; int error = 0, l3_off = 0; unsigned int src_if_index; caddr_t upper_ptr = NULL; fib_export_p fe; uint32_t fib; if ((hook == priv->export) || (hook == priv->export9)) { /* * Data arrived on export hook. * This must not happen. */ log(LOG_ERR, "ng_netflow: incoming data on export hook!\n"); ERROUT(EINVAL); } if (hook == iface->hook) { if ((iface->info.conf & NG_NETFLOW_CONF_INGRESS) == 0) bypass = 1; out = iface->out; } else if (hook == iface->out) { if ((iface->info.conf & NG_NETFLOW_CONF_EGRESS) == 0) bypass = 1; out = iface->hook; } else ERROUT(EINVAL); if ((!bypass) && (iface->info.conf & (NG_NETFLOW_CONF_ONCE | NG_NETFLOW_CONF_THISONCE))) { mtag = m_tag_locate(NGI_M(item), MTAG_NETFLOW, MTAG_NETFLOW_CALLED, NULL); while (mtag != NULL) { if ((iface->info.conf & NG_NETFLOW_CONF_ONCE) || ((ng_ID_t *)(mtag + 1))[0] == NG_NODE_ID(node)) { bypass = 1; break; } mtag = m_tag_locate(NGI_M(item), MTAG_NETFLOW, MTAG_NETFLOW_CALLED, mtag); } } if (bypass) { if (out == NULL) ERROUT(ENOTCONN); NG_FWD_ITEM_HOOK(error, item, out); return (error); } if (iface->info.conf & (NG_NETFLOW_CONF_ONCE | NG_NETFLOW_CONF_THISONCE)) { mtag = m_tag_alloc(MTAG_NETFLOW, MTAG_NETFLOW_CALLED, sizeof(ng_ID_t), M_NOWAIT); if (mtag) { ((ng_ID_t *)(mtag + 1))[0] = NG_NODE_ID(node); m_tag_prepend(NGI_M(item), mtag); } } /* Import configuration flags related to flow creation */ flags = iface->info.conf & NG_NETFLOW_FLOW_FLAGS; NGI_GET_M(item, m); m_old = m; /* Increase counters. */ iface->info.ifinfo_packets++; /* * Depending on interface data link type and packet contents * we pullup enough data, so that ng_netflow_flow_add() does not * need to know about mbuf at all. We keep current length of data * needed to be contiguous in pullup_len. mtod() is done at the * very end one more time, since m can had changed after pulluping. * * In case of unrecognized data we don't return error, but just * pass data to downstream hook, if it is available. */ #define M_CHECK(length) do { \ pullup_len += length; \ if (((m)->m_pkthdr.len < (pullup_len)) || \ ((pullup_len) > MHLEN)) { \ error = EINVAL; \ goto bypass; \ } \ if ((m)->m_len < (pullup_len) && \ (((m) = m_pullup((m),(pullup_len))) == NULL)) { \ error = ENOBUFS; \ goto done; \ } \ } while (0) switch (iface->info.ifinfo_dlt) { case DLT_EN10MB: /* Ethernet */ { struct ether_header *eh; uint16_t etype; M_CHECK(sizeof(struct ether_header)); eh = mtod(m, struct ether_header *); /* Make sure this is IP frame. */ etype = ntohs(eh->ether_type); switch (etype) { +#ifdef INET case ETHERTYPE_IP: M_CHECK(sizeof(struct ip)); eh = mtod(m, struct ether_header *); ip = (struct ip *)(eh + 1); l3_off = sizeof(struct ether_header); break; +#endif #ifdef INET6 case ETHERTYPE_IPV6: /* * m_pullup() called by M_CHECK() pullups * kern.ipc.max_protohdr (default 60 bytes) * which is enough. */ M_CHECK(sizeof(struct ip6_hdr)); eh = mtod(m, struct ether_header *); ip6 = (struct ip6_hdr *)(eh + 1); l3_off = sizeof(struct ether_header); break; #endif case ETHERTYPE_VLAN: { struct ether_vlan_header *evh; M_CHECK(sizeof(struct ether_vlan_header) - sizeof(struct ether_header)); evh = mtod(m, struct ether_vlan_header *); etype = ntohs(evh->evl_proto); l3_off = sizeof(struct ether_vlan_header); if (etype == ETHERTYPE_IP) { +#ifdef INET M_CHECK(sizeof(struct ip)); ip = (struct ip *)(evh + 1); break; +#endif #ifdef INET6 } else if (etype == ETHERTYPE_IPV6) { M_CHECK(sizeof(struct ip6_hdr)); ip6 = (struct ip6_hdr *)(evh + 1); break; #endif } } default: goto bypass; /* pass this frame */ } break; } case DLT_RAW: /* IP packets */ M_CHECK(sizeof(struct ip)); ip = mtod(m, struct ip *); /* l3_off is already zero */ #ifdef INET6 /* * If INET6 is not defined IPv6 packets * will be discarded in ng_netflow_flow_add(). */ if (ip->ip_v == IP6VERSION) { ip = NULL; M_CHECK(sizeof(struct ip6_hdr) - sizeof(struct ip)); ip6 = mtod(m, struct ip6_hdr *); } +#endif +#ifndef INET + ip = NULL; #endif break; default: goto bypass; break; } off = pullup_len; if ((ip != NULL) && ((ip->ip_off & htons(IP_OFFMASK)) == 0)) { if ((ip->ip_v != IPVERSION) || ((ip->ip_hl << 2) < sizeof(struct ip))) goto bypass; /* * In case of IPv4 header with options, we haven't pulled * up enough, yet. */ M_CHECK((ip->ip_hl << 2) - sizeof(struct ip)); /* Save upper layer offset and proto */ off = pullup_len; upper_proto = ip->ip_p; /* * XXX: in case of wrong upper layer header we will * forward this packet but skip this record in netflow. */ switch (ip->ip_p) { case IPPROTO_TCP: M_CHECK(sizeof(struct tcphdr)); break; case IPPROTO_UDP: M_CHECK(sizeof(struct udphdr)); break; case IPPROTO_SCTP: M_CHECK(sizeof(struct sctphdr)); break; } } else if (ip != NULL) { /* * Nothing to save except upper layer proto, * since this is a packet fragment. */ flags |= NG_NETFLOW_IS_FRAG; upper_proto = ip->ip_p; if ((ip->ip_v != IPVERSION) || ((ip->ip_hl << 2) < sizeof(struct ip))) goto bypass; #ifdef INET6 } else if (ip6 != NULL) { int cur = ip6->ip6_nxt, hdr_off = 0; struct ip6_ext *ip6e; struct ip6_frag *ip6f; if (priv->export9 == NULL) goto bypass; /* Save upper layer info. */ off = pullup_len; upper_proto = cur; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) goto bypass; /* * Loop through IPv6 extended headers to get upper * layer header / frag. */ for (;;) { switch (cur) { /* * Same as in IPv4, we can forward a 'bad' * packet without accounting. */ case IPPROTO_TCP: M_CHECK(sizeof(struct tcphdr)); goto loopend; case IPPROTO_UDP: M_CHECK(sizeof(struct udphdr)); goto loopend; case IPPROTO_SCTP: M_CHECK(sizeof(struct sctphdr)); goto loopend; /* Loop until 'real' upper layer headers */ case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: M_CHECK(sizeof(struct ip6_ext)); ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); upper_proto = ip6e->ip6e_nxt; hdr_off = (ip6e->ip6e_len + 1) << 3; break; /* RFC4302, can be before DSTOPTS */ case IPPROTO_AH: M_CHECK(sizeof(struct ip6_ext)); ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); upper_proto = ip6e->ip6e_nxt; hdr_off = (ip6e->ip6e_len + 2) << 2; break; case IPPROTO_FRAGMENT: M_CHECK(sizeof(struct ip6_frag)); ip6f = (struct ip6_frag *)(mtod(m, caddr_t) + off); upper_proto = ip6f->ip6f_nxt; hdr_off = sizeof(struct ip6_frag); off += hdr_off; flags |= NG_NETFLOW_IS_FRAG; goto loopend; #if 0 case IPPROTO_NONE: goto loopend; #endif /* * Any unknown header (new extension or IPv6/IPv4 * header for tunnels) ends loop. */ default: goto loopend; } off += hdr_off; cur = upper_proto; } #endif } #undef M_CHECK #ifdef INET6 loopend: #endif /* Just in case of real reallocation in M_CHECK() / m_pullup() */ if (m != m_old) { priv->nfinfo_realloc_mbuf++; /* Restore ip/ipv6 pointer */ if (ip != NULL) ip = (struct ip *)(mtod(m, caddr_t) + l3_off); else if (ip6 != NULL) ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + l3_off); } upper_ptr = (caddr_t)(mtod(m, caddr_t) + off); /* Determine packet input interface. Prefer configured. */ src_if_index = 0; if (hook == iface->out || iface->info.ifinfo_index == 0) { if (m->m_pkthdr.rcvif != NULL) src_if_index = m->m_pkthdr.rcvif->if_index; } else src_if_index = iface->info.ifinfo_index; /* Check packet FIB */ fib = M_GETFIB(m); if (fib >= priv->maxfibs) { CTR2(KTR_NET, "ng_netflow_rcvdata(): packet fib %d is out of " "range of available fibs: 0 .. %d", fib, priv->maxfibs); goto bypass; } if ((fe = priv_to_fib(priv, fib)) == NULL) { /* Setup new FIB */ if (ng_netflow_fib_init(priv, fib) != 0) { /* malloc() failed */ goto bypass; } fe = priv_to_fib(priv, fib); } +#ifdef INET if (ip != NULL) error = ng_netflow_flow_add(priv, fe, ip, upper_ptr, upper_proto, flags, src_if_index); -#ifdef INET6 - else if (ip6 != NULL) +#endif +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET6 + if (ip6 != NULL) error = ng_netflow_flow6_add(priv, fe, ip6, upper_ptr, upper_proto, flags, src_if_index); #endif else goto bypass; acct = 1; bypass: if (out != NULL) { if (acct == 0) { /* Accounting failure */ if (ip != NULL) { counter_u64_add(priv->nfinfo_spackets, 1); counter_u64_add(priv->nfinfo_sbytes, m->m_pkthdr.len); } else if (ip6 != NULL) { counter_u64_add(priv->nfinfo_spackets6, 1); counter_u64_add(priv->nfinfo_sbytes6, m->m_pkthdr.len); } } /* XXX: error gets overwritten here */ NG_FWD_NEW_DATA(error, item, out, m); return (error); } done: if (item) NG_FREE_ITEM(item); if (m) NG_FREE_M(m); return (error); } /* We will be shut down in a moment */ static int ng_netflow_close(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); callout_drain(&priv->exp_callout); ng_netflow_cache_flush(priv); return (0); } /* Do local shutdown processing. */ static int ng_netflow_rmnode(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(priv->node); free(priv->fib_data, M_NETGRAPH); free(priv, M_NETGRAPH); return (0); } /* Hook disconnection. */ static int ng_netflow_disconnect(hook_p hook) { node_p node = NG_HOOK_NODE(hook); priv_p priv = NG_NODE_PRIVATE(node); iface_p iface = NG_HOOK_PRIVATE(hook); if (iface != NULL) { if (iface->hook == hook) iface->hook = NULL; if (iface->out == hook) iface->out = NULL; } /* if export hook disconnected stop running expire(). */ if (hook == priv->export) { if (priv->export9 == NULL) callout_drain(&priv->exp_callout); priv->export = NULL; } if (hook == priv->export9) { if (priv->export == NULL) callout_drain(&priv->exp_callout); priv->export9 = NULL; } /* Removal of the last link destroys the node. */ if (NG_NODE_NUMHOOKS(node) == 0) ng_rmnode_self(node); return (0); }