diff --git a/lib/libifconfig/libifconfig.h b/lib/libifconfig/libifconfig.h index 03a0d0b199b7..8d5ca01b0ce6 100644 --- a/lib/libifconfig/libifconfig.h +++ b/lib/libifconfig/libifconfig.h @@ -1,377 +1,380 @@ /* * Copyright (c) 2016-2017, Marie Helene Kvello-Aune * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * thislist of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation and/or * other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include #include #include #include #include #define ND6_IFF_DEFAULTIF 0x8000 typedef enum { OK = 0, OTHER, IOCTL, SOCKET, NETLINK } ifconfig_errtype; /* * Opaque definition so calling application can just pass a * pointer to it for library use. */ struct ifconfig_handle; typedef struct ifconfig_handle ifconfig_handle_t; struct ifaddrs; struct ifbropreq; struct ifbreq; struct in6_ndireq; struct lagg_reqall; struct lagg_reqflags; struct lagg_reqopts; struct lagg_reqport; /** Stores extra info associated with a bridge(4) interface */ struct ifconfig_bridge_status { struct ifbropreq *params; /**< current operational parameters */ struct ifbreq *members; /**< list of bridge members */ size_t members_count; /**< how many member interfaces */ uint32_t cache_size; /**< size of address cache */ uint32_t cache_lifetime; /**< address cache entry lifetime */ }; struct ifconfig_capabilities { /** Current capabilities (ifconfig prints this as 'options')*/ int curcap; /** Requested capabilities (ifconfig prints this as 'capabilities')*/ int reqcap; }; /** Stores extra info associated with an inet address */ struct ifconfig_inet_addr { const struct sockaddr_in *sin; const struct sockaddr_in *netmask; const struct sockaddr_in *dst; const struct sockaddr_in *broadcast; int prefixlen; uint8_t vhid; }; /** Stores extra info associated with an inet6 address */ struct ifconfig_inet6_addr { struct sockaddr_in6 *sin6; struct sockaddr_in6 *dstin6; struct in6_addrlifetime lifetime; int prefixlen; uint32_t flags; uint8_t vhid; }; /** Stores extra info associated with a lagg(4) interface */ struct ifconfig_lagg_status { struct lagg_reqall *ra; struct lagg_reqopts *ro; struct lagg_reqflags *rf; }; /** Retrieves a new state object for use in other API calls. * Example usage: *{@code * // Create state object * ifconfig_handle_t *lifh; * lifh = ifconfig_open(); * if (lifh == NULL) { * // Handle error * } * * // Do stuff with the handle * * // Dispose of the state object * ifconfig_close(lifh); * lifh = NULL; *} */ ifconfig_handle_t *ifconfig_open(void); /** Frees resources held in the provided state object. * @param h The state object to close. * @see #ifconfig_open(void) */ void ifconfig_close(ifconfig_handle_t *h); /** Identifies what kind of error occurred. */ ifconfig_errtype ifconfig_err_errtype(ifconfig_handle_t *h); /** Retrieves the errno associated with the error, if any. */ int ifconfig_err_errno(ifconfig_handle_t *h); typedef void (*ifconfig_foreach_func_t)(ifconfig_handle_t *h, struct ifaddrs *ifa, void *udata); /** Iterate over every network interface * @param h An open ifconfig state object * @param cb A callback function to call with a pointer to each interface * @param udata An opaque value that will be passed to the callback. * @return 0 on success, nonzero if the list could not be iterated */ int ifconfig_foreach_iface(ifconfig_handle_t *h, ifconfig_foreach_func_t cb, void *udata); /** Iterate over every address on a single network interface * @param h An open ifconfig state object * @param ifa A pointer that was supplied by a previous call to * ifconfig_foreach_iface * @param udata An opaque value that will be passed to the callback. * @param cb A callback function to call with a pointer to each ifaddr */ void ifconfig_foreach_ifaddr(ifconfig_handle_t *h, struct ifaddrs *ifa, ifconfig_foreach_func_t cb, void *udata); /** If error type was IOCTL, this identifies which request failed. */ unsigned long ifconfig_err_ioctlreq(ifconfig_handle_t *h); int ifconfig_get_description(ifconfig_handle_t *h, const char *name, char **description); int ifconfig_set_description(ifconfig_handle_t *h, const char *name, const char *newdescription); int ifconfig_unset_description(ifconfig_handle_t *h, const char *name); int ifconfig_set_name(ifconfig_handle_t *h, const char *name, const char *newname); int ifconfig_get_orig_name(ifconfig_handle_t *h, const char *ifname, char **orig_name); int ifconfig_set_fib(ifconfig_handle_t *h, const char *name, int fib); int ifconfig_get_fib(ifconfig_handle_t *h, const char *name, int *fib); int ifconfig_set_mtu(ifconfig_handle_t *h, const char *name, const int mtu); int ifconfig_get_mtu(ifconfig_handle_t *h, const char *name, int *mtu); int ifconfig_get_nd6(ifconfig_handle_t *h, const char *name, struct in6_ndireq *nd); int ifconfig_set_metric(ifconfig_handle_t *h, const char *name, const int metric); int ifconfig_get_metric(ifconfig_handle_t *h, const char *name, int *metric); int ifconfig_set_capability(ifconfig_handle_t *h, const char *name, const int capability); int ifconfig_get_capability(ifconfig_handle_t *h, const char *name, struct ifconfig_capabilities *capability); /** Retrieve the list of groups to which this interface belongs * @param h An open ifconfig state object * @param name The interface name * @param ifgr return argument. The caller is responsible for freeing * ifgr->ifgr_groups * @return 0 on success, nonzero on failure */ int ifconfig_get_groups(ifconfig_handle_t *h, const char *name, struct ifgroupreq *ifgr); int ifconfig_get_ifstatus(ifconfig_handle_t *h, const char *name, struct ifstat *stat); /** Retrieve the interface media information * @param h An open ifconfig state object * @param name The interface name * @param ifmr Return argument. The caller is responsible for freeing it * @return 0 on success, nonzero on failure */ int ifconfig_media_get_mediareq(ifconfig_handle_t *h, const char *name, struct ifmediareq **ifmr); const char *ifconfig_media_get_status(const struct ifmediareq *ifmr); typedef int ifmedia_t; #define INVALID_IFMEDIA ((ifmedia_t)-1) /** Retrieve the name of a media type * @param media The media to be named * @return A pointer to the media type name, or NULL on failure */ const char *ifconfig_media_get_type(ifmedia_t media); /** Retrieve a media type by its name * @param name The name of a media type * @return The media type value, or INVALID_IFMEDIA on failure */ ifmedia_t ifconfig_media_lookup_type(const char *name); /** Retrieve the name of a media subtype * @param media The media subtype to be named * @return A pointer to the media subtype name, or NULL on failure */ const char *ifconfig_media_get_subtype(ifmedia_t media); /** Retrieve a media subtype by its name * @param media The top level media type whose subtype we want * @param name The name of a media subtype * @return The media subtype value, or INVALID_IFMEDIA on failure */ ifmedia_t ifconfig_media_lookup_subtype(ifmedia_t media, const char *name); /** Retrieve the name of a media mode * @param media The media mode to be named * @return A pointer to the media mode name, or NULL on failure */ const char *ifconfig_media_get_mode(ifmedia_t media); /** Retrieve a media mode by its name * @param media The top level media type whose mode we want * @param name The name of a media mode * @return The media mode value, or INVALID_IFMEDIA on failure */ ifmedia_t ifconfig_media_lookup_mode(ifmedia_t media, const char *name); /** Retrieve an array of media options * @param media The media for which to obtain the options * @return Pointer to an array of pointers to option names, * terminated by a NULL pointer, or simply NULL on failure. * The caller is responsible for freeing the array but not its * contents. */ const char **ifconfig_media_get_options(ifmedia_t media); /** Retrieve an array of media options by names * @param media The top level media type whose options we want * @param opts Pointer to an array of string pointers naming options * @param nopts Number of elements in the opts array * @return Pointer to an array of media options, one for each option named * in opts. NULL is returned instead with errno set to ENOMEM if * allocating the return array fails or EINVAL if media is not * valid. A media option in the array will be INVALID_IFMEDIA * when lookup failed for the option named in that position in * opts. The caller is responsible for freeing the array. */ ifmedia_t *ifconfig_media_lookup_options(ifmedia_t media, const char **opts, size_t nopts); /** Retrieve the reason the interface is down * @param h An open ifconfig state object * @param name The interface name * @param ifdr Return argument. * @return 0 on success, nonzero on failure */ int ifconfig_media_get_downreason(ifconfig_handle_t *h, const char *name, struct ifdownreason *ifdr); struct ifconfig_carp { size_t carpr_count; uint32_t carpr_vhid; uint32_t carpr_state; int32_t carpr_advbase; int32_t carpr_advskew; uint8_t carpr_key[CARP_KEY_LEN]; struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_prio; + uint16_t carpr_vrrp_adv_inter; }; int ifconfig_carp_get_vhid(ifconfig_handle_t *h, const char *name, struct ifconfig_carp *carpr, uint32_t vhid); int ifconfig_carp_get_info(ifconfig_handle_t *h, const char *name, struct ifconfig_carp *carpr, size_t ncarp); int ifconfig_carp_set_info(ifconfig_handle_t *h, const char *name, const struct ifconfig_carp *carpr); /** Retrieve additional information about an inet address * @param h An open ifconfig state object * @param name The interface name * @param ifa Pointer to the address structure of interest * @param addr Return argument. It will be filled with additional information * about the address. * @return 0 on success, nonzero on failure. */ int ifconfig_inet_get_addrinfo(ifconfig_handle_t *h, const char *name, struct ifaddrs *ifa, struct ifconfig_inet_addr *addr); /** Retrieve additional information about an inet6 address * @param h An open ifconfig state object * @param name The interface name * @param ifa Pointer to the address structure of interest * @param addr Return argument. It will be filled with additional information * about the address. * @return 0 on success, nonzero on failure. */ int ifconfig_inet6_get_addrinfo(ifconfig_handle_t *h, const char *name, struct ifaddrs *ifa, struct ifconfig_inet6_addr *addr); /** Retrieve additional information about a bridge(4) interface */ int ifconfig_bridge_get_bridge_status(ifconfig_handle_t *h, const char *name, struct ifconfig_bridge_status **bridge); /** Frees the structure returned by ifconfig_bridge_get_bridge_status. Does * nothing if the argument is NULL * @param bridge Pointer to the structure to free */ void ifconfig_bridge_free_bridge_status(struct ifconfig_bridge_status *bridge); /** Retrieve additional information about a lagg(4) interface */ int ifconfig_lagg_get_lagg_status(ifconfig_handle_t *h, const char *name, struct ifconfig_lagg_status **lagg_status); /** Retrieve additional information about a member of a lagg(4) interface */ int ifconfig_lagg_get_laggport_status(ifconfig_handle_t *h, const char *name, struct lagg_reqport *rp); /** Frees the structure returned by ifconfig_lagg_get_lagg_status. Does * nothing if the argument is NULL * @param laggstat Pointer to the structure to free */ void ifconfig_lagg_free_lagg_status(struct ifconfig_lagg_status *laggstat); /** Destroy a virtual interface * @param name Interface to destroy */ int ifconfig_destroy_interface(ifconfig_handle_t *h, const char *name); /** Creates a (virtual) interface * @param name Name of interface to create. Example: bridge or bridge42 * @param name ifname Is set to actual name of created interface */ int ifconfig_create_interface(ifconfig_handle_t *h, const char *name, char **ifname); /** Creates a (virtual) interface * @param name Name of interface to create. Example: vlan0 or ix0.50 * @param name ifname Is set to actual name of created interface * @param vlandev Name of interface to attach to * @param vlanid VLAN ID/Tag. Must not be 0. */ int ifconfig_create_interface_vlan(ifconfig_handle_t *h, const char *name, char **ifname, const char *vlandev, const unsigned short vlantag); int ifconfig_set_vlantag(ifconfig_handle_t *h, const char *name, const char *vlandev, const unsigned short vlantag); /** Gets the names of all interface cloners available on the system * @param bufp Set to the address of the names buffer on success or NULL * if an error occurs. This buffer must be freed when done. * @param lenp Set to the number of names in the returned buffer or 0 * if an error occurs. Each name is contained within an * IFNAMSIZ length slice of the buffer, for a total buffer * length of *lenp * IFNAMSIZ bytes. */ int ifconfig_list_cloners(ifconfig_handle_t *h, char **bufp, size_t *lenp); diff --git a/lib/libifconfig/libifconfig_carp.c b/lib/libifconfig/libifconfig_carp.c index 40556eda813f..59faa8def496 100644 --- a/lib/libifconfig/libifconfig_carp.c +++ b/lib/libifconfig/libifconfig_carp.c @@ -1,213 +1,219 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "libifconfig.h" #include "libifconfig_internal.h" #include #define _OUT(_field) offsetof(struct ifconfig_carp, _field) static struct snl_attr_parser ap_carp_get[] = { { .type = CARP_NL_VHID, .off = _OUT(carpr_vhid), .cb = snl_attr_get_uint32 }, { .type = CARP_NL_STATE, .off = _OUT(carpr_state), .cb = snl_attr_get_uint32 }, { .type = CARP_NL_ADVBASE, .off = _OUT(carpr_advbase), .cb = snl_attr_get_int32 }, { .type = CARP_NL_ADVSKEW, .off = _OUT(carpr_advskew), .cb = snl_attr_get_int32 }, { .type = CARP_NL_KEY, .off = _OUT(carpr_key), .cb = snl_attr_copy_string, .arg_u32 = CARP_KEY_LEN }, { .type = CARP_NL_ADDR, .off = _OUT(carpr_addr), .cb = snl_attr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(carpr_addr6), .cb = snl_attr_get_in6_addr }, + { .type = CARP_NL_VERSION, .off = _OUT(carpr_version), .cb = snl_attr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(carpr_vrrp_prio), .cb = snl_attr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(carpr_vrrp_adv_inter), .cb = snl_attr_get_uint16 }, }; #undef _OUT SNL_DECLARE_GENL_PARSER(carp_get_parser, ap_carp_get); static int _ifconfig_carp_get(ifconfig_handle_t *h, const char *name, struct ifconfig_carp *carp, size_t ncarp, uint32_t vhid) { struct snl_state ss = {}; struct snl_errmsg_data e = {}; struct snl_writer nw; struct nlmsghdr *hdr; size_t i = 0; uint32_t seq_id; int family_id; ifconfig_error_clear(h); bzero(carp, sizeof(*carp) * ncarp); if (! snl_init(&ss, NETLINK_GENERIC)) { ifconfig_error(h, NETLINK, ENOTSUP); return (-1); } snl_init_writer(&ss, &nw); family_id = snl_get_genl_family(&ss, CARP_NL_FAMILY_NAME); if (family_id == 0) { ifconfig_error(h, NETLINK, EPROTONOSUPPORT); goto out; } hdr = snl_create_genl_msg_request(&nw, family_id, CARP_NL_CMD_GET); hdr->nlmsg_flags |= NLM_F_DUMP; snl_add_msg_attr_string(&nw, CARP_NL_IFNAME, name); if (vhid != 0) snl_add_msg_attr_u32(&nw, CARP_NL_VHID, vhid); hdr = snl_finalize_msg(&nw); if (hdr == NULL) { ifconfig_error(h, NETLINK, ENOMEM); goto out; } seq_id = hdr->nlmsg_seq; if (! snl_send_message(&ss, hdr)) { ifconfig_error(h, NETLINK, EIO); goto out; } while ((hdr = snl_read_reply_multi(&ss, seq_id, &e)) != NULL) { if (e.error != 0) { ifconfig_error(h, NETLINK, e.error); break; } if (i >= ncarp) { ifconfig_error(h, NETLINK, E2BIG); break; } memset(&carp[i], 0, sizeof(carp[0])); if (! snl_parse_nlmsg(&ss, hdr, &carp_get_parser, &carp[i])) continue; i++; carp[0].carpr_count = i; if (i > ncarp) { ifconfig_error(h, NETLINK, E2BIG); break; } } out: snl_free(&ss); return (h->error.errcode ? -1 : 0); } int ifconfig_carp_set_info(ifconfig_handle_t *h, const char *name, const struct ifconfig_carp *carpr) { struct snl_state ss = {}; struct snl_writer nw; struct nlmsghdr *hdr; int family_id; uint32_t seq_id; ifconfig_error_clear(h); if (! snl_init(&ss, NETLINK_GENERIC)) { ifconfig_error(h, NETLINK, ENOTSUP); return (-1); } snl_init_writer(&ss, &nw); family_id = snl_get_genl_family(&ss, CARP_NL_FAMILY_NAME); if (family_id == 0) { ifconfig_error(h, NETLINK, EPROTONOSUPPORT); return (-1); } hdr = snl_create_genl_msg_request(&nw, family_id, CARP_NL_CMD_SET); snl_add_msg_attr_u32(&nw, CARP_NL_VHID, carpr->carpr_vhid); snl_add_msg_attr_u32(&nw, CARP_NL_STATE, carpr->carpr_state); snl_add_msg_attr_s32(&nw, CARP_NL_ADVBASE, carpr->carpr_advbase); snl_add_msg_attr_s32(&nw, CARP_NL_ADVSKEW, carpr->carpr_advskew); snl_add_msg_attr_string(&nw, CARP_NL_IFNAME, name); snl_add_msg_attr(&nw, CARP_NL_ADDR, sizeof(carpr->carpr_addr), &carpr->carpr_addr); snl_add_msg_attr(&nw, CARP_NL_ADDR6, sizeof(carpr->carpr_addr6), &carpr->carpr_addr6); snl_add_msg_attr_string(&nw, CARP_NL_KEY, carpr->carpr_key); + snl_add_msg_attr_u8(&nw, CARP_NL_VERSION, carpr->carpr_version); + snl_add_msg_attr_u8(&nw, CARP_NL_VRRP_PRIORITY, carpr->carpr_vrrp_prio); + snl_add_msg_attr_u16(&nw, CARP_NL_VRRP_ADV_INTER, carpr->carpr_vrrp_adv_inter); hdr = snl_finalize_msg(&nw); if (hdr == NULL) { ifconfig_error(h, NETLINK, ENOMEM); goto out; } seq_id = hdr->nlmsg_seq; if (! snl_send_message(&ss, hdr)) { ifconfig_error(h, NETLINK, EIO); goto out; } struct snl_errmsg_data e = { }; if (! snl_read_reply_code(&ss, seq_id, &e)) ifconfig_error(h, NETLINK, e.error); out: snl_free(&ss); return (h->error.errcode ? -1 : 0); } int ifconfig_carp_get_vhid(ifconfig_handle_t *h, const char *name, struct ifconfig_carp *carp, uint32_t vhid) { return (_ifconfig_carp_get(h, name, carp, 1, vhid)); } int ifconfig_carp_get_info(ifconfig_handle_t *h, const char *name, struct ifconfig_carp *carp, size_t ncarp) { return (_ifconfig_carp_get(h, name, carp, ncarp, 0)); } diff --git a/sbin/ifconfig/carp.c b/sbin/ifconfig/carp.c index dd09838b5d8b..36b5d85ab7b8 100644 --- a/sbin/ifconfig/carp.c +++ b/sbin/ifconfig/carp.c @@ -1,256 +1,302 @@ /* from $OpenBSD: ifconfig.c,v 1.82 2003/10/19 05:43:35 mcbride Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Michael Shalayeff. All rights reserved. * Copyright (c) 2003 Ryan McBride. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifconfig.h" static const char *carp_states[] = { CARP_STATES }; static void setcarp_callback(if_ctx *, void *); static int carpr_vhid = -1; static int carpr_advskew = -1; static int carpr_advbase = -1; static int carpr_state = -1; static struct in_addr carp_addr; static struct in6_addr carp_addr6; static unsigned char const *carpr_key; +static carp_version_t carpr_version; +static uint8_t carpr_vrrp_prio; +static uint16_t carpr_vrrp_adv_inter; static void carp_status(if_ctx *ctx) { struct ifconfig_carp carpr[CARP_MAXVHID]; char addr_buf[NI_MAXHOST]; if (ifconfig_carp_get_info(lifh, ctx->ifname, carpr, CARP_MAXVHID) == -1) return; for (size_t i = 0; i < carpr[0].carpr_count; i++) { - printf("\tcarp: %s vhid %d advbase %d advskew %d", - carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, - carpr[i].carpr_advbase, carpr[i].carpr_advskew); - if (ctx->args->printkeys && carpr[i].carpr_key[0] != '\0') - printf(" key \"%s\"\n", carpr[i].carpr_key); - else - printf("\n"); - - inet_ntop(AF_INET6, &carpr[i].carpr_addr6, addr_buf, - sizeof(addr_buf)); - - printf("\t peer %s peer6 %s\n", - inet_ntoa(carpr[i].carpr_addr), addr_buf); + switch (carpr[i].carpr_version) { + case CARP_VERSION_CARP: + printf("\tcarp: %s vhid %d advbase %d advskew %d", + carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, + carpr[i].carpr_advbase, carpr[i].carpr_advskew); + if (ctx->args->printkeys && carpr[i].carpr_key[0] != '\0') + printf(" key \"%s\"\n", carpr[i].carpr_key); + else + printf("\n"); + + inet_ntop(AF_INET6, &carpr[i].carpr_addr6, addr_buf, + sizeof(addr_buf)); + + printf("\t peer %s peer6 %s\n", + inet_ntoa(carpr[i].carpr_addr), addr_buf); + break; + case CARP_VERSION_VRRPv3: + printf("\tvrrp: %s vrid %d prio %d interval %d\n", + carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, + carpr[i].carpr_vrrp_prio, carpr[i].carpr_vrrp_adv_inter); + break; + } } } static void setcarp_vhid(if_ctx *ctx, const char *val, int dummy __unused) { const struct afswtch *afp = ctx->afp; carpr_vhid = atoi(val); if (carpr_vhid <= 0 || carpr_vhid > CARP_MAXVHID) errx(1, "vhid must be greater than 0 and less than %u", CARP_MAXVHID); if (afp->af_setvhid == NULL) errx(1, "%s doesn't support carp(4)", afp->af_name); afp->af_setvhid(carpr_vhid); callback_register(setcarp_callback, NULL); } static void setcarp_callback(if_ctx *ctx, void *arg __unused) { struct ifconfig_carp carpr = { }; if (ifconfig_carp_get_vhid(lifh, ctx->ifname, &carpr, carpr_vhid) == -1) { if (ifconfig_err_errno(lifh) != ENOENT) return; } carpr.carpr_vhid = carpr_vhid; if (carpr_key != NULL) /* XXX Should hash the password into the key here? */ strlcpy(carpr.carpr_key, carpr_key, CARP_KEY_LEN); if (carpr_advskew > -1) carpr.carpr_advskew = carpr_advskew; if (carpr_advbase > -1) carpr.carpr_advbase = carpr_advbase; if (carpr_state > -1) carpr.carpr_state = carpr_state; if (carp_addr.s_addr != INADDR_ANY) carpr.carpr_addr = carp_addr; if (! IN6_IS_ADDR_UNSPECIFIED(&carp_addr6)) memcpy(&carpr.carpr_addr6, &carp_addr6, sizeof(carp_addr6)); + if (carpr_version != 0) + carpr.carpr_version = carpr_version; + if (carpr_vrrp_prio != 0) + carpr.carpr_vrrp_prio = carpr_vrrp_prio; + if (carpr_vrrp_adv_inter != 0) + carpr.carpr_vrrp_adv_inter = carpr_vrrp_adv_inter; if (ifconfig_carp_set_info(lifh, ctx->ifname, &carpr)) err(1, "SIOCSVH"); } static void setcarp_passwd(if_ctx *ctx __unused, const char *val, int dummy __unused) { if (carpr_vhid == -1) errx(1, "passwd requires vhid"); carpr_key = val; } static void setcarp_advskew(if_ctx *ctx __unused, const char *val, int dummy __unused) { if (carpr_vhid == -1) errx(1, "advskew requires vhid"); carpr_advskew = atoi(val); } static void setcarp_advbase(if_ctx *ctx __unused, const char *val, int dummy __unused) { if (carpr_vhid == -1) errx(1, "advbase requires vhid"); carpr_advbase = atoi(val); } static void setcarp_state(if_ctx *ctx __unused, const char *val, int dummy __unused) { int i; if (carpr_vhid == -1) errx(1, "state requires vhid"); for (i = 0; i <= CARP_MAXSTATE; i++) if (strcasecmp(carp_states[i], val) == 0) { carpr_state = i; return; } errx(1, "unknown state"); } static void setcarp_peer(if_ctx *ctx __unused, const char *val, int dummy __unused) { carp_addr.s_addr = inet_addr(val); } static void setcarp_mcast(if_ctx *ctx __unused, const char *val __unused, int dummy __unused) { carp_addr.s_addr = htonl(INADDR_CARP_GROUP); } static void setcarp_peer6(if_ctx *ctx __unused, const char *val, int dummy __unused) { struct addrinfo hints, *res; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET6; hints.ai_flags = AI_NUMERICHOST; if (getaddrinfo(val, NULL, &hints, &res) != 0) errx(1, "Invalid IPv6 address %s", val); memcpy(&carp_addr6, &(satosin6(res->ai_addr))->sin6_addr, sizeof(carp_addr6)); freeaddrinfo(res); } static void setcarp_mcast6(if_ctx *ctx __unused, const char *val __unused, int dummy __unused) { bzero(&carp_addr6, sizeof(carp_addr6)); carp_addr6.s6_addr[0] = 0xff; carp_addr6.s6_addr[1] = 0x02; carp_addr6.s6_addr[15] = 0x12; } +static void +setcarp_version(if_ctx *ctx __unused, const char *val, int dummy __unused) +{ + carpr_version = atoi(val); + + if (carpr_version != CARP_VERSION_CARP && carpr_version != CARP_VERSION_VRRPv3) + errx(1, "version must be %d or %d", CARP_VERSION_CARP, + CARP_VERSION_VRRPv3); +} + +static void +setvrrp_prio(if_ctx *ctx __unused, const char *val, int dummy __unused) +{ + carpr_vrrp_prio = atoi(val); +} + +static void +setvrrp_interval(if_ctx *ctx __unused, const char *val, int dummy __unused) +{ + carpr_vrrp_adv_inter = atoi(val); + + if (carpr_vrrp_adv_inter == 0 || carpr_vrrp_adv_inter > VRRP_MAX_INTERVAL) + errx(1, "vrrpinterval must be greater than 0 and less than %d", VRRP_MAX_INTERVAL); +} + static struct cmd carp_cmds[] = { DEF_CMD_ARG("advbase", setcarp_advbase), DEF_CMD_ARG("advskew", setcarp_advskew), DEF_CMD_ARG("pass", setcarp_passwd), DEF_CMD_ARG("vhid", setcarp_vhid), DEF_CMD_ARG("state", setcarp_state), DEF_CMD_ARG("peer", setcarp_peer), DEF_CMD("mcast", 0, setcarp_mcast), DEF_CMD_ARG("peer6", setcarp_peer6), DEF_CMD("mcast6", 0, setcarp_mcast6), + DEF_CMD_ARG("carpver", setcarp_version), + DEF_CMD_ARG("vrrpprio", setvrrp_prio), + DEF_CMD_ARG("vrrpinterval", setvrrp_interval), }; static struct afswtch af_carp = { .af_name = "af_carp", .af_af = AF_UNSPEC, .af_other_status = carp_status, }; static __constructor void carp_ctor(void) { /* Default to multicast. */ setcarp_mcast(NULL, NULL, 0); setcarp_mcast6(NULL, NULL, 0); for (size_t i = 0; i < nitems(carp_cmds); i++) cmd_register(&carp_cmds[i]); af_register(&af_carp); } diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index ddbc13e7c878..cf7c8d03ca73 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -1,2593 +1,3075 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Michael Shalayeff. * Copyright (c) 2003 Ryan McBride. * Copyright (c) 2011 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include #include #include #include #include static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ + carp_version_t sc_version; /* carp or VRRPv3 */ struct sockaddr_dl sc_addr; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. */ #endif #ifdef INET6 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ #endif struct mtx sc_mtx; int sc_vhid; - int sc_advskew; - int sc_advbase; - struct in_addr sc_carpaddr; - struct in6_addr sc_carpaddr6; + struct { /* CARP specific context */ + int sc_advskew; + int sc_advbase; + struct in_addr sc_carpaddr; + struct in6_addr sc_carpaddr6; + }; + struct { /* VRRPv3 specific context */ + uint8_t sc_vrrp_prio; + uint16_t sc_vrrp_adv_inter; + uint16_t sc_vrrp_master_inter; + }; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; enum { INIT = 0, BACKUP, MASTER } sc_state; int sc_suppress; int sc_sendad_errors; #define CARP_SENDAD_MAX_ERRORS 3 int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 int sc_init_counter; uint64_t sc_counter; /* authentication */ #define CARP_HMAC_PAD 64 unsigned char sc_key[CARP_KEY_LEN]; unsigned char sc_pad[CARP_HMAC_PAD]; SHA1_CTX sc_sha1; TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ }; struct carp_if { #ifdef INET int cif_naddrs; #endif #ifdef INET6 int cif_naddrs6; #endif TAILQ_HEAD(, carp_softc) cif_vrs; #ifdef INET struct ip_moptions cif_imo; #endif #ifdef INET6 struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; struct mtx cif_mtx; uint32_t cif_flags; #define CIF_PROMISC 0x00000001 }; /* Kernel equivalent of struct carpreq, but with more fields for new features. * */ struct carpkreq { int carpr_count; int carpr_vhid; int carpr_state; int carpr_advskew; int carpr_advbase; unsigned char carpr_key[CARP_KEY_LEN]; /* Everything above this is identical to carpreq */ struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_priority; + uint16_t carpr_vrrp_adv_inter; }; /* * Brief design of carp(4). * * Any carp-capable ifnet may have a list of carp softcs hanging off * its ifp->if_carp pointer. Each softc represents one unique virtual * host id, or vhid. The softc has a back pointer to the ifnet. All * softcs are joined in a global list, which has quite limited use. * * Any interface address that takes part in CARP negotiation has a * pointer to the softc of its vhid, ifa->ifa_carp. That could be either * AF_INET or AF_INET6 address. * * Although, one can get the softc's backpointer to ifnet and traverse * through its ifp->if_addrhead queue to find all interface addresses * involved in CARP, we keep a growable array of ifaddr pointers. This * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that * do calls into the network stack, thus avoiding LORs. * * Locking: * * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), * callout-driven events and ioctl()s. * * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. * To traverse the global list we use the mutex carp_mtx. * * Known issues with locking: * * - Sending ad, we put the pointer to the softc in an mtag, and no reference * counting is done on the softc. * - On module unload we may race (?) with packet processing thread * dereferencing our function pointers. */ /* Accept incoming CARP packets. */ VNET_DEFINE_STATIC(int, carp_allow) = 1; #define V_carp_allow VNET(carp_allow) /* Set DSCP in outgoing CARP packets. */ VNET_DEFINE_STATIC(int, carp_dscp) = 56; #define V_carp_dscp VNET(carp_dscp) /* Preempt slower nodes. */ VNET_DEFINE_STATIC(int, carp_preempt) = 0; #define V_carp_preempt VNET(carp_preempt) /* Log level. */ VNET_DEFINE_STATIC(int, carp_log) = 1; #define V_carp_log VNET(carp_log) /* Global advskew demotion. */ VNET_DEFINE_STATIC(int, carp_demotion) = 0; #define V_carp_demotion VNET(carp_demotion) /* Send error demotion factor. */ VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; #define V_carp_senderr_adj VNET(carp_senderr_adj) /* Iface down demotion factor. */ VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; #define V_carp_ifdown_adj VNET(carp_ifdown_adj) static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "CARP"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", "Accept incoming CARP packets"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_dscp_sysctl, "I", "DSCP value for carp packets"); SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_log), 0, "CARP log level"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_demote_adj_sysctl, "I", "Adjust demotion factor (skew of advskew)"); SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_ifdown_adj), 0, "Interface down demotion factor adjustment"); VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); VNET_PCPUSTAT_SYSINIT(carpstats); VNET_PCPUSTAT_SYSUNINIT(carpstats); #define CARPSTATS_ADD(name, val) \ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ sizeof(uint64_t)], (val)) #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ NULL, MTX_DEF) #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ NULL, MTX_DEF) #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ CIF_UNLOCK(cif); \ } while (0) #define CARP_LOG(...) do { \ if (V_carp_log > 0) \ log(LOG_INFO, "carp: " __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ if (V_carp_log > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) #define IFNET_FOREACH_IFA(ifp, ifa) \ CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ if ((ifa)->ifa_carp != NULL) #define CARP_FOREACH_IFA(sc, ifa) \ CARP_LOCK_ASSERT(sc); \ for (int _i = 0; \ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ ((ifa) = sc->sc_ifas[_i]) != NULL; \ ++_i) #define IFNET_FOREACH_CARP(ifp, sc) \ KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) #define DEMOTE_ADVSKEW(sc) \ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ CARP_MAXSKEW : \ (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 0 : ((sc)->sc_advskew + V_carp_demotion))) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); +static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); static struct carp_softc *carp_alloc(struct ifnet *); static void carp_destroy(struct carp_softc *); static struct carp_if *carp_alloc_if(struct ifnet *); static void carp_free_if(struct carp_if *); static void carp_set_state(struct carp_softc *, int, const char* reason); static void carp_sc_state(struct carp_softc *); static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *, const char* reason); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); +static void vrrp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); static void carp_ifa_delroute(struct ifaddr *); static void carp_send_ad_all(void *, int); static void carp_demote_adj(int, char *); static LIST_HEAD(, carp_softc) carp_list; static struct mtx carp_mtx; static struct sx carp_sx; static struct task carp_sendall_task = TASK_INITIALIZER(0, carp_send_ad_all, NULL); static int carp_is_supported_if(if_t ifp) { if (ifp == NULL) return (ENXIO); switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_BRIDGE: break; default: return (EOPNOTSUPP); } return (0); } static void carp_hmac_prepare(struct carp_softc *sc) { - uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; #ifdef INET struct in_addr last, cur, in; #endif #ifdef INET6 struct in6_addr last6, cur6, in6; #endif CARP_LOCK_ASSERT(sc); /* Compute ipad from key. */ bzero(sc->sc_pad, sizeof(sc->sc_pad)); bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36; /* Precompute first part of inner hash. */ SHA1Init(&sc->sc_sha1); SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); #ifdef INET cur.s_addr = 0; do { found = 0; last = cur; cur.s_addr = 0xffffffff; CARP_FOREACH_IFA(sc, ifa) { in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; if (ifa->ifa_addr->sa_family == AF_INET && ntohl(in.s_addr) > ntohl(last.s_addr) && ntohl(in.s_addr) < ntohl(cur.s_addr)) { cur.s_addr = in.s_addr; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); } while (found); #endif /* INET */ #ifdef INET6 memset(&cur6, 0, sizeof(cur6)); do { found = 0; last6 = cur6; memset(&cur6, 0xff, sizeof(cur6)); CARP_FOREACH_IFA(sc, ifa) { in6 = ifatoia6(ifa)->ia_addr.sin6_addr; if (IN6_IS_SCOPE_EMBED(&in6)) in6.s6_addr16[1] = 0; if (ifa->ifa_addr->sa_family == AF_INET6 && memcmp(&in6, &last6, sizeof(in6)) > 0 && memcmp(&in6, &cur6, sizeof(in6)) < 0) { cur6 = in6; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); } while (found); #endif /* INET6 */ /* convert ipad to opad */ for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36 ^ 0x5c; } static void carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { SHA1_CTX sha1ctx; CARP_LOCK_ASSERT(sc); /* fetch first half of inner hash */ bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); SHA1Final(md, &sha1ctx); /* outer hash */ SHA1Init(&sha1ctx); SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sha1ctx, md, 20); SHA1Final(md, &sha1ctx); } static int carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { unsigned char md2[20]; CARP_LOCK_ASSERT(sc); carp_hmac_generate(sc, counter, md2); return (bcmp(md, md2, sizeof(md2))); } +static int +vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) +{ + uint16_t cksum; + + /* + * Note that VRRPv3 checksums are different from CARP checksums. + * Carp just calculates the checksum over the packet. + * VRRPv3 includes the pseudo-header checksum as well. + */ + cksum = in_cksum_skip(m, off + len, off); + cksum -= phdrcksum; + + return (cksum); +} + /* * process input packet. * we have rearranged checks order compared to the rfc, * but it seems more efficient this way or not possible otherwise. */ #ifdef INET static int carp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); - struct carp_header *ch; - int iplen, len; + struct vrrpv3_header *vh; + int iplen; + int minlen; + int totlen; + uint16_t phdrcksum = 0; iplen = *offp; *mp = NULL; CARPSTATS_INC(carps_ipackets); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } iplen = ip->ip_hl << 2; + totlen = ntohs(ip->ip_len); - if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + /* Ensure we have enough header to figure out the version. */ + if (m->m_pkthdr.len < iplen + sizeof(*vh)) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " + CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if (iplen + sizeof(*ch) < m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + if (iplen + sizeof(*vh) < m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { CARPSTATS_INC(carps_hdrops); - CARP_DEBUG("%s: pullup failed\n", __func__); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } - ch = (struct carp_header *)((char *)ip + iplen); + vh = (struct vrrpv3_header *)((char *)ip + iplen); - /* - * verify that the received packet length is - * equal to the CARP header - */ - len = iplen + sizeof(*ch); - if (len > m->m_pkthdr.len) { + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. */ + if (m->m_pkthdr.len < iplen + minlen) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: packet too short %d on %s\n", __func__, - m->m_pkthdr.len, + CARP_DEBUG("%s: received len %zd < %d " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + iplen + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if ((m = m_pullup(m, len)) == NULL) { - CARPSTATS_INC(carps_hdrops); - return (IPPROTO_DONE); + if (iplen + minlen < m->m_len) { + if ((m = m_pullup(m, iplen + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip = mtod(m, struct ip *); + vh = (struct vrrpv3_header *)((char *)ip + iplen); } - ip = mtod(m, struct ip *); - ch = (struct carp_header *)((char *)ip + iplen); - /* verify the CARP checksum */ - m->m_data += iplen; - if (in_cksum(m, len - iplen)) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed on %s\n", __func__, - if_name(m->m_pkthdr.rcvif)); - m_freem(m); - return (IPPROTO_DONE); + phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl((u_short)(totlen - iplen) + ip->ip_p)); + + if (vh->vrrp_version == CARP_VERSION_CARP) { + /* verify the CARP checksum */ + m->m_data += iplen; + if (in_cksum(m, totlen - iplen)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= iplen; + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch = (struct carp_header *)((char *)ip + iplen); + carp_input_c(m, ch, AF_INET, ip->ip_ttl); + break; + } + case CARP_VERSION_VRRPv3: + vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, phdrcksum); + break; + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); } - m->m_data -= iplen; - carp_input_c(m, ch, AF_INET, ip->ip_ttl); return (IPPROTO_DONE); } #endif #ifdef INET6 static int carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct carp_header *ch; - u_int len; + struct vrrpv3_header *vh; + u_int len, minlen; + uint16_t phdrcksum = 0; CARPSTATS_INC(carps_ipackets6); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* check if received on a valid carp interface */ if (m->m_pkthdr.rcvif->if_carp == NULL) { CARPSTATS_INC(carps_badif); CARP_DEBUG("%s: packet received on non-carp interface: %s\n", __func__, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - /* verify that we have a complete carp packet */ - if (m->m_len < *offp + sizeof(*ch)) { + if (m->m_len < *offp + sizeof(*vh)) { len = m->m_len; - m = m_pullup(m, *offp + sizeof(*ch)); + m = m_pullup(m, *offp + sizeof(*vh)); if (m == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); return (IPPROTO_DONE); } ip6 = mtod(m, struct ip6_hdr *); } - ch = (struct carp_header *)(mtod(m, char *) + *offp); + vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); - /* verify the CARP checksum */ - m->m_data += *offp; - if (in_cksum(m, sizeof(*ch))) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. */ + if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { + CARPSTATS_INC(carps_badlen); + CARP_DEBUG("%s: received len %zd < %zd " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + sizeof(*ip6) + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - m->m_data -= *offp; - carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + if (sizeof (*ip6) + minlen < m->m_len) { + if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip6 = mtod(m, struct ip6_hdr *); + vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + } + + phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), ip6->ip6_nxt, 0); + + /* verify the CARP checksum */ + if (vh->vrrp_version == CARP_VERSION_CARP) { + m->m_data += *offp; + if (in_cksum(m, sizeof(struct carp_header))) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= *offp; + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); + carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + break; + } + case CARP_VERSION_VRRPv3: + vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, ntohs(ip6->ip6_plen), + phdrcksum); + break; + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); + } return (IPPROTO_DONE); } #endif /* INET6 */ /* * This routine should not be necessary at all, but some switches * (VMWare ESX vswitches) can echo our own packets back at us, * and we must ignore them or they will cause us to drop out of * MASTER mode. * * We cannot catch all cases of network loops. Instead, what we * do here is catch any packet that arrives with a carp header * with a VHID of 0, that comes from an address that is our own. * These packets are by definition "from us" (even if they are from * a misconfigured host that is pretending to be us). * * The VHID test is outside this mini-function. */ static int -carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) +carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; struct in_addr in4; #endif #ifdef INET6 struct ip6_hdr *ip6; struct in6_addr in6; #endif switch (af) { #ifdef INET case AF_INET: ip4 = mtod(m, struct ip *); in4 = ifatoia(ifa)->ia_addr.sin_addr; return (in4.s_addr == ip4->ip_src.s_addr); #endif #ifdef INET6 case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); in6 = ifatoia6(ifa)->ia_addr.sin6_addr; return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); #endif default: break; } return (0); } -static void -carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +static struct ifaddr * +carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; - struct carp_softc *sc; - uint64_t tmp_counter; - struct timeval sc_tv, ch_tv; int error; - bool multicast = false; NET_EPOCH_ASSERT(); /* * Verify that the VHID is valid on the receiving interface. * * There should be just one match. If there are none * the VHID is not valid and we drop the packet. If * there are multiple VHID matches, take just the first * one, for compatibility with previous code. While we're * scanning, check for obvious loops in the network topology * (these should never happen, and as noted above, we may * miss real loops; this is just a double-check). */ error = 0; match = NULL; IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && - ifa->ifa_carp->sc_vhid == ch->carp_vhid) + ifa->ifa_carp->sc_vhid == vhid) match = ifa; - if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) + if (vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? NULL : match; if (ifa != NULL) ifa_ref(ifa); if (ifa == NULL) { if (error == ELOOP) { CARP_DEBUG("dropping looped packet on interface %s\n", if_name(ifp)); CARPSTATS_INC(carps_badif); /* ??? */ } else { CARPSTATS_INC(carps_badvhid); } + } + + return (ifa); +} + +static void +carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + uint64_t tmp_counter; + struct timeval sc_tv, ch_tv; + bool multicast = false; + + NET_EPOCH_ASSERT(); + + ifa = carp_find_ifa(m, af, ch->carp_vhid); + if (ifa == NULL) { m_freem(m); return; } + sc = ifa->ifa_carp; + CARP_LOCK(sc); + /* verify the CARP version. */ - if (ch->carp_version != CARP_VERSION) { + if (ch->carp_version != CARP_VERSION_CARP || + sc->sc_version != CARP_VERSION_CARP) { + CARP_UNLOCK(sc); + CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), ch->carp_version); ifa_free(ifa); m_freem(m); return; } - sc = ifa->ifa_carp; - CARP_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); } else { multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); } ifa_free(ifa); /* verify that the IP TTL is 255, but only if we're not in unicast mode. */ if (multicast && ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ttl, if_name(m->m_pkthdr.rcvif)); goto out; } if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { CARPSTATS_INC(carps_badauth); CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, sc->sc_vhid, if_name(ifp)); goto out; } tmp_counter = ntohl(ch->carp_counter[0]); tmp_counter = tmp_counter<<32; tmp_counter += ntohl(ch->carp_counter[1]); /* XXX Replay protection goes here */ sc->sc_init_counter = 0; sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; ch_tv.tv_sec = ch->carp_advbase; ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; switch (sc->sc_state) { case INIT: break; case MASTER: /* * If we receive an advertisement from a master who's going to * be more frequent than us, go into BACKUP state. */ if (timevalcmp(&sc_tv, &ch_tv, >) || timevalcmp(&sc_tv, &ch_tv, ==)) { callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "more frequent advertisement received"); carp_setrun(sc, 0); carp_delroute(sc); } break; case BACKUP: /* * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "preempting a slower master"); break; } /* * If the master is going to advertise at such a low frequency * that he's guaranteed to time out, we'd might as well just * treat him as timed out now. */ sc_tv.tv_sec = sc->sc_advbase * 3; if (timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "master will time out"); break; } /* * Otherwise, we reset the counter and wait for the next * advertisement. */ carp_setrun(sc, af); break; } out: CARP_UNLOCK(sc); m_freem(m); } +static void +vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, + int len, uint16_t phdrcksum) +{ + struct vrrpv3_header *vh = mtodo(m, off); + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + + NET_EPOCH_ASSERT(); + + ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); + if (ifa == NULL) { + m_freem(m); + return; + } + + sc = ifa->ifa_carp; + CARP_LOCK(sc); + + ifa_free(ifa); + + /* verify the CARP version. */ + if (vh->vrrp_version != CARP_VERSION_VRRPv3 || sc->sc_version != CARP_VERSION_VRRPv3) { + CARP_UNLOCK(sc); + + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), + vh->vrrp_version); + m_freem(m); + return; + } + + /* verify that the IP TTL is 255. */ + if (ttl != CARP_DFLTTL) { + CARPSTATS_INC(carps_badttl); + CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, + ttl, if_name(m->m_pkthdr.rcvif)); + goto out; + } + + if (vrrp_checksum_verify(m, off, len, phdrcksum)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, + sc->sc_vhid, if_name(ifp)); + goto out; + } + + /* RFC9568, 7.1 Receiving VRRP packets. */ + if (sc->sc_vrrp_prio == 255) { + CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", + __func__); + goto out; + } + + /* XXX TODO Check IP address payload. */ + + sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. + * Same if the peer has a higher priority than us. + */ + if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || + vh->vrrp_priority > sc->sc_vrrp_prio) { + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP, + "more frequent advertisement received"); + carp_setrun(sc, 0); + carp_delroute(sc); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter + || vh->vrrp_priority < sc->sc_vrrp_prio)) { + carp_master_down_locked(sc, + "preempting a slower master"); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. + */ + carp_setrun(sc, af); + break; + } + +out: + CARP_UNLOCK(sc); + m_freem(m); +} + static int -carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +carp_tag(struct carp_softc *sc, struct mbuf *m) { struct m_tag *mtag; + /* Tag packet for carp_output */ + if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), + M_NOWAIT)) == NULL) { + m_freem(m); + CARPSTATS_INC(carps_onomem); + return (ENOMEM); + } + bcopy(&sc, mtag + 1, sizeof(sc)); + m_tag_prepend(m, mtag); + + return (0); +} + +static int +carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +{ + if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); } else sc->sc_counter++; ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); - /* Tag packet for carp_output */ - if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), - M_NOWAIT)) == NULL) { - m_freem(m); - CARPSTATS_INC(carps_onomem); - return (ENOMEM); - } - bcopy(&sc, mtag + 1, sizeof(sc)); - m_tag_prepend(m, mtag); - - return (0); + return (carp_tag(sc, m)); } /* * To avoid LORs and possible recursions this function shouldn't * be called directly, but scheduled via taskqueue. */ static void carp_send_ad_all(void *ctx __unused, int pending __unused) { struct carp_softc *sc; struct epoch_tracker et; NET_EPOCH_ENTER(et); mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) if (sc->sc_state == MASTER) { CARP_LOCK(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); NET_EPOCH_EXIT(et); } /* Send a periodic advertisement, executed in callout context. */ static void carp_send_ad(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); } static void carp_send_ad_error(struct carp_softc *sc, int error) { /* * We track errors and successful sends with this logic: * - Any error resets success counter to 0. * - MAX_ERRORS triggers demotion. * - MIN_SUCCESS successes resets error counter to 0. * - MIN_SUCCESS reverts demotion, if it was triggered before. */ if (error) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send error %d on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else if (sc->sc_sendad_errors > 0) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send ok on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, if_name(sc->sc_carpdev)); carp_demote_adj(-V_carp_senderr_adj, msg); } sc->sc_sendad_errors = 0; } } } /* * Pick the best ifaddr on the given ifp for sending CARP * advertisements. * * "Best" here is defined by ifa_preferred(). This function is much * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). * * (This could be simplified to return the actual address, except that * it has a different format in AF_INET and AF_INET6.) */ static struct ifaddr * carp_best_ifa(int af, struct ifnet *ifp) { struct ifaddr *ifa, *best; NET_EPOCH_ASSERT(); if (af >= AF_MAX) return (NULL); best = NULL; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == af && (best == NULL || ifa_preferred(best, ifa))) best = ifa; } if (best != NULL) ifa_ref(best); return (best); } static void carp_send_ad_locked(struct carp_softc *sc) { struct carp_header ch; struct timeval tv; struct ifaddr *ifa; struct carp_header *ch_ptr; struct mbuf *m; int len, advskew; NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); + if (sc->sc_version == CARP_VERSION_VRRPv3) { + vrrp_send_ad_locked(sc); + return; + } + advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; - ch.carp_version = CARP_VERSION; + ch.carp_version = CARP_VERSION_CARP; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; ch.carp_advskew = advskew; ch.carp_authlen = 7; /* XXX DEFINE */ ch.carp_pad1 = 0; /* must be zero */ ch.carp_cksum = 0; /* XXXGL: OpenBSD picks first ifaddr with needed family. */ #ifdef INET if (sc->sc_naddrs) { struct ip *ip; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; ip->ip_len = htons(len); ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip_fillid(ip); ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); if (ifa != NULL) { ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; ifa_free(ifa); } else ip->ip_src.s_addr = 0; ip->ip_dst = sc->sc_carpaddr; ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); m->m_data -= sizeof(*ip); CARPSTATS_INC(carps_opackets); carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_carpdev->if_carp->cif_imo, NULL)); } #endif /* INET */ #ifdef INET6 if (sc->sc_naddrs6) { struct ip6_hdr *ip6; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip6) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; /* Traffic class isn't defined in ip6 struct instead * it gets offset into flowid field */ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + IPTOS_DSCP_OFFSET)); ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; /* set the source address */ ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); if (ifa != NULL) { bcopy(IFA_IN6(ifa), &ip6->ip6_src, sizeof(struct in6_addr)); ifa_free(ifa); } else /* This should never happen with IPv6. */ bzero(&ip6->ip6_src, sizeof(struct in6_addr)); /* Set the multicast destination. */ memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); goto resched; } } ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip6); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); m->m_data -= sizeof(*ip6); CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); } #endif /* INET6 */ resched: callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); } +static void +vrrp_send_ad_locked(struct carp_softc *sc) +{ + struct vrrpv3_header *vh_ptr; + struct ifaddr *ifa; + struct mbuf *m; + int len; + struct vrrpv3_header vh = { + .vrrp_version = CARP_VERSION_VRRPv3, + .vrrp_type = VRRP_TYPE_ADVERTISEMENT, + .vrrp_vrtid = sc->sc_vhid, + .vrrp_priority = sc->sc_vrrp_prio, + .vrrp_count_addr = 0, + .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), + .vrrp_checksum = 0, + }; + + NET_EPOCH_ASSERT(); + CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_VRRPv3); + +#ifdef INET + if (sc->sc_naddrs) { + struct ip *ip; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; + ip->ip_off = htons(IP_DF); + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip_fillid(ip); + + ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); + if (ifa != NULL) { + ip->ip_src.s_addr = + ifatoia(ifa)->ia_addr.sin_addr.s_addr; + ifa_free(ifa); + } else + ip->ip_src.s_addr = 0; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + /* Include the IP addresses in the announcement. */ + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in *in; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) + continue; + + in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in->sin_addr), + (caddr_t)&in->sin_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in->sin_addr); + } + ip->ip_len = htons(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); + + if (carp_tag(sc, m)) + goto resched; + + CARPSTATS_INC(carps_opackets); + + carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, + &sc->sc_carpdev->if_carp->cif_imo, NULL)); + } +#endif +#ifdef INET6 + if (sc->sc_naddrs6) { + struct ip6_hdr *ip6; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip6) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + /* Traffic class isn't defined in ip6 struct instead + * it gets offset into flowid field */ + ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + + IPTOS_DSCP_OFFSET)); + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + + /* set the source address */ + ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); + if (ifa != NULL) { + bcopy(IFA_IN6(ifa), &ip6->ip6_src, + sizeof(struct in6_addr)); + ifa_free(ifa); + } else + /* This should never happen with IPv6. */ + bzero(&ip6->ip6_src, sizeof(struct in6_addr)); + + /* Set the multicast destination. */ + bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); + ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + ip6->ip6_dst.s6_addr8[15] = 0x12; + + /* Include the IP addresses in the announcement. */ + len = sizeof(vh); + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in6 *in6; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in6->sin6_addr), + (char *)&in6->sin6_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in6->sin6_addr); + } + ip6->ip6_plen = htonl(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); + + if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { + m_freem(m); + CARP_DEBUG("%s: in6_setscope failed\n", __func__); + goto resched; + } + + if (carp_tag(sc, m)) + goto resched; + CARPSTATS_INC(carps_opackets6); + + carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, + &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); + } +#endif + +resched: + callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, + carp_send_ad, sc); +} + static void carp_addroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_addroute(ifa); } static void carp_ifa_addroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: in_addprefix(ifatoia(ifa)); ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); break; #endif #ifdef INET6 case AF_INET6: ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_add_ifa_lle(ifatoia6(ifa)); break; #endif } } static void carp_delroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_delroute(ifa); } static void carp_ifa_delroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); in_scrubprefix(ifatoia(ifa), LLE_STATIC); break; #endif #ifdef INET6 case AF_INET6: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_rem_ifa_lle(ifatoia6(ifa), 1); break; #endif } } int carp_master(struct ifaddr *ifa) { struct carp_softc *sc = ifa->ifa_carp; return (sc->sc_state == MASTER); } #ifdef INET /* * Broadcast a gratuitous ARP request containing * the virtual router MAC address for each IP address * associated with the virtual router. */ static void carp_send_arp(struct carp_softc *sc) { struct ifaddr *ifa; struct in_addr addr; NET_EPOCH_ASSERT(); CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET) continue; addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); } } int carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) { struct carp_softc *sc = ifa->ifa_carp; if (sc->sc_state == MASTER) { *enaddr = LLADDR(&sc->sc_addr); return (1); } return (0); } #endif #ifdef INET6 static void carp_send_na(struct carp_softc *sc) { static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; struct ifaddr *ifa; struct in6_addr *in6; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6 = IFA_IN6(ifa); nd6_na_output(sc->sc_carpdev, &mcast, in6, ND_NA_FLAG_OVERRIDE, 1, NULL); DELAY(1000); /* XXX */ } } /* * Returns ifa in case it's a carp address and it is MASTER, or if the address * matches and is not a carp address. Returns NULL otherwise. */ struct ifaddr * carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); ifa = NULL; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) continue; if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) ifa = NULL; else ifa_ref(ifa); break; } return (ifa); } char * carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); IFNET_FOREACH_IFA(ifp, ifa) if (ifa->ifa_addr->sa_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { struct carp_softc *sc = ifa->ifa_carp; struct m_tag *mtag; mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT); if (mtag == NULL) /* Better a bit than nothing. */ return (LLADDR(&sc->sc_addr)); bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (LLADDR(&sc->sc_addr)); } return (NULL); } #endif /* INET6 */ int carp_forus(struct ifnet *ifp, u_char *dhost) { struct carp_softc *sc; uint8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { /* * CARP_LOCK() is not here, since would protect nothing, but * cause deadlock with if_bridge, calling this under its lock. */ if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), ETHER_ADDR_LEN)) { CIF_UNLOCK(ifp->if_carp); return (1); } } CIF_UNLOCK(ifp->if_carp); return (0); } /* Master down timeout event, executed in callout context. */ static void carp_master_down(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); if (sc->sc_state == BACKUP) { carp_master_down_locked(sc, "master timed out"); } CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); } static void carp_master_down_locked(struct carp_softc *sc, const char *reason) { NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); carp_send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif #ifdef INET6 carp_send_na(sc); #endif carp_setrun(sc, 0); carp_addroute(sc); break; case INIT: case MASTER: #ifdef INVARIANTS panic("carp: VHID %u@%s: master_down event in %s state\n", sc->sc_vhid, if_name(sc->sc_carpdev), sc->sc_state ? "MASTER" : "INIT"); #endif break; } } /* * When in backup state, af indicates whether to reset the master down timer * for v4 or v6. If it's set to zero, reset the ones which are already pending. */ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; + int timeout; CARP_LOCK_ASSERT(sc); if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || sc->sc_carpdev->if_link_state != LINK_STATE_UP || (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || !V_carp_allow) return; switch (sc->sc_state) { case INIT: carp_set_state(sc, BACKUP, "initialization complete"); carp_setrun(sc, 0); break; case BACKUP: callout_stop(&sc->sc_ad_tmo); - tv.tv_sec = 3 * sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; + + if (sc->sc_version == CARP_VERSION_CARP) { + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + timeout = tvtohz(&tv); + } else { + /* skew time */ + timeout = (256 - sc->sc_vrrp_prio) * sc->sc_vrrp_master_inter / 256; + timeout += (3 * sc->sc_vrrp_master_inter); + timeout *= hz; + timeout /= 100; /* master interval is in centiseconds. */ + } + switch (af) { #ifdef INET case AF_INET: - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); #endif break; } break; case MASTER: - tv.tv_sec = sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; - callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), - carp_send_ad, sc); + if (sc->sc_version == CARP_VERSION_CARP) { + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + } else { + callout_reset(&sc->sc_ad_tmo, + sc->sc_vrrp_adv_inter * hz / 100, + carp_send_ad, sc); + } break; } } /* * Setup multicast structures. */ static int carp_multicast_setup(struct carp_if *cif, sa_family_t sa) { struct ifnet *ifp = cif->cif_ifp; int error = 0; switch (sa) { #ifdef INET case AF_INET: { struct ip_moptions *imo = &cif->cif_imo; struct in_mfilter *imf; struct in_addr addr; if (ip_mfilter_first(&imo->imo_head) != NULL) return (0); imf = ip_mfilter_alloc(M_WAITOK, 0, 0); ip_mfilter_init(&imo->imo_head); imo->imo_multicast_vif = -1; addr.s_addr = htonl(INADDR_CARP_GROUP); if ((error = in_joingroup(ifp, &addr, NULL, &imf->imf_inm)) != 0) { ip_mfilter_free(imf); break; } ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_mfilter *im6f[2]; struct in6_addr in6; if (ip6_mfilter_first(&im6o->im6o_head)) return (0); im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); ip6_mfilter_init(&im6o->im6o_head); im6o->im6o_multicast_hlim = CARP_DFLTTL; im6o->im6o_multicast_ifp = ifp; /* Join IPv6 CARP multicast group. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } /* Join solicited multicast address. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr32[1] = 0; in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = 0; in6.s6_addr8[12] = 0xff; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { in6_leavegroup(im6f[0]->im6f_in6m, NULL); ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); break; } #endif } return (error); } /* * Free multicast structures. */ static void carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) { #ifdef INET struct ip_moptions *imo = &cif->cif_imo; struct in_mfilter *imf; #endif #ifdef INET6 struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_mfilter *im6f; #endif sx_assert(&carp_sx, SA_XLOCKED); switch (sa) { #ifdef INET case AF_INET: if (cif->cif_naddrs != 0) break; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); in_leavegroup(imf->imf_inm, NULL); ip_mfilter_free(imf); } break; #endif #ifdef INET6 case AF_INET6: if (cif->cif_naddrs6 != 0) break; while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { ip6_mfilter_remove(&im6o->im6o_head, im6f); in6_leavegroup(im6f->im6f_in6m, NULL); ip6_mfilter_free(im6f); } break; #endif } } int carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; struct carp_softc *sc; if (!sa) return (0); switch (sa->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return (0); } mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); if (mtag == NULL) return (0); bcopy(mtag + 1, &sc, sizeof(sc)); switch (sa->sa_family) { case AF_INET: if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) return (0); break; case AF_INET6: if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6)) return (0); break; default: panic("Unknown af"); } /* Set the source MAC address to the Virtual Router MAC Address. */ switch (ifp->if_type) { case IFT_ETHER: case IFT_BRIDGE: case IFT_L2VLAN: { struct ether_header *eh; eh = mtod(m, struct ether_header *); eh->ether_shost[0] = 0; eh->ether_shost[1] = 0; eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; eh->ether_shost[5] = sc->sc_vhid; } break; default: printf("%s: carp is not supported for the %d interface type\n", if_name(ifp), ifp->if_type); return (EOPNOTSUPP); } return (0); } static struct carp_softc* carp_alloc(struct ifnet *ifp) { struct carp_softc *sc; struct carp_if *cif; sx_assert(&carp_sx, SA_XLOCKED); if ((cif = ifp->if_carp) == NULL) cif = carp_alloc_if(ifp); sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + sc->sc_version = CARP_VERSION_CARP; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); sc->sc_carpdev = ifp; sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; sc->sc_carpaddr6.s6_addr8[15] = 0x12; + sc->sc_vrrp_adv_inter = 100; + sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; + sc->sc_vrrp_prio = 100; + CARP_LOCK_INIT(sc); #ifdef INET callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif #ifdef INET6 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); CIF_LOCK(cif); TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carp_list, sc, sc_next); mtx_unlock(&carp_mtx); return (sc); } static void carp_grow_ifas(struct carp_softc *sc) { struct ifaddr **new; new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); CARP_LOCK(sc); bcopy(sc->sc_ifas, new, sc->sc_ifasiz); free(sc->sc_ifas, M_CARP); sc->sc_ifas = new; sc->sc_ifasiz *= 2; CARP_UNLOCK(sc); } static void carp_destroy(struct carp_softc *sc) { struct ifnet *ifp = sc->sc_carpdev; struct carp_if *cif = ifp->if_carp; sx_assert(&carp_sx, SA_XLOCKED); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); CARP_UNLOCK(sc); CIF_LOCK(cif); TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_REMOVE(sc, sc_next); mtx_unlock(&carp_mtx); callout_drain(&sc->sc_ad_tmo); #ifdef INET callout_drain(&sc->sc_md_tmo); #endif #ifdef INET6 callout_drain(&sc->sc_md6_tmo); #endif CARP_LOCK_DESTROY(sc); free(sc->sc_ifas, M_CARP); free(sc, M_CARP); } static struct carp_if* carp_alloc_if(struct ifnet *ifp) { struct carp_if *cif; int error; cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); if ((error = ifpromisc(ifp, 1)) != 0) printf("%s: ifpromisc(%s) failed: %d\n", __func__, if_name(ifp), error); else cif->cif_flags |= CIF_PROMISC; CIF_LOCK_INIT(cif); cif->cif_ifp = ifp; TAILQ_INIT(&cif->cif_vrs); IF_ADDR_WLOCK(ifp); ifp->if_carp = cif; if_ref(ifp); IF_ADDR_WUNLOCK(ifp); return (cif); } static void carp_free_if(struct carp_if *cif) { struct ifnet *ifp = cif->cif_ifp; CIF_LOCK_ASSERT(cif); KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", __func__)); IF_ADDR_WLOCK(ifp); ifp->if_carp = NULL; IF_ADDR_WUNLOCK(ifp); CIF_LOCK_DESTROY(cif); if (cif->cif_flags & CIF_PROMISC) ifpromisc(ifp, 0); if_rele(ifp); free(cif, M_CARP); } static bool carp_carprcp(void *arg, struct carp_softc *sc, int priv) { struct carpreq *carpr = arg; CARP_LOCK(sc); carpr->carpr_state = sc->sc_state; carpr->carpr_vhid = sc->sc_vhid; carpr->carpr_advbase = sc->sc_advbase; carpr->carpr_advskew = sc->sc_advskew; if (priv) bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); else bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); CARP_UNLOCK(sc); return (true); } static int carp_ioctl_set(if_t ifp, struct carpkreq *carpr) { struct epoch_tracker et; struct carp_softc *sc = NULL; int error = 0; - if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || - carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { + carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0 || + (carpr->carpr_version != CARP_VERSION_CARP && + carpr->carpr_version != CARP_VERSION_VRRPv3)) { return (EINVAL); } if (ifp->if_carp) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr->carpr_vhid) break; } if (sc == NULL) { sc = carp_alloc(ifp); CARP_LOCK(sc); sc->sc_vhid = carpr->carpr_vhid; LLADDR(&sc->sc_addr)[0] = 0; LLADDR(&sc->sc_addr)[1] = 0; LLADDR(&sc->sc_addr)[2] = 0x5e; LLADDR(&sc->sc_addr)[3] = 0; LLADDR(&sc->sc_addr)[4] = 1; LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; } else CARP_LOCK(sc); + sc->sc_version = carpr->carpr_version; if (carpr->carpr_advbase > 0) { if (carpr->carpr_advbase > 255 || carpr->carpr_advbase < CARP_DFLTINTV) { error = EINVAL; goto out; } sc->sc_advbase = carpr->carpr_advbase; } if (carpr->carpr_advskew >= 255) { error = EINVAL; goto out; } sc->sc_advskew = carpr->carpr_advskew; if (carpr->carpr_addr.s_addr != INADDR_ANY) sc->sc_carpaddr = carpr->carpr_addr; if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, sizeof(sc->sc_carpaddr6)); } if (carpr->carpr_key[0] != '\0') { bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); carp_hmac_prepare(sc); } + if (carpr->carpr_vrrp_priority != 0) + sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; + if (carpr->carpr_vrrp_adv_inter) + sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; + if (sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { switch (carpr->carpr_state) { case BACKUP: callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "user requested via ifconfig"); carp_setrun(sc, 0); carp_delroute(sc); break; case MASTER: NET_EPOCH_ENTER(et); carp_master_down_locked(sc, "user requested via ifconfig"); NET_EPOCH_EXIT(et); break; default: break; } } out: CARP_UNLOCK(sc); return (error); } static int carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, bool (*outfn)(void *, struct carp_softc *, int), void *arg) { int priveleged; struct carp_softc *sc; if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID) return (EINVAL); if (carpr->carpr_count < 1) return (EMSGSIZE); if (ifp->if_carp == NULL) return (ENOENT); priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); if (carpr->carpr_vhid != 0) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr->carpr_vhid) break; if (sc == NULL) return (ENOENT); if (! outfn(arg, sc, priveleged)) return (ENOMEM); carpr->carpr_count = 1; } else { int count; count = 0; IFNET_FOREACH_CARP(ifp, sc) count++; if (count > carpr->carpr_count) return (EMSGSIZE); IFNET_FOREACH_CARP(ifp, sc) { if (! outfn(arg, sc, priveleged)) return (ENOMEM); carpr->carpr_count = count; } } return (0); } int carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) { struct carpreq carpr; struct carpkreq carprk = { }; struct ifnet *ifp; int error = 0; if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) return (error); ifp = ifunit_ref(ifr->ifr_name); if ((error = carp_is_supported_if(ifp)) != 0) goto out; if ((ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; goto out; } sx_xlock(&carp_sx); switch (cmd) { case SIOCSVH: if ((error = priv_check(td, PRIV_NETINET_CARP))) break; memcpy(&carprk, &carpr, sizeof(carpr)); error = carp_ioctl_set(ifp, &carprk); break; case SIOCGVH: error = carp_ioctl_get(ifp, td->td_ucred, &carpr, carp_carprcp, &carpr); if (error == 0) { error = copyout(&carpr, (char *)ifr_data_get_ptr(ifr), carpr.carpr_count * sizeof(carpr)); } break; default: error = EINVAL; } sx_xunlock(&carp_sx); out: if (ifp != NULL) if_rele(ifp); return (error); } static int carp_get_vhid(struct ifaddr *ifa) { if (ifa == NULL || ifa->ifa_carp == NULL) return (0); return (ifa->ifa_carp->sc_vhid); } int carp_attach(struct ifaddr *ifa, int vhid) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc; int index, error; KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: #endif break; default: return (EPROTOTYPE); } sx_xlock(&carp_sx); if (ifp->if_carp == NULL) { sx_xunlock(&carp_sx); return (ENOPROTOOPT); } IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == vhid) break; if (sc == NULL) { sx_xunlock(&carp_sx); return (ENOENT); } error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); if (error) { CIF_FREE(cif); sx_xunlock(&carp_sx); return (error); } index = sc->sc_naddrs + sc->sc_naddrs6 + 1; if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) carp_grow_ifas(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs++; sc->sc_naddrs++; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6++; sc->sc_naddrs6++; break; #endif } ifa_ref(ifa); CARP_LOCK(sc); sc->sc_ifas[index - 1] = ifa; ifa->ifa_carp = sc; carp_hmac_prepare(sc); carp_sc_state(sc); CARP_UNLOCK(sc); sx_xunlock(&carp_sx); return (0); } void carp_detach(struct ifaddr *ifa, bool keep_cif) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc = ifa->ifa_carp; int i, index; KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); sx_xlock(&carp_sx); CARP_LOCK(sc); /* Shift array. */ index = sc->sc_naddrs + sc->sc_naddrs6; for (i = 0; i < index; i++) if (sc->sc_ifas[i] == ifa) break; KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); for (; i < index - 1; i++) sc->sc_ifas[i] = sc->sc_ifas[i+1]; sc->sc_ifas[index - 1] = NULL; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs--; sc->sc_naddrs--; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6--; sc->sc_naddrs6--; break; #endif } carp_ifa_delroute(ifa); carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); ifa->ifa_carp = NULL; ifa_free(ifa); carp_hmac_prepare(sc); carp_sc_state(sc); if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) carp_destroy(sc); else CARP_UNLOCK(sc); if (!keep_cif) CIF_FREE(cif); sx_xunlock(&carp_sx); } static void carp_set_state(struct carp_softc *sc, int state, const char *reason) { CARP_LOCK_ASSERT(sc); if (sc->sc_state != state) { const char *carp_states[] = { CARP_STATES }; char subsys[IFNAMSIZ+5]; snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, if_name(sc->sc_carpdev)); CARP_LOG("%s: %s -> %s (%s)\n", subsys, carp_states[sc->sc_state], carp_states[state], reason); sc->sc_state = state; devctl_notify("CARP", subsys, carp_states[state], NULL); } } static void carp_linkstate(struct ifnet *ifp) { struct carp_softc *sc; CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); carp_sc_state(sc); CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); } static void carp_sc_state(struct carp_softc *sc) { CARP_LOCK_ASSERT(sc); if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || !(sc->sc_carpdev->if_flags & IFF_UP) || !V_carp_allow) { callout_stop(&sc->sc_ad_tmo); #ifdef INET callout_stop(&sc->sc_md_tmo); #endif #ifdef INET6 callout_stop(&sc->sc_md6_tmo); #endif carp_set_state(sc, INIT, "hardware interface down"); carp_setrun(sc, 0); carp_delroute(sc); if (!sc->sc_suppress) carp_demote_adj(V_carp_ifdown_adj, "interface down"); sc->sc_suppress = 1; } else { carp_set_state(sc, INIT, "hardware interface up"); carp_setrun(sc, 0); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "interface up"); sc->sc_suppress = 0; } } static void carp_demote_adj(int adj, char *reason) { atomic_add_int(&V_carp_demotion, adj); CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); } static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; struct carp_softc *sc; new = V_carp_allow; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); if (V_carp_allow != new) { V_carp_allow = new; mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) { CARP_LOCK(sc); if (curvnet == sc->sc_carpdev->if_vnet) carp_sc_state(sc); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); } return (0); } static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; new = V_carp_dscp; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); if (new < 0 || new > 63) return (EINVAL); V_carp_dscp = new; return (0); } static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; new = V_carp_demotion; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); carp_demote_adj(new, "sysctl"); return (0); } static int nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) return (EINVAL); memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); return (0); } struct carp_nl_send_args { struct nlmsghdr *hdr; struct nl_pstate *npt; }; static bool carp_nl_send(void *arg, struct carp_softc *sc, int priv) { struct carp_nl_send_args *nlsa = arg; struct nlmsghdr *hdr = nlsa->hdr; struct nl_pstate *npt = nlsa->npt; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { nlmsg_abort(nw); return (false); } ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); if (ghdr_new == NULL) { nlmsg_abort(nw); return (false); } ghdr_new->cmd = CARP_NL_CMD_GET; ghdr_new->version = 0; ghdr_new->reserved = 0; CARP_LOCK(sc); nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); + nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); + nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); + nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, sc->sc_vrrp_adv_inter); if (priv) nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); CARP_UNLOCK(sc); if (! nlmsg_end(nw)) { nlmsg_abort(nw); return (false); } return (true); } struct nl_carp_parsed { unsigned int ifindex; char *ifname; uint32_t state; uint32_t vhid; int32_t advbase; int32_t advskew; char key[CARP_KEY_LEN]; struct in_addr addr; struct in6_addr addr6; + carp_version_t version; + uint8_t vrrp_prio; + uint16_t vrrp_adv_inter; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) static const struct nlattr_parser nla_p_set[] = { { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, + { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, }; static const struct nlfield_parser nlf_p_set[] = { }; NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set); #undef _IN #undef _OUT static int carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_carp_parsed attrs = { }; struct carp_nl_send_args args; struct carpreq carpr = { }; struct epoch_tracker et; if_t ifp = NULL; int error; error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); if (error != 0) return (error); NET_EPOCH_ENTER(et); if (attrs.ifname != NULL) ifp = ifunit_ref(attrs.ifname); else if (attrs.ifindex != 0) ifp = ifnet_byindex_ref(attrs.ifindex); NET_EPOCH_EXIT(et); if ((error = carp_is_supported_if(ifp)) != 0) goto out; hdr->nlmsg_flags |= NLM_F_MULTI; args.hdr = hdr; args.npt = npt; carpr.carpr_vhid = attrs.vhid; carpr.carpr_count = CARP_MAXVHID; sx_xlock(&carp_sx); error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, carp_nl_send, &args); sx_xunlock(&carp_sx); if (! nlmsg_end_dump(npt->nw, error, hdr)) error = ENOMEM; out: if (ifp != NULL) if_rele(ifp); return (error); } static int carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_carp_parsed attrs = { }; struct carpkreq carpr; struct epoch_tracker et; if_t ifp = NULL; int error; error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); if (error != 0) return (error); if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) return (EINVAL); if (attrs.state > CARP_MAXSTATE) return (EINVAL); if (attrs.advbase < 0 || attrs.advskew < 0) return (EINVAL); if (attrs.advbase > 255) return (EINVAL); if (attrs.advskew >= 255) return (EINVAL); + if (attrs.version == 0) + attrs.version = CARP_VERSION_CARP; + if (attrs.version != CARP_VERSION_CARP && + attrs.version != CARP_VERSION_VRRPv3) + return (EINVAL); + if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) + return (EINVAL); NET_EPOCH_ENTER(et); if (attrs.ifname != NULL) ifp = ifunit_ref(attrs.ifname); else if (attrs.ifindex != 0) ifp = ifnet_byindex_ref(attrs.ifindex); NET_EPOCH_EXIT(et); if ((error = carp_is_supported_if(ifp)) != 0) goto out; if ((ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; goto out; } carpr.carpr_count = 1; carpr.carpr_vhid = attrs.vhid; carpr.carpr_state = attrs.state; carpr.carpr_advbase = attrs.advbase; carpr.carpr_advskew = attrs.advskew; carpr.carpr_addr = attrs.addr; carpr.carpr_addr6 = attrs.addr6; + carpr.carpr_version = attrs.version; + carpr.carpr_vrrp_priority = attrs.vrrp_prio; + carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); sx_xlock(&carp_sx); error = carp_ioctl_set(ifp, &carpr); sx_xunlock(&carp_sx); out: if (ifp != NULL) if_rele(ifp); return (error); } static const struct nlhdr_parser *all_parsers[] = { &carp_parser }; static const struct genl_cmd carp_cmds[] = { { .cmd_num = CARP_NL_CMD_GET, .cmd_name = "SIOCGVH", .cmd_cb = carp_nl_get, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, }, { .cmd_num = CARP_NL_CMD_SET, .cmd_name = "SIOCSVH", .cmd_cb = carp_nl_set, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_CARP, }, }; static void carp_nl_register(void) { bool ret __diagused; int family_id __diagused; NL_VERIFY_PARSERS(all_parsers); family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, CARP_NL_CMD_MAX); MPASS(family_id != 0); ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds, NL_ARRAY_LEN(carp_cmds)); MPASS(ret); } static void carp_nl_unregister(void) { genl_unregister_family(CARP_NL_FAMILY_NAME); } static void carp_mod_cleanup(void) { carp_nl_unregister(); #ifdef INET (void)ipproto_unregister(IPPROTO_CARP); carp_iamatch_p = NULL; #endif #ifdef INET6 (void)ip6proto_unregister(IPPROTO_CARP); carp_iamatch6_p = NULL; carp_macmatch6_p = NULL; #endif carp_ioctl_p = NULL; carp_attach_p = NULL; carp_detach_p = NULL; carp_get_vhid_p = NULL; carp_linkstate_p = NULL; carp_forus_p = NULL; carp_output_p = NULL; carp_demote_adj_p = NULL; carp_master_p = NULL; mtx_unlock(&carp_mtx); taskqueue_drain(taskqueue_swi, &carp_sendall_task); mtx_destroy(&carp_mtx); sx_destroy(&carp_sx); } static void ipcarp_sysinit(void) { /* Load allow as tunable so to postpone carp start after module load */ TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); } VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); static int carp_mod_load(void) { int err; mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); sx_init(&carp_sx, "carp_sx"); LIST_INIT(&carp_list); carp_get_vhid_p = carp_get_vhid; carp_forus_p = carp_forus; carp_output_p = carp_output; carp_linkstate_p = carp_linkstate; carp_ioctl_p = carp_ioctl; carp_attach_p = carp_attach; carp_detach_p = carp_detach; carp_demote_adj_p = carp_demote_adj; carp_master_p = carp_master; #ifdef INET6 carp_iamatch6_p = carp_iamatch6; carp_macmatch6_p = carp_macmatch6; err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); if (err) { printf("carp: error %d registering with INET6\n", err); carp_mod_cleanup(); return (err); } #endif #ifdef INET carp_iamatch_p = carp_iamatch; err = ipproto_register(IPPROTO_CARP, carp_input, NULL); if (err) { printf("carp: error %d registering with INET\n", err); carp_mod_cleanup(); return (err); } #endif carp_nl_register(); return (0); } static int carp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: return carp_mod_load(); /* NOTREACHED */ case MOD_UNLOAD: mtx_lock(&carp_mtx); if (LIST_EMPTY(&carp_list)) carp_mod_cleanup(); else { mtx_unlock(&carp_mtx); return (EBUSY); } break; default: return (EINVAL); } return (0); } static moduledata_t carp_mod = { "carp", carp_modevent, 0 }; DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h index 0c22e9434797..dc3d9a68b43b 100644 --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -1,178 +1,225 @@ /* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Michael Shalayeff. All rights reserved. * Copyright (c) 2003 Ryan McBride. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _IP_CARP_H #define _IP_CARP_H +#ifdef _KERNEL /* * The CARP header layout is as follows: * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |Version| Type | VirtualHostID | AdvSkew | Auth Len | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | AdvBase | Checksum | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Counter (1) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Counter (2) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SHA-1 HMAC (1) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SHA-1 HMAC (2) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SHA-1 HMAC (3) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SHA-1 HMAC (4) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SHA-1 HMAC (5) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * */ struct carp_header { #if BYTE_ORDER == LITTLE_ENDIAN u_int8_t carp_type:4, carp_version:4; #endif #if BYTE_ORDER == BIG_ENDIAN u_int8_t carp_version:4, carp_type:4; #endif u_int8_t carp_vhid; /* virtual host id */ u_int8_t carp_advskew; /* advertisement skew */ u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */ u_int8_t carp_pad1; /* reserved */ u_int8_t carp_advbase; /* advertisement interval */ u_int16_t carp_cksum; u_int32_t carp_counter[2]; unsigned char carp_md[20]; /* SHA1 HMAC */ } __packed; -#ifdef CTASSERT CTASSERT(sizeof(struct carp_header) == 36); + +/* + * The VRRPv3 header layout is as follows: + * See RFC9568, 5.1. VRRP Packet Format + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | Virtual Rtr ID| Priority |Count IPvX Addr| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |(rsvd) | Max Adver Int | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * + + + * | IPvX Address(es) | + * + + + * + + + * + + + * + + + * | | + * + + + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct vrrpv3_header { +#if BYTE_ORDER == LITTLE_ENDIAN + uint8_t vrrp_type:4, + vrrp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + uint8_t vrrp_version:4, + vrrp_type:4; #endif + uint8_t vrrp_vrtid; + uint8_t vrrp_priority; + uint8_t vrrp_count_addr; + uint16_t vrrp_max_adver_int; + uint16_t vrrp_checksum; +} __packed; -#define CARP_DFLTTL 255 +CTASSERT(sizeof(struct vrrpv3_header) == 8); +#endif /* _KERNEL */ -/* carp_version */ -#define CARP_VERSION 2 +#define CARP_DFLTTL 255 /* carp_type */ #define CARP_ADVERTISEMENT 0x01 #define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */ /* carp_advbase */ #define CARP_DFLTINTV 1 +#define VRRP_TYPE_ADVERTISEMENT 0x01 +#define VRRP_MAX_INTERVAL (0x1000 - 1) /* * Statistics. */ struct carpstats { uint64_t carps_ipackets; /* total input packets, IPv4 */ uint64_t carps_ipackets6; /* total input packets, IPv6 */ uint64_t carps_badif; /* wrong interface */ uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */ uint64_t carps_hdrops; /* packets shorter than hdr */ uint64_t carps_badsum; /* bad checksum */ uint64_t carps_badver; /* bad (incl unsupp) version */ uint64_t carps_badlen; /* data length does not match */ uint64_t carps_badauth; /* bad authentication */ uint64_t carps_badvhid; /* bad VHID */ uint64_t carps_badaddrs; /* bad address list */ uint64_t carps_opackets; /* total output packets, IPv4 */ uint64_t carps_opackets6; /* total output packets, IPv6 */ uint64_t carps_onomem; /* no memory for an mbuf */ uint64_t carps_ostates; /* total state updates sent */ uint64_t carps_preempt; /* if enabled, preemptions */ }; /* * Configuration structure for SIOCSVH SIOCGVH */ struct carpreq { int carpr_count; int carpr_vhid; #define CARP_MAXVHID 255 int carpr_state; #define CARP_STATES "INIT", "BACKUP", "MASTER" #define CARP_MAXSTATE 2 int carpr_advskew; #define CARP_MAXSKEW 240 int carpr_advbase; unsigned char carpr_key[CARP_KEY_LEN]; }; #define SIOCSVH _IOWR('i', 245, struct ifreq) #define SIOCGVH _IOWR('i', 246, struct ifreq) +typedef enum carp_version { + CARP_VERSION_CARP = 2, + CARP_VERSION_VRRPv3 = 3, +} carp_version_t; + #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); void carp_detach(struct ifaddr *, bool); void carp_carpdev_state(struct ifnet *); int carp_output (struct ifnet *, struct mbuf *, const struct sockaddr *); int carp_master(struct ifaddr *); int carp_iamatch(struct ifaddr *, uint8_t **); struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *); char * carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *); int carp_forus(struct ifnet *, u_char *); /* These are external networking stack hooks for CARP */ /* net/if.c */ extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); extern int (*carp_attach_p)(struct ifaddr *, int); extern void (*carp_detach_p)(struct ifaddr *, bool); extern void (*carp_linkstate_p)(struct ifnet *); extern void (*carp_demote_adj_p)(int, char *); extern int (*carp_master_p)(struct ifaddr *); /* net/if_bridge.c net/if_ethersubr.c */ extern int (*carp_forus_p)(struct ifnet *, u_char *); /* net/if_ethersubr.c */ extern int (*carp_output_p)(struct ifnet *, struct mbuf *, const struct sockaddr *); /* net/rtsock.c */ extern int (*carp_get_vhid_p)(struct ifaddr *); #ifdef INET /* netinet/if_ether.c */ extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 /* netinet6/nd6_nbr.c */ extern struct ifaddr *(*carp_iamatch6_p)(struct ifnet *, struct in6_addr *); extern char * (*carp_macmatch6_p)(struct ifnet *, struct mbuf *, const struct in6_addr *); #endif #endif #endif /* _IP_CARP_H */ diff --git a/sys/netinet/ip_carp_nl.h b/sys/netinet/ip_carp_nl.h index 89720af3e0dc..de4c0367c1d3 100644 --- a/sys/netinet/ip_carp_nl.h +++ b/sys/netinet/ip_carp_nl.h @@ -1,37 +1,40 @@ #ifndef _IP_CARP_NL_H #define _IP_CARP_NL_H #include #include #include /* * Netlink interface to carp(4). */ #define CARP_NL_FAMILY_NAME "carp" /* commands */ enum { CARP_NL_CMD_UNSPEC = 0, CARP_NL_CMD_GET = 1, CARP_NL_CMD_SET = 2, __CARP_NL_CMD_MAX, }; #define CARP_NL_CMD_MAX (__CARP_NL_CMD_MAX - 1) enum carp_nl_type_t { CARP_NL_UNSPEC, CARP_NL_VHID = 1, /* u32 */ CARP_NL_STATE = 2, /* u32 */ CARP_NL_ADVBASE = 3, /* s32 */ CARP_NL_ADVSKEW = 4, /* s32 */ CARP_NL_KEY = 5, /* byte array */ CARP_NL_IFINDEX = 6, /* u32 */ CARP_NL_ADDR = 7, /* in_addr_t */ CARP_NL_ADDR6 = 8, /* in6_addr_t */ CARP_NL_IFNAME = 9, /* string */ + CARP_NL_VERSION = 10, /* u8 */ + CARP_NL_VRRP_PRIORITY = 11, /* u8 */ + CARP_NL_VRRP_ADV_INTER = 12, /* u16, 12-bit field in centiseconds*/ }; #endif