diff --git a/lib/libifconfig/libifconfig.h b/lib/libifconfig/libifconfig.h --- a/lib/libifconfig/libifconfig.h +++ b/lib/libifconfig/libifconfig.h @@ -287,6 +287,9 @@ uint8_t carpr_key[CARP_KEY_LEN]; struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_prio; + uint16_t carpr_vrrp_adv_inter; }; int ifconfig_carp_get_vhid(ifconfig_handle_t *h, const char *name, diff --git a/lib/libifconfig/libifconfig_carp.c b/lib/libifconfig/libifconfig_carp.c --- a/lib/libifconfig/libifconfig_carp.c +++ b/lib/libifconfig/libifconfig_carp.c @@ -56,6 +56,9 @@ { .type = CARP_NL_KEY, .off = _OUT(carpr_key), .cb = snl_attr_copy_string, .arg_u32 = CARP_KEY_LEN }, { .type = CARP_NL_ADDR, .off = _OUT(carpr_addr), .cb = snl_attr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(carpr_addr6), .cb = snl_attr_get_in6_addr }, + { .type = CARP_NL_VERSION, .off = _OUT(carpr_version), .cb = snl_attr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(carpr_vrrp_prio), .cb = snl_attr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(carpr_vrrp_adv_inter), .cb = snl_attr_get_uint16 }, }; #undef _OUT @@ -175,6 +178,9 @@ snl_add_msg_attr(&nw, CARP_NL_ADDR6, sizeof(carpr->carpr_addr6), &carpr->carpr_addr6); snl_add_msg_attr_string(&nw, CARP_NL_KEY, carpr->carpr_key); + snl_add_msg_attr_u8(&nw, CARP_NL_VERSION, carpr->carpr_version); + snl_add_msg_attr_u8(&nw, CARP_NL_VRRP_PRIORITY, carpr->carpr_vrrp_prio); + snl_add_msg_attr_u16(&nw, CARP_NL_VRRP_ADV_INTER, carpr->carpr_vrrp_adv_inter); hdr = snl_finalize_msg(&nw); if (hdr == NULL) { diff --git a/sbin/ifconfig/carp.c b/sbin/ifconfig/carp.c --- a/sbin/ifconfig/carp.c +++ b/sbin/ifconfig/carp.c @@ -68,6 +68,9 @@ static struct in_addr carp_addr; static struct in6_addr carp_addr6; static unsigned char const *carpr_key; +static carp_version_t carpr_version; +static uint8_t carpr_vrrp_prio; +static uint16_t carpr_vrrp_adv_inter; static void carp_status(if_ctx *ctx) @@ -79,19 +82,28 @@ return; for (size_t i = 0; i < carpr[0].carpr_count; i++) { - printf("\tcarp: %s vhid %d advbase %d advskew %d", - carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, - carpr[i].carpr_advbase, carpr[i].carpr_advskew); - if (ctx->args->printkeys && carpr[i].carpr_key[0] != '\0') - printf(" key \"%s\"\n", carpr[i].carpr_key); - else - printf("\n"); - - inet_ntop(AF_INET6, &carpr[i].carpr_addr6, addr_buf, - sizeof(addr_buf)); - - printf("\t peer %s peer6 %s\n", - inet_ntoa(carpr[i].carpr_addr), addr_buf); + switch (carpr[i].carpr_version) { + case CARP_VERSION_CARP: + printf("\tcarp: %s vhid %d advbase %d advskew %d", + carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, + carpr[i].carpr_advbase, carpr[i].carpr_advskew); + if (ctx->args->printkeys && carpr[i].carpr_key[0] != '\0') + printf(" key \"%s\"\n", carpr[i].carpr_key); + else + printf("\n"); + + inet_ntop(AF_INET6, &carpr[i].carpr_addr6, addr_buf, + sizeof(addr_buf)); + + printf("\t peer %s peer6 %s\n", + inet_ntoa(carpr[i].carpr_addr), addr_buf); + break; + case CARP_VERSION_VRRPv3: + printf("\tvrrp: %s vrid %d prio %d interval %d\n", + carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, + carpr[i].carpr_vrrp_prio, carpr[i].carpr_vrrp_adv_inter); + break; + } } } @@ -137,6 +149,12 @@ if (! IN6_IS_ADDR_UNSPECIFIED(&carp_addr6)) memcpy(&carpr.carpr_addr6, &carp_addr6, sizeof(carp_addr6)); + if (carpr_version != 0) + carpr.carpr_version = carpr_version; + if (carpr_vrrp_prio != 0) + carpr.carpr_vrrp_prio = carpr_vrrp_prio; + if (carpr_vrrp_adv_inter != 0) + carpr.carpr_vrrp_adv_inter = carpr_vrrp_adv_inter; if (ifconfig_carp_set_info(lifh, ctx->ifname, &carpr)) err(1, "SIOCSVH"); @@ -226,6 +244,31 @@ carp_addr6.s6_addr[15] = 0x12; } +static void +setcarp_version(if_ctx *ctx __unused, const char *val, int dummy __unused) +{ + carpr_version = atoi(val); + + if (carpr_version != CARP_VERSION_CARP && carpr_version != CARP_VERSION_VRRPv3) + errx(1, "version must be %d or %d", CARP_VERSION_CARP, + CARP_VERSION_VRRPv3); +} + +static void +setvrrp_prio(if_ctx *ctx __unused, const char *val, int dummy __unused) +{ + carpr_vrrp_prio = atoi(val); +} + +static void +setvrrp_interval(if_ctx *ctx __unused, const char *val, int dummy __unused) +{ + carpr_vrrp_adv_inter = atoi(val); + + if (carpr_vrrp_adv_inter == 0 || carpr_vrrp_adv_inter > VRRP_MAX_INTERVAL) + errx(1, "vrrpinterval must be greater than 0 and less than %d", VRRP_MAX_INTERVAL); +} + static struct cmd carp_cmds[] = { DEF_CMD_ARG("advbase", setcarp_advbase), DEF_CMD_ARG("advskew", setcarp_advskew), @@ -236,6 +279,9 @@ DEF_CMD("mcast", 0, setcarp_mcast), DEF_CMD_ARG("peer6", setcarp_peer6), DEF_CMD("mcast6", 0, setcarp_mcast6), + DEF_CMD_ARG("carpver", setcarp_version), + DEF_CMD_ARG("vrrpprio", setvrrp_prio), + DEF_CMD_ARG("vrrpinterval", setvrrp_interval), }; static struct afswtch af_carp = { .af_name = "af_carp", diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -31,6 +31,7 @@ #ifndef _IP_CARP_H #define _IP_CARP_H +#ifdef _KERNEL /* * The CARP header layout is as follows: * @@ -77,14 +78,53 @@ unsigned char carp_md[20]; /* SHA1 HMAC */ } __packed; -#ifdef CTASSERT CTASSERT(sizeof(struct carp_header) == 36); + +/* + * The VRRPv3 header layout is as follows: + * See RFC9568, 5.1. VRRP Packet Format + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | Virtual Rtr ID| Priority |Count IPvX Addr| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |(rsvd) | Max Adver Int | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * + + + * | IPvX Address(es) | + * + + + * + + + * + + + * + + + * | | + * + + + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct vrrpv3_header { +#if BYTE_ORDER == LITTLE_ENDIAN + uint8_t vrrp_type:4, + vrrp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + uint8_t vrrp_version:4, + vrrp_type:4; #endif + uint8_t vrrp_vrtid; + uint8_t vrrp_priority; + uint8_t vrrp_count_addr; + uint16_t vrrp_max_adver_int; + uint16_t vrrp_checksum; +} __packed; -#define CARP_DFLTTL 255 +CTASSERT(sizeof(struct vrrpv3_header) == 8); +#endif /* _KERNEL */ -/* carp_version */ -#define CARP_VERSION 2 +#define CARP_DFLTTL 255 /* carp_type */ #define CARP_ADVERTISEMENT 0x01 @@ -94,6 +134,8 @@ /* carp_advbase */ #define CARP_DFLTINTV 1 +#define VRRP_TYPE_ADVERTISEMENT 0x01 +#define VRRP_MAX_INTERVAL (0x1000 - 1) /* * Statistics. */ @@ -136,6 +178,11 @@ #define SIOCSVH _IOWR('i', 245, struct ifreq) #define SIOCGVH _IOWR('i', 246, struct ifreq) +typedef enum carp_version { + CARP_VERSION_CARP = 2, + CARP_VERSION_VRRPv3 = 3, +} carp_version_t; + #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,7 @@ struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ + carp_version_t sc_version; /* carp or VRRPv3 */ struct sockaddr_dl sc_addr; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET @@ -106,10 +108,17 @@ struct mtx sc_mtx; int sc_vhid; - int sc_advskew; - int sc_advbase; - struct in_addr sc_carpaddr; - struct in6_addr sc_carpaddr6; + struct { /* CARP specific context */ + int sc_advskew; + int sc_advbase; + struct in_addr sc_carpaddr; + struct in6_addr sc_carpaddr6; + }; + struct { /* VRRPv3 specific context */ + uint8_t sc_vrrp_prio; + uint16_t sc_vrrp_adv_inter; + uint16_t sc_vrrp_master_inter; + }; int sc_naddrs; int sc_naddrs6; @@ -166,6 +175,9 @@ /* Everything above this is identical to carpreq */ struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_priority; + uint16_t carpr_vrrp_adv_inter; }; /* @@ -325,6 +337,7 @@ 0 : ((sc)->sc_advskew + V_carp_demotion))) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); +static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); static struct carp_softc *carp_alloc(struct ifnet *); static void carp_destroy(struct carp_softc *); @@ -339,6 +352,7 @@ const char* reason); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); +static void vrrp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); @@ -373,7 +387,7 @@ static void carp_hmac_prepare(struct carp_softc *sc) { - uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; @@ -478,6 +492,22 @@ return (bcmp(md, md2, sizeof(md2))); } +static int +vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) +{ + uint16_t cksum; + + /* + * Note that VRRPv3 checksums are different from CARP checksums. + * Carp just calculates the checksum over the packet. + * VRRPv3 includes the pseudo-header checksum as well. + */ + cksum = in_cksum_skip(m, off + len, off); + cksum -= phdrcksum; + + return (cksum); +} + /* * process input packet. * we have rearranged checks order compared to the rfc, @@ -489,8 +519,11 @@ { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); - struct carp_header *ch; - int iplen, len; + struct vrrpv3_header *vh; + int iplen; + int minlen; + int totlen; + uint16_t phdrcksum = 0; iplen = *offp; *mp = NULL; @@ -503,59 +536,93 @@ } iplen = ip->ip_hl << 2; + totlen = ntohs(ip->ip_len); - if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + /* Ensure we have enough header to figure out the version. */ + if (m->m_pkthdr.len < iplen + sizeof(*vh)) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " + CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if (iplen + sizeof(*ch) < m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + if (iplen + sizeof(*vh) < m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { CARPSTATS_INC(carps_hdrops); - CARP_DEBUG("%s: pullup failed\n", __func__); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } - ch = (struct carp_header *)((char *)ip + iplen); + vh = (struct vrrpv3_header *)((char *)ip + iplen); - /* - * verify that the received packet length is - * equal to the CARP header - */ - len = iplen + sizeof(*ch); - if (len > m->m_pkthdr.len) { + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. */ + if (m->m_pkthdr.len < iplen + minlen) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: packet too short %d on %s\n", __func__, - m->m_pkthdr.len, + CARP_DEBUG("%s: received len %zd < %d " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + iplen + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if ((m = m_pullup(m, len)) == NULL) { - CARPSTATS_INC(carps_hdrops); - return (IPPROTO_DONE); + if (iplen + minlen < m->m_len) { + if ((m = m_pullup(m, iplen + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip = mtod(m, struct ip *); + vh = (struct vrrpv3_header *)((char *)ip + iplen); } - ip = mtod(m, struct ip *); - ch = (struct carp_header *)((char *)ip + iplen); - /* verify the CARP checksum */ - m->m_data += iplen; - if (in_cksum(m, len - iplen)) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed on %s\n", __func__, - if_name(m->m_pkthdr.rcvif)); - m_freem(m); - return (IPPROTO_DONE); + phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl((u_short)(totlen - iplen) + ip->ip_p)); + + if (vh->vrrp_version == CARP_VERSION_CARP) { + /* verify the CARP checksum */ + m->m_data += iplen; + if (in_cksum(m, totlen - iplen)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= iplen; + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch = (struct carp_header *)((char *)ip + iplen); + carp_input_c(m, ch, AF_INET, ip->ip_ttl); + break; + } + case CARP_VERSION_VRRPv3: + vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, phdrcksum); + break; + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); } - m->m_data -= iplen; - carp_input_c(m, ch, AF_INET, ip->ip_ttl); return (IPPROTO_DONE); } #endif @@ -566,8 +633,9 @@ { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct carp_header *ch; - u_int len; + struct vrrpv3_header *vh; + u_int len, minlen; + uint16_t phdrcksum = 0; CARPSTATS_INC(carps_ipackets6); @@ -585,10 +653,9 @@ return (IPPROTO_DONE); } - /* verify that we have a complete carp packet */ - if (m->m_len < *offp + sizeof(*ch)) { + if (m->m_len < *offp + sizeof(*vh)) { len = m->m_len; - m = m_pullup(m, *offp + sizeof(*ch)); + m = m_pullup(m, *offp + sizeof(*vh)); if (m == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); @@ -596,20 +663,72 @@ } ip6 = mtod(m, struct ip6_hdr *); } - ch = (struct carp_header *)(mtod(m, char *) + *offp); + vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); - /* verify the CARP checksum */ - m->m_data += *offp; - if (in_cksum(m, sizeof(*ch))) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. */ + if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { + CARPSTATS_INC(carps_badlen); + CARP_DEBUG("%s: received len %zd < %zd " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + sizeof(*ip6) + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - m->m_data -= *offp; - carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + if (sizeof (*ip6) + minlen < m->m_len) { + if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip6 = mtod(m, struct ip6_hdr *); + vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + } + + phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), ip6->ip6_nxt, 0); + + /* verify the CARP checksum */ + if (vh->vrrp_version == CARP_VERSION_CARP) { + m->m_data += *offp; + if (in_cksum(m, sizeof(struct carp_header))) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + m->m_data -= *offp; + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); + carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + break; + } + case CARP_VERSION_VRRPv3: + vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, ntohs(ip6->ip6_plen), + phdrcksum); + break; + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); + } return (IPPROTO_DONE); } #endif /* INET6 */ @@ -629,7 +748,7 @@ * The VHID test is outside this mini-function. */ static int -carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) +carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; @@ -659,16 +778,12 @@ return (0); } -static void -carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +static struct ifaddr * +carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; - struct carp_softc *sc; - uint64_t tmp_counter; - struct timeval sc_tv, ch_tv; int error; - bool multicast = false; NET_EPOCH_ASSERT(); @@ -688,9 +803,9 @@ IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && - ifa->ifa_carp->sc_vhid == ch->carp_vhid) + ifa->ifa_carp->sc_vhid == vhid) match = ifa; - if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) + if (vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? NULL : match; @@ -705,12 +820,37 @@ } else { CARPSTATS_INC(carps_badvhid); } + } + + return (ifa); +} + +static void +carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + uint64_t tmp_counter; + struct timeval sc_tv, ch_tv; + bool multicast = false; + + NET_EPOCH_ASSERT(); + + ifa = carp_find_ifa(m, af, ch->carp_vhid); + if (ifa == NULL) { m_freem(m); return; } + sc = ifa->ifa_carp; + CARP_LOCK(sc); + /* verify the CARP version. */ - if (ch->carp_version != CARP_VERSION) { + if (ch->carp_version != CARP_VERSION_CARP || + sc->sc_version != CARP_VERSION_CARP) { + CARP_UNLOCK(sc); + CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), ch->carp_version); @@ -719,8 +859,6 @@ return; } - sc = ifa->ifa_carp; - CARP_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); } else { @@ -809,11 +947,130 @@ m_freem(m); } +static void +vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, + int len, uint16_t phdrcksum) +{ + struct vrrpv3_header *vh = mtodo(m, off); + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + + NET_EPOCH_ASSERT(); + + ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); + if (ifa == NULL) { + m_freem(m); + return; + } + + sc = ifa->ifa_carp; + CARP_LOCK(sc); + + ifa_free(ifa); + + /* verify the CARP version. */ + if (vh->vrrp_version != CARP_VERSION_VRRPv3 || sc->sc_version != CARP_VERSION_VRRPv3) { + CARP_UNLOCK(sc); + + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), + vh->vrrp_version); + m_freem(m); + return; + } + + /* verify that the IP TTL is 255. */ + if (ttl != CARP_DFLTTL) { + CARPSTATS_INC(carps_badttl); + CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, + ttl, if_name(m->m_pkthdr.rcvif)); + goto out; + } + + if (vrrp_checksum_verify(m, off, len, phdrcksum)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, + sc->sc_vhid, if_name(ifp)); + goto out; + } + + /* RFC9568, 7.1 Receiving VRRP packets. */ + if (sc->sc_vrrp_prio == 255) { + CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", + __func__); + goto out; + } + + /* XXX TODO Check IP address payload. */ + + sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. + * Same if the peer has a higher priority than us. + */ + if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || + vh->vrrp_priority > sc->sc_vrrp_prio) { + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP, + "more frequent advertisement received"); + carp_setrun(sc, 0); + carp_delroute(sc); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter + || vh->vrrp_priority < sc->sc_vrrp_prio)) { + carp_master_down_locked(sc, + "preempting a slower master"); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. + */ + carp_setrun(sc, af); + break; + } + +out: + CARP_UNLOCK(sc); + m_freem(m); +} + static int -carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +carp_tag(struct carp_softc *sc, struct mbuf *m) { struct m_tag *mtag; + /* Tag packet for carp_output */ + if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), + M_NOWAIT)) == NULL) { + m_freem(m); + CARPSTATS_INC(carps_onomem); + return (ENOMEM); + } + bcopy(&sc, mtag + 1, sizeof(sc)); + m_tag_prepend(m, mtag); + + return (0); +} + +static int +carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +{ + if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); @@ -827,17 +1084,7 @@ carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); - /* Tag packet for carp_output */ - if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), - M_NOWAIT)) == NULL) { - m_freem(m); - CARPSTATS_INC(carps_onomem); - return (ENOMEM); - } - bcopy(&sc, mtag + 1, sizeof(sc)); - m_tag_prepend(m, mtag); - - return (0); + return (carp_tag(sc, m)); } /* @@ -959,11 +1206,16 @@ NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); + if (sc->sc_version == CARP_VERSION_VRRPv3) { + vrrp_send_ad_locked(sc); + return; + } + advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; - ch.carp_version = CARP_VERSION; + ch.carp_version = CARP_VERSION_CARP; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; @@ -1090,6 +1342,187 @@ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); } +static void +vrrp_send_ad_locked(struct carp_softc *sc) +{ + struct vrrpv3_header *vh_ptr; + struct ifaddr *ifa; + struct mbuf *m; + int len; + struct vrrpv3_header vh = { + .vrrp_version = CARP_VERSION_VRRPv3, + .vrrp_type = VRRP_TYPE_ADVERTISEMENT, + .vrrp_vrtid = sc->sc_vhid, + .vrrp_priority = sc->sc_vrrp_prio, + .vrrp_count_addr = 0, + .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), + .vrrp_checksum = 0, + }; + + NET_EPOCH_ASSERT(); + CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_VRRPv3); + +#ifdef INET + if (sc->sc_naddrs) { + struct ip *ip; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; + ip->ip_off = htons(IP_DF); + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip_fillid(ip); + + ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); + if (ifa != NULL) { + ip->ip_src.s_addr = + ifatoia(ifa)->ia_addr.sin_addr.s_addr; + ifa_free(ifa); + } else + ip->ip_src.s_addr = 0; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + /* Include the IP addresses in the announcement. */ + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in *in; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) + continue; + + in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in->sin_addr), + (caddr_t)&in->sin_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in->sin_addr); + } + ip->ip_len = htons(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); + + if (carp_tag(sc, m)) + goto resched; + + CARPSTATS_INC(carps_opackets); + + carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, + &sc->sc_carpdev->if_carp->cif_imo, NULL)); + } +#endif +#ifdef INET6 + if (sc->sc_naddrs6) { + struct ip6_hdr *ip6; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip6) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + /* Traffic class isn't defined in ip6 struct instead + * it gets offset into flowid field */ + ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + + IPTOS_DSCP_OFFSET)); + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + + /* set the source address */ + ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); + if (ifa != NULL) { + bcopy(IFA_IN6(ifa), &ip6->ip6_src, + sizeof(struct in6_addr)); + ifa_free(ifa); + } else + /* This should never happen with IPv6. */ + bzero(&ip6->ip6_src, sizeof(struct in6_addr)); + + /* Set the multicast destination. */ + bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); + ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + ip6->ip6_dst.s6_addr8[15] = 0x12; + + /* Include the IP addresses in the announcement. */ + len = sizeof(vh); + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in6 *in6; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in6->sin6_addr), + (char *)&in6->sin6_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in6->sin6_addr); + } + ip6->ip6_plen = htonl(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); + + if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { + m_freem(m); + CARP_DEBUG("%s: in6_setscope failed\n", __func__); + goto resched; + } + + if (carp_tag(sc, m)) + goto resched; + CARPSTATS_INC(carps_opackets6); + + carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, + &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); + } +#endif + +resched: + callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, + carp_send_ad, sc); +} + static void carp_addroute(struct carp_softc *sc) { @@ -1357,6 +1790,7 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; + int timeout; CARP_LOCK_ASSERT(sc); @@ -1373,40 +1807,57 @@ break; case BACKUP: callout_stop(&sc->sc_ad_tmo); - tv.tv_sec = 3 * sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; + + if (sc->sc_version == CARP_VERSION_CARP) { + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + timeout = tvtohz(&tv); + } else { + /* skew time */ + timeout = (256 - sc->sc_vrrp_prio) * sc->sc_vrrp_master_inter / 256; + timeout += (3 * sc->sc_vrrp_master_inter); + timeout *= hz; + timeout /= 100; /* master interval is in centiseconds. */ + } + switch (af) { #ifdef INET case AF_INET: - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); #endif break; } break; case MASTER: - tv.tv_sec = sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; - callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), - carp_send_ad, sc); + if (sc->sc_version == CARP_VERSION_CARP) { + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + } else { + callout_reset(&sc->sc_ad_tmo, + sc->sc_vrrp_adv_inter * hz / 100, + carp_send_ad, sc); + } break; } } @@ -1634,6 +2085,7 @@ sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + sc->sc_version = CARP_VERSION_CARP; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_init_counter = 1; @@ -1647,6 +2099,10 @@ sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; sc->sc_carpaddr6.s6_addr8[15] = 0x12; + sc->sc_vrrp_adv_inter = 100; + sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; + sc->sc_vrrp_prio = 100; + CARP_LOCK_INIT(sc); #ifdef INET callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); @@ -1788,9 +2244,10 @@ struct carp_softc *sc = NULL; int error = 0; - if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || - carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { + carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0 || + (carpr->carpr_version != CARP_VERSION_CARP && + carpr->carpr_version != CARP_VERSION_VRRPv3)) { return (EINVAL); } @@ -1811,6 +2268,7 @@ LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; } else CARP_LOCK(sc); + sc->sc_version = carpr->carpr_version; if (carpr->carpr_advbase > 0) { if (carpr->carpr_advbase > 255 || carpr->carpr_advbase < CARP_DFLTINTV) { @@ -1834,6 +2292,11 @@ bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); carp_hmac_prepare(sc); } + if (carpr->carpr_vrrp_priority != 0) + sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; + if (carpr->carpr_vrrp_adv_inter) + sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; + if (sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { switch (carpr->carpr_state) { @@ -2283,6 +2746,9 @@ nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); + nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); + nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); + nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, sc->sc_vrrp_adv_inter); if (priv) nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); @@ -2307,6 +2773,9 @@ char key[CARP_KEY_LEN]; struct in_addr addr; struct in6_addr addr6; + carp_version_t version; + uint8_t vrrp_prio; + uint16_t vrrp_adv_inter; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) @@ -2322,6 +2791,9 @@ { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, + { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, }; static const struct nlfield_parser nlf_p_set[] = { }; @@ -2399,6 +2871,13 @@ return (EINVAL); if (attrs.advskew >= 255) return (EINVAL); + if (attrs.version == 0) + attrs.version = CARP_VERSION_CARP; + if (attrs.version != CARP_VERSION_CARP && + attrs.version != CARP_VERSION_VRRPv3) + return (EINVAL); + if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) + return (EINVAL); NET_EPOCH_ENTER(et); if (attrs.ifname != NULL) @@ -2422,6 +2901,9 @@ carpr.carpr_advskew = attrs.advskew; carpr.carpr_addr = attrs.addr; carpr.carpr_addr6 = attrs.addr6; + carpr.carpr_version = attrs.version; + carpr.carpr_vrrp_priority = attrs.vrrp_prio; + carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); diff --git a/sys/netinet/ip_carp_nl.h b/sys/netinet/ip_carp_nl.h --- a/sys/netinet/ip_carp_nl.h +++ b/sys/netinet/ip_carp_nl.h @@ -32,6 +32,9 @@ CARP_NL_ADDR = 7, /* in_addr_t */ CARP_NL_ADDR6 = 8, /* in6_addr_t */ CARP_NL_IFNAME = 9, /* string */ + CARP_NL_VERSION = 10, /* u8 */ + CARP_NL_VRRP_PRIORITY = 11, /* u8 */ + CARP_NL_VRRP_ADV_INTER = 12, /* u16, 12-bit field in centiseconds*/ }; #endif