diff --git a/lib/libifconfig/libifconfig.h b/lib/libifconfig/libifconfig.h --- a/lib/libifconfig/libifconfig.h +++ b/lib/libifconfig/libifconfig.h @@ -287,6 +287,9 @@ uint8_t carpr_key[CARP_KEY_LEN]; struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_prio; + uint16_t carpr_vrrp_adv_inter; }; int ifconfig_carp_get_vhid(ifconfig_handle_t *h, const char *name, diff --git a/lib/libifconfig/libifconfig_carp.c b/lib/libifconfig/libifconfig_carp.c --- a/lib/libifconfig/libifconfig_carp.c +++ b/lib/libifconfig/libifconfig_carp.c @@ -56,6 +56,9 @@ { .type = CARP_NL_KEY, .off = _OUT(carpr_key), .cb = snl_attr_copy_string, .arg_u32 = CARP_KEY_LEN }, { .type = CARP_NL_ADDR, .off = _OUT(carpr_addr), .cb = snl_attr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(carpr_addr6), .cb = snl_attr_get_in6_addr }, + { .type = CARP_NL_VERSION, .off = _OUT(carpr_version), .cb = snl_attr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(carpr_vrrp_prio), .cb = snl_attr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(carpr_vrrp_adv_inter), .cb = snl_attr_get_uint16 }, }; #undef _OUT @@ -175,6 +178,9 @@ snl_add_msg_attr(&nw, CARP_NL_ADDR6, sizeof(carpr->carpr_addr6), &carpr->carpr_addr6); snl_add_msg_attr_string(&nw, CARP_NL_KEY, carpr->carpr_key); + snl_add_msg_attr_u8(&nw, CARP_NL_VERSION, carpr->carpr_version); + snl_add_msg_attr_u8(&nw, CARP_NL_VRRP_PRIORITY, carpr->carpr_vrrp_prio); + snl_add_msg_attr_u16(&nw, CARP_NL_VRRP_ADV_INTER, carpr->carpr_vrrp_adv_inter); hdr = snl_finalize_msg(&nw); if (hdr == NULL) { diff --git a/sbin/ifconfig/carp.c b/sbin/ifconfig/carp.c --- a/sbin/ifconfig/carp.c +++ b/sbin/ifconfig/carp.c @@ -68,6 +68,9 @@ static struct in_addr carp_addr; static struct in6_addr carp_addr6; static unsigned char const *carpr_key; +static carp_version_t carpr_version; +static uint8_t carpr_vrrp_prio; +static uint16_t carpr_vrrp_adv_inter; static void carp_status(if_ctx *ctx) @@ -79,19 +82,28 
@@ return; for (size_t i = 0; i < carpr[0].carpr_count; i++) { - printf("\tcarp: %s vhid %d advbase %d advskew %d", - carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, - carpr[i].carpr_advbase, carpr[i].carpr_advskew); - if (ctx->args->printkeys && carpr[i].carpr_key[0] != '\0') - printf(" key \"%s\"\n", carpr[i].carpr_key); - else - printf("\n"); - - inet_ntop(AF_INET6, &carpr[i].carpr_addr6, addr_buf, - sizeof(addr_buf)); - - printf("\t peer %s peer6 %s\n", - inet_ntoa(carpr[i].carpr_addr), addr_buf); + switch (carpr[i].carpr_version) { + case CARP_VERSION_CARP: + printf("\tcarp: %s vhid %d advbase %d advskew %d", + carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, + carpr[i].carpr_advbase, carpr[i].carpr_advskew); + if (ctx->args->printkeys && carpr[i].carpr_key[0] != '\0') + printf(" key \"%s\"\n", carpr[i].carpr_key); + else + printf("\n"); + + inet_ntop(AF_INET6, &carpr[i].carpr_addr6, addr_buf, + sizeof(addr_buf)); + + printf("\t peer %s peer6 %s\n", + inet_ntoa(carpr[i].carpr_addr), addr_buf); + break; + case CARP_VERSION_VRRPv3: + printf("\tvrrp: %s vrid %d prio %d interval %d\n", + carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid, + carpr[i].carpr_vrrp_prio, carpr[i].carpr_vrrp_adv_inter); + break; + } } } @@ -137,6 +149,12 @@ if (! 
IN6_IS_ADDR_UNSPECIFIED(&carp_addr6))
 		memcpy(&carpr.carpr_addr6, &carp_addr6,
 		    sizeof(carp_addr6));
+	if (carpr_version != 0)
+		carpr.carpr_version = carpr_version;
+	if (carpr_vrrp_prio != 0)
+		carpr.carpr_vrrp_prio = carpr_vrrp_prio;
+	if (carpr_vrrp_adv_inter != 0)
+		carpr.carpr_vrrp_adv_inter = carpr_vrrp_adv_inter;
 
 	if (ifconfig_carp_set_info(lifh, ctx->ifname, &carpr))
 		err(1, "SIOCSVH");
@@ -226,6 +244,68 @@
 	carp_addr6.s6_addr[15] = 0x12;
 }
 
+/*
+ * Parse val as an unsigned base-10 number no larger than max.
+ * Returns 0 and stores the number in *res on success, or -1 when val does
+ * not start with a digit, has trailing characters or exceeds max.  Unlike
+ * atoi(3), this keeps out-of-range input from being silently truncated when
+ * it is stored in the uint8_t/uint16_t settings below.
+ */
+static int
+carp_parse_num(const char *val, unsigned long max, unsigned long *res)
+{
+	char *end;
+
+	/* strtoul(3) accepts signs and leading blanks; we do not. */
+	if (val[0] < '0' || val[0] > '9')
+		return (-1);
+
+	/* Overflow yields ULONG_MAX, which the range check rejects. */
+	*res = strtoul(val, &end, 10);
+	if (*end != '\0' || *res > max)
+		return (-1);
+
+	return (0);
+}
+
+/* "carpver" keyword: select CARP (2) or VRRPv3 (3). */
+static void
+setcarp_version(if_ctx *ctx __unused, const char *val, int dummy __unused)
+{
+	unsigned long num;
+
+	if (carp_parse_num(val, CARP_VERSION_VRRPv3, &num) != 0 ||
+	    (num != CARP_VERSION_CARP && num != CARP_VERSION_VRRPv3))
+		errx(1, "version must be 2 or 3");
+
+	carpr_version = num;
+}
+
+/* "vrrpprio" keyword: VRRP priority; must fit the uint8_t it is stored in. */
+static void
+setvrrp_prio(if_ctx *ctx __unused, const char *val, int dummy __unused)
+{
+	unsigned long num;
+
+	if (carp_parse_num(val, 255, &num) != 0)
+		errx(1, "vrrpprio must be between 0 and 255");
+
+	carpr_vrrp_prio = num;
+}
+
+/* "vrrpinterval" keyword: advertisement interval, 1..VRRP_MAX_INTERVAL. */
+static void
+setvrrp_interval(if_ctx *ctx __unused, const char *val, int dummy __unused)
+{
+	unsigned long num;
+
+	if (carp_parse_num(val, VRRP_MAX_INTERVAL, &num) != 0 || num == 0)
+		errx(1, "vrrpinterval must be between 1 and %d",
+		    VRRP_MAX_INTERVAL);
+
+	carpr_vrrp_adv_inter = num;
+}
+
 static struct cmd carp_cmds[] = {
 	DEF_CMD_ARG("advbase", setcarp_advbase),
 	DEF_CMD_ARG("advskew", setcarp_advskew),
@@ -236,6 +316,9 @@
 	DEF_CMD("mcast", 0, setcarp_mcast),
 	DEF_CMD_ARG("peer6", setcarp_peer6),
 	DEF_CMD("mcast6", 0, setcarp_mcast6),
+	DEF_CMD_ARG("carpver", setcarp_version),
+	DEF_CMD_ARG("vrrpprio", setvrrp_prio),
+	DEF_CMD_ARG("vrrpinterval", setvrrp_interval),
 };
 static struct afswtch af_carp = {
 	.af_name = "af_carp",
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
--- a/sys/netinet/ip_carp.h
+++ b/sys/netinet/ip_carp.h
@@ -31,6 +31,7 @@
 #ifndef _IP_CARP_H
 #define _IP_CARP_H
 
+#ifdef _KERNEL
 /*
  * The CARP header layout is as follows:
  *
@@ -77,14 +78,53 @@
 	unsigned char carp_md[20]; /* SHA1 HMAC */
 } __packed;
 
-#ifdef CTASSERT
 CTASSERT(sizeof(struct carp_header) == 36);
+
+/*
+ * The VRRPv3 header layout is as follows:
+ * See RFC5798, 5.1.
VRRP Packet Format + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | Virtual Rtr ID| Priority |Count IPvX Addr| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |(rsvd) | Max Adver Int | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * + + + * | IPvX Address(es) | + * + + + * + + + * + + + * + + + * | | + * + + + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct vrrp_header { +#if BYTE_ORDER == LITTLE_ENDIAN + uint8_t vrrp_type:4, + vrrp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + uint8_t vrrp_version:4, + vrrp_type:4; #endif + uint8_t vrrp_vrtid; + uint8_t vrrp_priority; + uint8_t vrrp_count_addr; + uint16_t vrrp_max_adver_int; + uint16_t vrrp_checksum; +} __packed; -#define CARP_DFLTTL 255 +CTASSERT(sizeof(struct vrrp_header) == 8); +#endif /* _KERNEL */ -/* carp_version */ -#define CARP_VERSION 2 +#define CARP_DFLTTL 255 /* carp_type */ #define CARP_ADVERTISEMENT 0x01 @@ -94,6 +134,8 @@ /* carp_advbase */ #define CARP_DFLTINTV 1 +#define VRRP_TYPE_ADVERTISEMENT 0x01 +#define VRRP_MAX_INTERVAL (0x1000 - 1) /* * Statistics. */ @@ -136,6 +178,11 @@ #define SIOCSVH _IOWR('i', 245, struct ifreq) #define SIOCGVH _IOWR('i', 246, struct ifreq) +typedef enum carp_version { + CARP_VERSION_CARP = 2, + CARP_VERSION_VRRPv3 = 3, +} carp_version_t; + #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,8 @@ struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. 
*/ - struct sockaddr_dl sc_addr; /* Our link level address. */ + carp_version_t sc_version; /* carp or VRRPv3 */ + uint8_t sc_addr[ETHER_ADDR_LEN]; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. */ @@ -106,11 +108,25 @@ struct mtx sc_mtx; int sc_vhid; - int sc_advskew; - int sc_advbase; - struct in_addr sc_carpaddr; - struct in6_addr sc_carpaddr6; - + union { + struct { /* sc_version == CARP_VERSION_CARP */ + int sc_advskew; + int sc_advbase; + struct in_addr sc_carpaddr; + struct in6_addr sc_carpaddr6; + uint64_t sc_counter; + bool sc_init_counter; +#define CARP_HMAC_PAD 64 + unsigned char sc_key[CARP_KEY_LEN]; + unsigned char sc_pad[CARP_HMAC_PAD]; + SHA1_CTX sc_sha1; + }; + struct { /* sc_version == CARP_VERSION_VRRPv3 */ + uint8_t sc_vrrp_prio; + uint16_t sc_vrrp_adv_inter; + uint16_t sc_vrrp_master_inter; + }; + }; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; @@ -121,15 +137,6 @@ int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 - int sc_init_counter; - uint64_t sc_counter; - - /* authentication */ -#define CARP_HMAC_PAD 64 - unsigned char sc_key[CARP_KEY_LEN]; - unsigned char sc_pad[CARP_HMAC_PAD]; - SHA1_CTX sc_sha1; - TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. 
*/ }; @@ -166,6 +173,9 @@ /* Everything above this is identical to carpreq */ struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_priority; + uint16_t carpr_vrrp_adv_inter; }; /* @@ -325,8 +335,9 @@ 0 : ((sc)->sc_advskew + V_carp_demotion))) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); +static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); static struct carp_softc - *carp_alloc(struct ifnet *); + *carp_alloc(struct ifnet *, carp_version_t, int); static void carp_destroy(struct carp_softc *); static struct carp_if *carp_alloc_if(struct ifnet *); @@ -337,8 +348,8 @@ static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *, const char* reason); -static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); +static void vrrp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); @@ -373,7 +384,7 @@ static void carp_hmac_prepare(struct carp_softc *sc) { - uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; @@ -385,6 +396,7 @@ #endif CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_CARP); /* Compute ipad from key. */ bzero(sc->sc_pad, sizeof(sc->sc_pad)); @@ -478,6 +490,22 @@ return (bcmp(md, md2, sizeof(md2))); } +static int +vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) +{ + uint16_t cksum; + + /* + * Note that VRRP checksums are different from CARP checksums. + * Carp just calculates the checksum over the packet. + * VRRP includes the pseudo-header checksum as well. + */ + cksum = in_cksum_skip(m, off + len, off); + cksum -= phdrcksum; + + return (cksum); +} + /* * process input packet. 
* we have rearranged checks order compared to the rfc, @@ -489,8 +517,10 @@ { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); - struct carp_header *ch; - int iplen, len; + struct vrrp_header *vh; + int iplen; + int minlen; + int totlen; iplen = *offp; *mp = NULL; @@ -503,59 +533,94 @@ } iplen = ip->ip_hl << 2; + totlen = ntohs(ip->ip_len); - if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + if (m->m_pkthdr.len < iplen + sizeof(*vh)) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " + CARP_DEBUG("%s: received len %zd < sizeof(struct vrrp_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if (iplen + sizeof(*ch) < m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + if (iplen + sizeof(*vh) < m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { CARPSTATS_INC(carps_hdrops); CARP_DEBUG("%s: pullup failed\n", __func__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } - ch = (struct carp_header *)((char *)ip + iplen); + vh = (struct vrrp_header *)((char *)ip + iplen); - /* - * verify that the received packet length is - * equal to the CARP header - */ - len = iplen + sizeof(*ch); - if (len > m->m_pkthdr.len) { + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrp_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. 
*/ + if (m->m_pkthdr.len < iplen + minlen) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: packet too short %d on %s\n", __func__, - m->m_pkthdr.len, + CARP_DEBUG("%s: received len %zd < %d " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + iplen + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if ((m = m_pullup(m, len)) == NULL) { - CARPSTATS_INC(carps_hdrops); - return (IPPROTO_DONE); + if (iplen + minlen < m->m_len) { + if ((m = m_pullup(m, iplen + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s: pullup failed\n", __func__); + return (IPPROTO_DONE); + } + ip = mtod(m, struct ip *); + vh = (struct vrrp_header *)((char *)ip + iplen); } - ip = mtod(m, struct ip *); - ch = (struct carp_header *)((char *)ip + iplen); - /* verify the CARP checksum */ - m->m_data += iplen; - if (in_cksum(m, len - iplen)) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed on %s\n", __func__, - if_name(m->m_pkthdr.rcvif)); - m_freem(m); - return (IPPROTO_DONE); + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch; + + /* verify the CARP checksum */ + m->m_data += iplen; + if (in_cksum(m, totlen - iplen)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + break; + } + m->m_data -= iplen; + ch = (struct carp_header *)((char *)ip + iplen); + carp_input_c(m, ch, AF_INET, ip->ip_ttl); + break; + } + case CARP_VERSION_VRRPv3: { + uint16_t phdrcksum; + + phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl((u_short)(totlen - iplen) + ip->ip_p)); + vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, + phdrcksum); + break; + } + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); } - m->m_data -= iplen; - carp_input_c(m, ch, AF_INET, ip->ip_ttl); return (IPPROTO_DONE); } #endif @@ -566,8 +631,8 @@ { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - 
struct carp_header *ch; - u_int len; + struct vrrp_header *vh; + u_int len, minlen; CARPSTATS_INC(carps_ipackets6); @@ -585,10 +650,9 @@ return (IPPROTO_DONE); } - /* verify that we have a complete carp packet */ - if (m->m_len < *offp + sizeof(*ch)) { + if (m->m_len < *offp + sizeof(*vh)) { len = m->m_len; - m = m_pullup(m, *offp + sizeof(*ch)); + m = m_pullup(m, *offp + sizeof(*vh)); if (m == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); @@ -596,20 +660,74 @@ } ip6 = mtod(m, struct ip6_hdr *); } - ch = (struct carp_header *)(mtod(m, char *) + *offp); + vh = (struct vrrp_header *)(mtod(m, char *) + *offp); - /* verify the CARP checksum */ - m->m_data += *offp; - if (in_cksum(m, sizeof(*ch))) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrp_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. 
*/ + if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { + CARPSTATS_INC(carps_badlen); + CARP_DEBUG("%s: received len %zd < %zd " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + sizeof(*ip6) + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - m->m_data -= *offp; - carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + if (sizeof (*ip6) + minlen < m->m_len) { + if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s: pullup failed\n", __func__); + return (IPPROTO_DONE); + } + ip6 = mtod(m, struct ip6_hdr *); + vh = (struct vrrp_header *)mtodo(m, sizeof(*ip6)); + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch; + + /* verify the CARP checksum */ + m->m_data += *offp; + if (in_cksum(m, sizeof(struct carp_header))) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + break; + } + m->m_data -= *offp; + ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); + carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + break; + } + case CARP_VERSION_VRRPv3: { + uint16_t phdrcksum; + + phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), + ip6->ip6_nxt, 0); + vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, + ntohs(ip6->ip6_plen), phdrcksum); + break; + } + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); + } return (IPPROTO_DONE); } #endif /* INET6 */ @@ -629,7 +747,7 @@ * The VHID test is outside this mini-function. 
*/ static int -carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) +carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; @@ -659,16 +777,12 @@ return (0); } -static void -carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +static struct ifaddr * +carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; - struct carp_softc *sc; - uint64_t tmp_counter; - struct timeval sc_tv, ch_tv; int error; - bool multicast = false; NET_EPOCH_ASSERT(); @@ -688,9 +802,9 @@ IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && - ifa->ifa_carp->sc_vhid == ch->carp_vhid) + ifa->ifa_carp->sc_vhid == vhid) match = ifa; - if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) + if (vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? NULL : match; @@ -705,12 +819,37 @@ } else { CARPSTATS_INC(carps_badvhid); } + } + + return (ifa); +} + +static void +carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + uint64_t tmp_counter; + struct timeval sc_tv, ch_tv; + bool multicast = false; + + NET_EPOCH_ASSERT(); + MPASS(ch->carp_version == CARP_VERSION_CARP); + + ifa = carp_find_ifa(m, af, ch->carp_vhid); + if (ifa == NULL) { m_freem(m); return; } + sc = ifa->ifa_carp; + CARP_LOCK(sc); + /* verify the CARP version. 
*/
-	if (ch->carp_version != CARP_VERSION) {
+	if (sc->sc_version != CARP_VERSION_CARP) {
+		CARP_UNLOCK(sc);
+
 		CARPSTATS_INC(carps_badver);
 		CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
 		    ch->carp_version);
@@ -719,8 +858,6 @@
 		return;
 	}
 
-	sc = ifa->ifa_carp;
-	CARP_LOCK(sc);
 	if (ifa->ifa_addr->sa_family == AF_INET) {
 		multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr);
 	} else {
@@ -749,7 +886,7 @@
 
 	/* XXX Replay protection goes here */
 
-	sc->sc_init_counter = 0;
+	sc->sc_init_counter = false;
 	sc->sc_counter = tmp_counter;
 
 	sc_tv.tv_sec = sc->sc_advbase;
@@ -809,11 +946,133 @@
 	m_freem(m);
 }
 
+static void
+vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl,
+    int len, uint16_t phdrcksum)
+{
+	struct vrrp_header *vh = mtodo(m, off);
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	struct ifaddr *ifa;
+	struct carp_softc *sc;
+
+	NET_EPOCH_ASSERT();
+	MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3);
+
+	ifa = carp_find_ifa(m, af, vh->vrrp_vrtid);
+	if (ifa == NULL) {
+		m_freem(m);
+		return;
+	}
+
+	sc = ifa->ifa_carp;
+	CARP_LOCK(sc);
+	ifa_free(ifa);
+
+	/* verify the CARP version. */
+	if (sc->sc_version != CARP_VERSION_VRRPv3) {
+		CARP_UNLOCK(sc);
+
+		CARPSTATS_INC(carps_badver);
+		CARP_DEBUG("%s: invalid version %d\n", if_name(ifp),
+		    vh->vrrp_version);
+		m_freem(m);
+		return;
+	}
+
+	/* verify that the IP TTL is 255. */
+	if (ttl != CARP_DFLTTL) {
+		CARPSTATS_INC(carps_badttl);
+		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
+		    ttl, if_name(m->m_pkthdr.rcvif));
+		goto out;
+	}
+
+	if (vrrp_checksum_verify(m, off, len, phdrcksum)) {
+		CARPSTATS_INC(carps_badsum);
+		CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__,
+		    sc->sc_vhid, if_name(ifp));
+		goto out;
+	}
+
+	/* RFC5798, 7.1 Receiving VRRP packets. */
+	if (sc->sc_vrrp_prio == 255) {
+		CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n",
+		    __func__);
+		goto out;
+	}
+
+	/* XXX TODO Check IP address payload. */
+
+	/* The 4 reserved bits are ignored on reception (RFC5798, 5.2.6). */
+	sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int) & VRRP_MAX_INTERVAL;
+
+	switch (sc->sc_state) {
+	case INIT:
+		break;
+	case MASTER:
+		/*
+		 * If we receive an advertisement from a master who's going to
+		 * be more frequent than us, go into BACKUP state.
+		 * Same if the peer has a higher priority than us.
+		 */
+		if (sc->sc_vrrp_master_inter < sc->sc_vrrp_adv_inter ||
+		    vh->vrrp_priority > sc->sc_vrrp_prio) {
+			callout_stop(&sc->sc_ad_tmo);
+			carp_set_state(sc, BACKUP,
+			    "more frequent advertisement received");
+			carp_setrun(sc, 0);
+			carp_delroute(sc);
+		}
+		break;
+	case BACKUP:
+		/*
+		 * If we're pre-empting masters who advertise slower than us,
+		 * and this one claims to be slower, treat him as down.
+		 */
+		if (V_carp_preempt && (vh->vrrp_priority < sc->sc_vrrp_prio ||
+		    sc->sc_vrrp_master_inter > sc->sc_vrrp_adv_inter)) {
+			carp_master_down_locked(sc,
+			    "preempting a slower master");
+			break;
+		}
+
+		/*
+		 * Otherwise, we reset the counter and wait for the next
+		 * advertisement.
+		 */
+		carp_setrun(sc, af);
+		break;
+	}
+
+out:
+	CARP_UNLOCK(sc);
+	m_freem(m);
+}
+
 static int
-carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+carp_tag(struct carp_softc *sc, struct mbuf *m)
 {
 	struct m_tag *mtag;
 
+	/* Tag packet for carp_output */
+	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid),
+	    M_NOWAIT)) == NULL) {
+		m_freem(m);
+		CARPSTATS_INC(carps_onomem);
+		return (ENOMEM);
+	}
+	bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid));
+	m_tag_prepend(m, mtag);
+
+	return (0);
+}
+
+static void
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+
+	MPASS(sc->sc_version == CARP_VERSION_CARP);
+
 	if (sc->sc_init_counter) {
 		/* this could also be seconds since unix epoch */
 		sc->sc_counter = arc4random();
@@ -826,18 +1085,19 @@
 	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
 
 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
+}
 
-	/* Tag packet for carp_output */
-	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
-	    M_NOWAIT)) == NULL) {
-		m_freem(m);
-		CARPSTATS_INC(carps_onomem);
-		return (ENOMEM);
+static inline void
+send_ad_locked(struct carp_softc *sc)
+{
+	switch (sc->sc_version) {
+	case CARP_VERSION_CARP:
+		carp_send_ad_locked(sc);
+		break;
+	case CARP_VERSION_VRRPv3:
+		vrrp_send_ad_locked(sc);
+		break;
 	}
-	bcopy(&sc, mtag + 1, sizeof(sc));
-	m_tag_prepend(m, mtag);
-
-	return (0);
 }
 
 /*
@@ -856,7 +1116,7 @@
 		if (sc->sc_state == MASTER) {
 			CARP_LOCK(sc);
 			CURVNET_SET(sc->sc_carpdev->if_vnet);
-			carp_send_ad_locked(sc);
+			send_ad_locked(sc);
 			CURVNET_RESTORE();
 			CARP_UNLOCK(sc);
 		}
@@ -866,7 +1126,7 @@
 
 /* Send a periodic advertisement, executed in callout context.
*/
 static void
-carp_send_ad(void *v)
+carp_callout(void *v)
 {
 	struct carp_softc *sc = v;
 	struct epoch_tracker et;
@@ -874,7 +1134,7 @@
 	NET_EPOCH_ENTER(et);
 	CARP_LOCK_ASSERT(sc);
 	CURVNET_SET(sc->sc_carpdev->if_vnet);
-	carp_send_ad_locked(sc);
+	send_ad_locked(sc);
 	CURVNET_RESTORE();
 	CARP_UNLOCK(sc);
 	NET_EPOCH_EXIT(et);
@@ -958,12 +1218,13 @@
 
 	NET_EPOCH_ASSERT();
 	CARP_LOCK_ASSERT(sc);
+	MPASS(sc->sc_version == CARP_VERSION_CARP);
 
 	advskew = DEMOTE_ADVSKEW(sc);
 	tv.tv_sec = sc->sc_advbase;
 	tv.tv_usec = advskew * 1000000 / 256;
 
-	ch.carp_version = CARP_VERSION;
+	ch.carp_version = CARP_VERSION_CARP;
 	ch.carp_type = CARP_ADVERTISEMENT;
 	ch.carp_vhid = sc->sc_vhid;
 	ch.carp_advbase = sc->sc_advbase;
@@ -1012,7 +1273,9 @@
 
 		ch_ptr = (struct carp_header *)(&ip[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
-		if (carp_prepare_ad(m, sc, ch_ptr))
+		carp_prepare_ad(m, sc, ch_ptr);
+		if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) &&
+		    carp_tag(sc, m) != 0)
 			goto resched;
 
 		m->m_data += sizeof(*ip);
@@ -1072,7 +1335,9 @@
 
 		ch_ptr = (struct carp_header *)(&ip6[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
-		if (carp_prepare_ad(m, sc, ch_ptr))
+		carp_prepare_ad(m, sc, ch_ptr);
+		if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) &&
+		    carp_tag(sc, m) != 0)
 			goto resched;
 
 		m->m_data += sizeof(*ip6);
@@ -1087,7 +1352,188 @@
 #endif /* INET6 */
 
 resched:
-	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
+	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc);
+}
+
+static void
+vrrp_send_ad_locked(struct carp_softc *sc)
+{
+	struct vrrp_header *vh_ptr;
+	struct ifaddr *ifa;
+	struct mbuf *m;
+	int len;
+	struct vrrp_header vh = {
+		.vrrp_version = CARP_VERSION_VRRPv3,
+		.vrrp_type = VRRP_TYPE_ADVERTISEMENT,
+		.vrrp_vrtid = sc->sc_vhid,
+		.vrrp_priority = sc->sc_vrrp_prio,
+		.vrrp_count_addr = 0,
+		.vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter),
+		.vrrp_checksum = 0,
+	};
+
+	NET_EPOCH_ASSERT();
+	CARP_LOCK_ASSERT(sc);
+	MPASS(sc->sc_version == CARP_VERSION_VRRPv3);
+
+#ifdef INET
+	if (sc->sc_naddrs) {
+		struct ip *ip;
+
+		m = m_gethdr(M_NOWAIT, MT_DATA);
+		if (m == NULL) {
+			CARPSTATS_INC(carps_onomem);
+			goto resched;
+		}
+		len = sizeof(*ip) + sizeof(vh);
+		m->m_pkthdr.len = len;
+		m->m_pkthdr.rcvif = NULL;
+		m->m_len = len;
+		M_ALIGN(m, m->m_len);
+		m->m_flags |= M_MCAST;
+		ip = mtod(m, struct ip *);
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
+		ip->ip_off = htons(IP_DF);
+		ip->ip_ttl = CARP_DFLTTL;
+		ip->ip_p = IPPROTO_CARP;
+		ip->ip_sum = 0;
+		ip_fillid(ip);
+
+		ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
+		if (ifa != NULL) {
+			ip->ip_src.s_addr =
+			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+			ifa_free(ifa);
+		} else
+			ip->ip_src.s_addr = 0;
+		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
+
+		/* Include the IP addresses in the announcement. */
+		for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) {
+			struct sockaddr_in *in;
+
+			MPASS(sc->sc_ifas[i] != NULL);
+			if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET)
+				continue;
+
+			in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr;
+
+			if (m_append(m, sizeof(in->sin_addr),
+			    (caddr_t)&in->sin_addr) != 1) {
+				m_freem(m);
+				goto resched;
+			}
+
+			vh.vrrp_count_addr++;
+			len += sizeof(in->sin_addr);
+		}
+		ip->ip_len = htons(len);
+
+		vh_ptr = (struct vrrp_header *)mtodo(m, sizeof(*ip));
+		bcopy(&vh, vh_ptr, sizeof(vh));
+
+		vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr,
+		    ip->ip_dst.s_addr,
+		    htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p));
+		vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip));
+
+		if (carp_tag(sc, m))
+			goto resched;
+
+		CARPSTATS_INC(carps_opackets);
+
+		carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
+		    &sc->sc_carpdev->if_carp->cif_imo, NULL));
+	}
+#endif
+#ifdef INET6
+	if (sc->sc_naddrs6) {
+		struct ip6_hdr *ip6;
+
+		m = m_gethdr(M_NOWAIT, MT_DATA);
+		if (m == NULL) {
+			CARPSTATS_INC(carps_onomem);
+			goto resched;
+		}
+		len = sizeof(*ip6) + sizeof(vh);
+		m->m_pkthdr.len = len;
+		m->m_pkthdr.rcvif = NULL;
+		m->m_len = len;
+		M_ALIGN(m, m->m_len);
+		m->m_flags |= M_MCAST;
+		ip6 = mtod(m, struct ip6_hdr *);
+		bzero(ip6, sizeof(*ip6));
+		ip6->ip6_vfc |= IPV6_VERSION;
+		/* ip6_hdr has no traffic class field; use ip6_flow. */
+		ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
+		    IPTOS_DSCP_OFFSET));
+		ip6->ip6_hlim = CARP_DFLTTL;
+		ip6->ip6_nxt = IPPROTO_CARP;
+
+		/* set the source address */
+		ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
+		if (ifa != NULL) {
+			bcopy(IFA_IN6(ifa), &ip6->ip6_src,
+			    sizeof(struct in6_addr));
+			ifa_free(ifa);
+		} else
+			/* This should never happen with IPv6. */
+			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
+
+		/* Set the multicast destination. */
+		bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst));
+		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
+		ip6->ip6_dst.s6_addr8[15] = 0x12;
+
+		/* Include the IPv6 addresses; vh still holds the IPv4 count. */
+		len = sizeof(vh);
+		vh.vrrp_count_addr = 0;
+		for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) {
+			struct sockaddr_in6 *in6;
+
+			MPASS(sc->sc_ifas[i] != NULL);
+			if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6)
+				continue;
+
+			in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr;
+
+			if (m_append(m, sizeof(in6->sin6_addr),
+			    (char *)&in6->sin6_addr) != 1) {
+				m_freem(m);
+				goto resched;
+			}
+
+			vh.vrrp_count_addr++;
+			len += sizeof(in6->sin6_addr);
+		}
+		ip6->ip6_plen = htons(len);	/* 16-bit field: htons, not htonl */
+
+		vh_ptr = (struct vrrp_header *)mtodo(m, sizeof(*ip6));
+		bcopy(&vh, vh_ptr, sizeof(vh));
+
+		vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0);
+		vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6));
+
+		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+			m_freem(m);
+			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
+			goto resched;
+		}
+
+		if (carp_tag(sc, m))
+			goto resched;
+		CARPSTATS_INC(carps_opackets6);
+
+		carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
+		    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
+	}
+#endif
+
+resched:
+	callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100,
+	    carp_callout, sc);
 }
 
 static void
@@ -1178,7 +1624,7 @@
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
-		arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
+		arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr);
 	}
 }
 
@@ -1188,7 +1634,7 @@
 	struct carp_softc *sc = ifa->ifa_carp;
 
 	if (sc->sc_state == MASTER) {
-		*enaddr = LLADDR(&sc->sc_addr);
+		*enaddr = sc->sc_addr;
 		return (1);
 	}
 
@@ -1259,12 +1705,12 @@
-			    sizeof(struct carp_softc *), M_NOWAIT);
+			    sizeof(sc->sc_vhid), M_NOWAIT);
 			if (mtag == NULL)
 				/* Better a bit than nothing. */
-				return (LLADDR(&sc->sc_addr));
+				return (sc->sc_addr);
-
-			bcopy(&sc, mtag + 1, sizeof(sc));
+			/* carp_output() reads the tag back as an int vhid. */
+			bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid));
 			m_tag_prepend(m, mtag);
 
-			return (LLADDR(&sc->sc_addr));
+			return (sc->sc_addr);
 		}
 
 	return (NULL);
@@ -1286,7 +1732,7 @@
 		 * CARP_LOCK() is not here, since would protect nothing, but
 		 * cause deadlock with if_bridge, calling this under its lock.
*/ - if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), + if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, ETHER_ADDR_LEN)) { CIF_UNLOCK(ifp->if_carp); return (1); @@ -1327,7 +1773,7 @@ switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); - carp_send_ad_locked(sc); + send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif @@ -1357,6 +1803,7 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; + int timeout; CARP_LOCK_ASSERT(sc); @@ -1373,40 +1820,63 @@ break; case BACKUP: callout_stop(&sc->sc_ad_tmo); - tv.tv_sec = 3 * sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; + + switch (sc->sc_version) { + case CARP_VERSION_CARP: + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + timeout = tvtohz(&tv); + break; + case CARP_VERSION_VRRPv3: + /* skew time */ + timeout = (256 - sc->sc_vrrp_prio) * + sc->sc_vrrp_master_inter / 256; + timeout += (3 * sc->sc_vrrp_master_inter); + timeout *= hz; + timeout /= 100; /* master interval is in centiseconds */ + break; + } switch (af) { #ifdef INET case AF_INET: - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); #endif break; } break; case MASTER: - tv.tv_sec = sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; - callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), - carp_send_ad, sc); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 
256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_callout, sc); + break; + case CARP_VERSION_VRRPv3: + callout_reset(&sc->sc_ad_tmo, + sc->sc_vrrp_adv_inter * hz / 100, + carp_callout, sc); + break; + } break; } } @@ -1559,7 +2029,7 @@ carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; - struct carp_softc *sc; + int vhid; if (!sa) return (0); @@ -1581,20 +2051,7 @@ if (mtag == NULL) return (0); - bcopy(mtag + 1, &sc, sizeof(sc)); - - switch (sa->sa_family) { - case AF_INET: - if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) - return (0); - break; - case AF_INET6: - if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6)) - return (0); - break; - default: - panic("Unknown af"); - } + bcopy(mtag + 1, &vhid, sizeof(vhid)); /* Set the source MAC address to the Virtual Router MAC Address. */ switch (ifp->if_type) { @@ -1609,7 +2066,7 @@ eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; - eh->ether_shost[5] = sc->sc_vhid; + eh->ether_shost[5] = vhid; } break; default: @@ -1622,7 +2079,7 @@ } static struct carp_softc* -carp_alloc(struct ifnet *ifp) +carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) { struct carp_softc *sc; struct carp_if *cif; @@ -1632,20 +2089,31 @@ if ((cif = ifp->if_carp) == NULL) cif = carp_alloc_if(ifp); - sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); - - sc->sc_advbase = CARP_DFLTINTV; - sc->sc_vhid = -1; /* required setting */ - sc->sc_init_counter = 1; - sc->sc_state = INIT; - - sc->sc_ifasiz = sizeof(struct ifaddr *); + sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); + *sc = (struct carp_softc ){ + .sc_vhid = vhid, + .sc_version = version, + .sc_state = INIT, + .sc_carpdev = ifp, + .sc_ifasiz = sizeof(struct ifaddr *), + .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, + }; sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); - sc->sc_carpdev = ifp; - sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); - sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; - 
sc->sc_carpaddr6.s6_addr8[15] = 0x12; + switch (version) { + case CARP_VERSION_CARP: + sc->sc_advbase = CARP_DFLTINTV; + sc->sc_init_counter = true; + sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); + sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + sc->sc_carpaddr6.s6_addr8[15] = 0x12; + break; + case CARP_VERSION_VRRPv3: + sc->sc_vrrp_adv_inter = 100; + sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; + sc->sc_vrrp_prio = 100; + break; + } CARP_LOCK_INIT(sc); #ifdef INET @@ -1770,12 +2238,19 @@ CARP_LOCK(sc); carpr->carpr_state = sc->sc_state; carpr->carpr_vhid = sc->sc_vhid; - carpr->carpr_advbase = sc->sc_advbase; - carpr->carpr_advskew = sc->sc_advskew; - if (priv) - bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); - else - bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + carpr->carpr_advbase = sc->sc_advbase; + carpr->carpr_advskew = sc->sc_advskew; + if (priv) + bcopy(sc->sc_key, carpr->carpr_key, + sizeof(carpr->carpr_key)); + else + bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); + break; + case CARP_VERSION_VRRPv3: + break; + } CARP_UNLOCK(sc); return (true); @@ -1788,9 +2263,21 @@ struct carp_softc *sc = NULL; int error = 0; + if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) + return (EINVAL); - if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || - carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { + switch (carpr->carpr_version) { + case CARP_VERSION_CARP: + if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || + carpr->carpr_advbase < CARP_DFLTINTV)) + return (EINVAL); + if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) + return (EINVAL); + break; + case CARP_VERSION_VRRPv3: + /* XXXGL: shouldn't we check anything? 
*/ + break; + default: return (EINVAL); } @@ -1799,41 +2286,37 @@ if (sc->sc_vhid == carpr->carpr_vhid) break; } - if (sc == NULL) { - sc = carp_alloc(ifp); - CARP_LOCK(sc); - sc->sc_vhid = carpr->carpr_vhid; - LLADDR(&sc->sc_addr)[0] = 0; - LLADDR(&sc->sc_addr)[1] = 0; - LLADDR(&sc->sc_addr)[2] = 0x5e; - LLADDR(&sc->sc_addr)[3] = 0; - LLADDR(&sc->sc_addr)[4] = 1; - LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; - } else - CARP_LOCK(sc); - if (carpr->carpr_advbase > 0) { - if (carpr->carpr_advbase > 255 || - carpr->carpr_advbase < CARP_DFLTINTV) { - error = EINVAL; - goto out; + + if (sc == NULL) + sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); + else if (sc->sc_version != carpr->carpr_version) + return (EINVAL); + + CARP_LOCK(sc); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + if (carpr->carpr_advbase != 0) + sc->sc_advbase = carpr->carpr_advbase; + sc->sc_advskew = carpr->carpr_advskew; + if (carpr->carpr_addr.s_addr != INADDR_ANY) + sc->sc_carpaddr = carpr->carpr_addr; + if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { + memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, + sizeof(sc->sc_carpaddr6)); } - sc->sc_advbase = carpr->carpr_advbase; - } - if (carpr->carpr_advskew >= 255) { - error = EINVAL; - goto out; - } - sc->sc_advskew = carpr->carpr_advskew; - if (carpr->carpr_addr.s_addr != INADDR_ANY) - sc->sc_carpaddr = carpr->carpr_addr; - if (! 
IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { - memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, - sizeof(sc->sc_carpaddr6)); - } - if (carpr->carpr_key[0] != '\0') { - bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); - carp_hmac_prepare(sc); + if (carpr->carpr_key[0] != '\0') { + bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); + carp_hmac_prepare(sc); + } + break; + case CARP_VERSION_VRRPv3: + if (carpr->carpr_vrrp_priority != 0) + sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; + if (carpr->carpr_vrrp_adv_inter) + sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; + break; } + if (sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { switch (carpr->carpr_state) { @@ -1854,8 +2337,6 @@ break; } } - -out: CARP_UNLOCK(sc); return (error); @@ -1910,7 +2391,9 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) { struct carpreq carpr; - struct carpkreq carprk = { }; + struct carpkreq carprk = { + .carpr_version = CARP_VERSION_CARP, + }; struct ifnet *ifp; int error = 0; @@ -2034,7 +2517,8 @@ CARP_LOCK(sc); sc->sc_ifas[index - 1] = ifa; ifa->ifa_carp = sc; - carp_hmac_prepare(sc); + if (sc->sc_version == CARP_VERSION_CARP) + carp_hmac_prepare(sc); carp_sc_state(sc); CARP_UNLOCK(sc); @@ -2087,7 +2571,8 @@ ifa->ifa_carp = NULL; ifa_free(ifa); - carp_hmac_prepare(sc); + if (sc->sc_version == CARP_VERSION_CARP) + carp_hmac_prepare(sc); carp_sc_state(sc); if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) @@ -2279,13 +2764,23 @@ nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); - nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); - nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); - nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); - nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); - - if (priv) - nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); + nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); + switch (sc->sc_version) { + case CARP_VERSION_CARP: 
+ nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); + nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); + nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); + nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); + if (priv) + nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), + sc->sc_key); + break; + case CARP_VERSION_VRRPv3: + nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); + nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, + sc->sc_vrrp_adv_inter); + break; + } CARP_UNLOCK(sc); @@ -2307,6 +2802,9 @@ char key[CARP_KEY_LEN]; struct in_addr addr; struct in6_addr addr6; + carp_version_t version; + uint8_t vrrp_prio; + uint16_t vrrp_adv_inter; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) @@ -2322,6 +2820,9 @@ { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, + { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, }; static const struct nlfield_parser nlf_p_set[] = { }; @@ -2393,12 +2894,24 @@ return (EINVAL); if (attrs.state > CARP_MAXSTATE) return (EINVAL); - if (attrs.advbase < 0 || attrs.advskew < 0) - return (EINVAL); - if (attrs.advbase > 255) - return (EINVAL); - if (attrs.advskew >= 255) + if (attrs.version == 0) /* compat with pre-VRRPv3 */ + attrs.version = CARP_VERSION_CARP; + switch (attrs.version) { + case CARP_VERSION_CARP: + if (attrs.advbase < 0 || attrs.advskew < 0) + return (EINVAL); + if (attrs.advbase > 255) + return (EINVAL); + if (attrs.advskew >= 255) + return (EINVAL); + break; + case CARP_VERSION_VRRPv3: + if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) + return (EINVAL); + break; + default: return (EINVAL); + } NET_EPOCH_ENTER(et); if (attrs.ifname != 
NULL) @@ -2418,12 +2931,20 @@ carpr.carpr_count = 1; carpr.carpr_vhid = attrs.vhid; carpr.carpr_state = attrs.state; - carpr.carpr_advbase = attrs.advbase; - carpr.carpr_advskew = attrs.advskew; - carpr.carpr_addr = attrs.addr; - carpr.carpr_addr6 = attrs.addr6; - - memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); + carpr.carpr_version = attrs.version; + switch (attrs.version) { + case CARP_VERSION_CARP: + carpr.carpr_advbase = attrs.advbase; + carpr.carpr_advskew = attrs.advskew; + carpr.carpr_addr = attrs.addr; + carpr.carpr_addr6 = attrs.addr6; + memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); + break; + case CARP_VERSION_VRRPv3: + carpr.carpr_vrrp_priority = attrs.vrrp_prio; + carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; + break; + } sx_xlock(&carp_sx); error = carp_ioctl_set(ifp, &carpr); diff --git a/sys/netinet/ip_carp_nl.h b/sys/netinet/ip_carp_nl.h --- a/sys/netinet/ip_carp_nl.h +++ b/sys/netinet/ip_carp_nl.h @@ -32,6 +32,9 @@ CARP_NL_ADDR = 7, /* in_addr_t */ CARP_NL_ADDR6 = 8, /* in6_addr_t */ CARP_NL_IFNAME = 9, /* string */ + CARP_NL_VERSION = 10, /* u8 */ + CARP_NL_VRRP_PRIORITY = 11, /* u8 */ + CARP_NL_VRRP_ADV_INTER = 12, /* u16, 12-bit field in centiseconds*/ }; #endif