Index: head/sbin/ifconfig/iflagg.c =================================================================== --- head/sbin/ifconfig/iflagg.c (revision 272445) +++ head/sbin/ifconfig/iflagg.c (revision 272446) @@ -1,316 +1,321 @@ /*- */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifconfig.h" char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */ static void setlaggport(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqport rp; bzero(&rp, sizeof(rp)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname)); /* Don't choke if the port is already in this lagg. */ if (ioctl(s, SIOCSLAGGPORT, &rp) && errno != EEXIST) err(1, "SIOCSLAGGPORT"); } static void unsetlaggport(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqport rp; bzero(&rp, sizeof(rp)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname)); if (ioctl(s, SIOCSLAGGDELPORT, &rp)) err(1, "SIOCSLAGGDELPORT"); } static void setlaggproto(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqall ra; int i; bzero(&ra, sizeof(ra)); ra.ra_proto = LAGG_PROTO_MAX; for (i = 0; i < nitems(lpr); i++) { if (strcmp(val, lpr[i].lpr_name) == 0) { ra.ra_proto = lpr[i].lpr_proto; break; } } if (ra.ra_proto == LAGG_PROTO_MAX) errx(1, "Invalid aggregation protocol: %s", val); strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname)); if (ioctl(s, SIOCSLAGG, &ra) != 0) err(1, "SIOCSLAGG"); } static void setlaggflowidshift(const char *val, int d, int s, const struct afswtch *afp) { - struct lagg_reqall ra; + struct lagg_reqopts ro; - bzero(&ra, sizeof(ra)); - ra.ra_opts = LAGG_OPT_FLOWIDSHIFT; - strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname)); - 
ra.ra_flowid_shift = (int)strtol(val, NULL, 10); - if (ra.ra_flowid_shift & ~LAGG_OPT_FLOWIDSHIFT_MASK) + bzero(&ro, sizeof(ro)); + ro.ro_opts = LAGG_OPT_FLOWIDSHIFT; + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + ro.ro_flowid_shift = (int)strtol(val, NULL, 10); + if (ro.ro_flowid_shift & ~LAGG_OPT_FLOWIDSHIFT_MASK) errx(1, "Invalid flowid_shift option: %s", val); - if (ioctl(s, SIOCSLAGG, &ra) != 0) - err(1, "SIOCSLAGG"); + if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) + err(1, "SIOCSLAGGOPTS"); } static void setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp) { - struct lagg_reqall ra; + struct lagg_reqopts ro; - bzero(&ra, sizeof(ra)); - ra.ra_opts = d; - switch (ra.ra_opts) { + bzero(&ro, sizeof(ro)); + ro.ro_opts = d; + switch (ro.ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: case LAGG_OPT_LACP_TXTEST: case -LAGG_OPT_LACP_TXTEST: case LAGG_OPT_LACP_RXTEST: case -LAGG_OPT_LACP_RXTEST: break; default: err(1, "Invalid lagg option"); } - strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname)); + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); - if (ioctl(s, SIOCSLAGG, &ra) != 0) - err(1, "SIOCSLAGG"); + if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) + err(1, "SIOCSLAGGOPTS"); } static void setlagghash(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqflags rf; char *str, *tmp, *tok; rf.rf_flags = 0; str = tmp = strdup(val); while ((tok = strsep(&tmp, ",")) != NULL) { if (strcmp(tok, "l2") == 0) rf.rf_flags |= LAGG_F_HASHL2; else if (strcmp(tok, "l3") == 0) rf.rf_flags |= LAGG_F_HASHL3; else if (strcmp(tok, "l4") == 0) rf.rf_flags |= LAGG_F_HASHL4; else errx(1, "Invalid lagghash option: %s", tok); } free(str); if (rf.rf_flags == 0) errx(1, "No lagghash options supplied"); strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); if (ioctl(s, SIOCSLAGGHASH, &rf)) err(1, "SIOCSLAGGHASH"); } static char * lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) { 
snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", (int)mac[0], (int)mac[1], (int)mac[2], (int)mac[3], (int)mac[4], (int)mac[5]); return (buf); } static char * lacp_format_peer(struct lacp_opreq *req, const char *sep) { char macbuf1[20]; char macbuf2[20]; snprintf(lacpbuf, sizeof(lacpbuf), "[(%04X,%s,%04X,%04X,%04X),%s(%04X,%s,%04X,%04X,%04X)]", req->actor_prio, lacp_format_mac(req->actor_mac, macbuf1, sizeof(macbuf1)), req->actor_key, req->actor_portprio, req->actor_portno, sep, req->partner_prio, lacp_format_mac(req->partner_mac, macbuf2, sizeof(macbuf2)), req->partner_key, req->partner_portprio, req->partner_portno); return(lacpbuf); } static void lagg_status(int s) { struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqport rp, rpbuf[LAGG_MAX_PORTS]; struct lagg_reqall ra; + struct lagg_reqopts ro; struct lagg_reqflags rf; struct lacp_opreq *lp; const char *proto = ""; int i, isport = 0; bzero(&rp, sizeof(rp)); bzero(&ra, sizeof(ra)); + bzero(&ro, sizeof(ro)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, name, sizeof(rp.rp_portname)); if (ioctl(s, SIOCGLAGGPORT, &rp) == 0) isport = 1; strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname)); ra.ra_size = sizeof(rpbuf); ra.ra_port = rpbuf; + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + ioctl(s, SIOCGLAGGOPTS, &ro); + strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0) rf.rf_flags = 0; if (ioctl(s, SIOCGLAGG, &ra) == 0) { lp = (struct lacp_opreq *)&ra.ra_lacpreq; for (i = 0; i < nitems(lpr); i++) { if (ra.ra_proto == lpr[i].lpr_proto) { proto = lpr[i].lpr_name; break; } } printf("\tlaggproto %s", proto); if (rf.rf_flags & LAGG_F_HASHMASK) { const char *sep = ""; printf(" lagghash "); if (rf.rf_flags & LAGG_F_HASHL2) { printf("%sl2", sep); sep = ","; } if (rf.rf_flags & LAGG_F_HASHL3) { printf("%sl3", sep); sep = ","; } if (rf.rf_flags & LAGG_F_HASHL4) { printf("%sl4", sep); sep = ","; } } if (isport) printf(" laggdev %s", rp.rp_ifname); 
putchar('\n'); if (verbose) { printf("\tlagg options:\n"); printf("\t\tuse_flowid: %d\n", - (ra.ra_opts & LAGG_OPT_USE_FLOWID) ? 1 : 0); - printf("\t\tflowid_shift: %d\n", ra.ra_flowid_shift); + (ro.ro_opts & LAGG_OPT_USE_FLOWID) ? 1 : 0); + printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift); switch (ra.ra_proto) { case LAGG_PROTO_LACP: printf("\t\tlacp_strict: %d\n", - (ra.ra_opts & LAGG_OPT_LACP_STRICT) ? 1 : 0); + (ro.ro_opts & LAGG_OPT_LACP_STRICT) ? 1 : 0); printf("\t\tlacp_rxtest: %d\n", - (ra.ra_opts & LAGG_OPT_LACP_RXTEST) ? 1 : 0); + (ro.ro_opts & LAGG_OPT_LACP_RXTEST) ? 1 : 0); printf("\t\tlacp_txtest: %d\n", - (ra.ra_opts & LAGG_OPT_LACP_TXTEST) ? 1 : 0); + (ro.ro_opts & LAGG_OPT_LACP_TXTEST) ? 1 : 0); } printf("\tlagg statistics:\n"); - printf("\t\tactive ports: %d\n", ra.ra_active); - printf("\t\tflapping: %u\n", ra.ra_flapping); + printf("\t\tactive ports: %d\n", ro.ro_active); + printf("\t\tflapping: %u\n", ro.ro_flapping); if (ra.ra_proto == LAGG_PROTO_LACP) { printf("\tlag id: %s\n", lacp_format_peer(lp, "\n\t\t ")); } } for (i = 0; i < ra.ra_ports; i++) { lp = (struct lacp_opreq *)&rpbuf[i].rp_lacpreq; printf("\tlaggport: %s ", rpbuf[i].rp_portname); printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printf(" state=%X", lp->actor_state); putchar('\n'); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printf("\t\t%s\n", lacp_format_peer(lp, "\n\t\t ")); } if (0 /* XXX */) { printf("\tsupported aggregation protocols:\n"); for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) printf("\t\tlaggproto %s\n", lpr[i].lpr_name); } } } static struct cmd lagg_cmds[] = { DEF_CMD_ARG("laggport", setlaggport), DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), DEF_CMD_ARG("lagghash", setlagghash), DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt), DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), 
DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), DEF_CMD("-lacp_txtest", -LAGG_OPT_LACP_TXTEST, setlaggsetopt), DEF_CMD("lacp_rxtest", LAGG_OPT_LACP_RXTEST, setlaggsetopt), DEF_CMD("-lacp_rxtest", -LAGG_OPT_LACP_RXTEST, setlaggsetopt), DEF_CMD_ARG("flowid_shift", setlaggflowidshift), }; static struct afswtch af_lagg = { .af_name = "af_lagg", .af_af = AF_UNSPEC, .af_other_status = lagg_status, }; static __constructor void lagg_ctor(void) { #define N(a) (sizeof(a) / sizeof(a[0])) int i; for (i = 0; i < N(lagg_cmds); i++) cmd_register(&lagg_cmds[i]); af_register(&af_lagg); #undef N } Index: head/sys/net/if_lagg.c =================================================================== --- head/sys/net/if_lagg.c (revision 272445) +++ head/sys/net/if_lagg.c (revision 272446) @@ -1,2257 +1,2255 @@ /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */ /* * Copyright (c) 2005, 2006 Reyk Floeter * Copyright (c) 2007 Andrew Thompson * Copyright (c) 2014 Marcelo Araujo * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #endif #include #include #include /* Special flags we should propagate to the lagg ports. */ static struct { int flag; int (*func)(struct ifnet *, int); } lagg_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ #define V_lagg_list VNET(lagg_list) static VNET_DEFINE(struct mtx, lagg_list_mtx); #define V_lagg_list_mtx VNET(lagg_list_mtx) #define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ "if_lagg list", NULL, MTX_DEF) #define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) #define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) #define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, lagg_cloner); #define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; static void lagg_lladdr(struct lagg_softc *, uint8_t *); static void lagg_capabilities(struct lagg_softc *); static void lagg_port_lladdr(struct lagg_port *, uint8_t *); static void lagg_port_setlladdr(void *, int); static int lagg_port_create(struct lagg_softc *, struct ifnet *); static int lagg_port_destroy(struct lagg_port *, int); static struct mbuf *lagg_input(struct ifnet *, struct mbuf *); static void lagg_linkstate(struct lagg_softc *); static void lagg_port_state(struct ifnet *, int); static int lagg_port_ioctl(struct ifnet *, 
u_long, caddr_t); static int lagg_port_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void lagg_port_ifdetach(void *arg __unused, struct ifnet *); #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *); #endif static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); static int lagg_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_ether_setmulti(struct lagg_softc *); static int lagg_ether_cmdmulti(struct lagg_port *, int); static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); static int lagg_transmit(struct ifnet *, struct mbuf *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); /* Simple round robin */ static void lagg_rr_attach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ static void lagg_lb_attach(struct lagg_softc *); static void lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); static int lagg_lb_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static int lagg_lb_porttable(struct lagg_softc 
*, struct lagg_port *); /* Broadcast */ static int lagg_bcast_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* 802.3ad LACP */ static void lagg_lacp_attach(struct lagg_softc *); static void lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static void lagg_lacp_lladdr(struct lagg_softc *); /* lagg protocol table */ static const struct lagg_proto { lagg_proto pr_num; void (*pr_attach)(struct lagg_softc *); void (*pr_detach)(struct lagg_softc *); int (*pr_start)(struct lagg_softc *, struct mbuf *); struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *, struct mbuf *); int (*pr_addport)(struct lagg_port *); void (*pr_delport)(struct lagg_port *); void (*pr_linkstate)(struct lagg_port *); void (*pr_init)(struct lagg_softc *); void (*pr_stop)(struct lagg_softc *); void (*pr_lladdr)(struct lagg_softc *); void (*pr_request)(struct lagg_softc *, void *); void (*pr_portreq)(struct lagg_port *, void *); } lagg_protos[] = { { .pr_num = LAGG_PROTO_NONE }, { .pr_num = LAGG_PROTO_ROUNDROBIN, .pr_attach = lagg_rr_attach, .pr_start = lagg_rr_start, .pr_input = lagg_rr_input, }, { .pr_num = LAGG_PROTO_FAILOVER, .pr_start = lagg_fail_start, .pr_input = lagg_fail_input, }, { .pr_num = LAGG_PROTO_LOADBALANCE, .pr_attach = lagg_lb_attach, .pr_detach = lagg_lb_detach, .pr_start = lagg_lb_start, .pr_input = lagg_lb_input, .pr_addport = lagg_lb_port_create, .pr_delport = lagg_lb_port_destroy, }, { .pr_num = LAGG_PROTO_LACP, .pr_attach = lagg_lacp_attach, .pr_detach = lagg_lacp_detach, .pr_start = lagg_lacp_start, .pr_input = lagg_lacp_input, .pr_addport = lacp_port_create, .pr_delport = lacp_port_destroy, .pr_linkstate = lacp_linkstate, .pr_init = lacp_init, .pr_stop = lacp_stop, .pr_lladdr = lagg_lacp_lladdr, .pr_request = lacp_req, 
.pr_portreq = lacp_portreq, }, { .pr_num = LAGG_PROTO_ETHERCHANNEL, .pr_attach = lagg_lb_attach, .pr_detach = lagg_lb_detach, .pr_start = lagg_lb_start, .pr_input = lagg_lb_input, }, { .pr_num = LAGG_PROTO_BROADCAST, .pr_start = lagg_bcast_start, .pr_input = lagg_bcast_input, }, }; SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); /* Allow input on any failover links */ static VNET_DEFINE(int, lagg_failover_rx_all); #define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); /* Default value for using M_FLOWID */ static VNET_DEFINE(int, def_use_flowid) = 1; #define V_def_use_flowid VNET(def_use_flowid) SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); /* Default value for using M_FLOWID */ static VNET_DEFINE(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, &VNET_NAME(def_flowid_shift), 0, "Default setting for flowid shift for load sharing"); static void vnet_lagg_init(const void *unused __unused) { LAGG_LIST_LOCK_INIT(); SLIST_INIT(&V_lagg_list); V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create, lagg_clone_destroy, 0); } VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_lagg_init, NULL); static void vnet_lagg_uninit(const void *unused __unused) { if_clone_detach(V_lagg_cloner); LAGG_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_lagg_uninit, NULL); static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( 
ifnet_departure_event, lagg_port_ifdetach, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); lagg_input_p = NULL; lagg_linkstate_p = NULL; break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t lagg_mod = { "if_lagg", lagg_modevent, 0 }; DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_lagg, 1); static void lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) { KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto", __func__, sc)); if (sc->sc_ifflags & IFF_DEBUG) if_printf(sc->sc_ifp, "using proto %u\n", pr); if (lagg_protos[pr].pr_attach != NULL) lagg_protos[pr].pr_attach(sc); sc->sc_proto = pr; } static void lagg_proto_detach(struct lagg_softc *sc) { lagg_proto pr; LAGG_WLOCK_ASSERT(sc); pr = sc->sc_proto; sc->sc_proto = LAGG_PROTO_NONE; if (lagg_protos[pr].pr_detach != NULL) lagg_protos[pr].pr_detach(sc); else LAGG_WUNLOCK(sc); } static int lagg_proto_start(struct lagg_softc *sc, struct mbuf *m) { return (lagg_protos[sc->sc_proto].pr_start(sc, m)); } static struct mbuf * lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m)); } static int lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp) { if (lagg_protos[sc->sc_proto].pr_addport == NULL) return (0); else return (lagg_protos[sc->sc_proto].pr_addport(lp)); } static void lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp) { if (lagg_protos[sc->sc_proto].pr_delport != NULL) lagg_protos[sc->sc_proto].pr_delport(lp); } static void lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp) { if (lagg_protos[sc->sc_proto].pr_linkstate != NULL) lagg_protos[sc->sc_proto].pr_linkstate(lp); } static void lagg_proto_init(struct lagg_softc *sc) { if (lagg_protos[sc->sc_proto].pr_init != NULL) lagg_protos[sc->sc_proto].pr_init(sc); } static void lagg_proto_stop(struct lagg_softc *sc) 
{ if (lagg_protos[sc->sc_proto].pr_stop != NULL) lagg_protos[sc->sc_proto].pr_stop(sc); } static void lagg_proto_lladdr(struct lagg_softc *sc) { if (lagg_protos[sc->sc_proto].pr_lladdr != NULL) lagg_protos[sc->sc_proto].pr_lladdr(sc); } static void lagg_proto_request(struct lagg_softc *sc, void *v) { if (lagg_protos[sc->sc_proto].pr_request != NULL) lagg_protos[sc->sc_proto].pr_request(sc, v); } static void lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v) { if (lagg_protos[sc->sc_proto].pr_portreq != NULL) lagg_protos[sc->sc_proto].pr_portreq(lp, v); } /* * This routine is run via an vlan * config EVENT */ static void lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct lagg_softc *sc = ifp->if_softc; struct lagg_port *lp; struct rm_priotracker tracker; if (ifp->if_softc != arg) /* Not our event */ return; LAGG_RLOCK(sc, &tracker); if (!SLIST_EMPTY(&sc->sc_ports)) { SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag); } LAGG_RUNLOCK(sc, &tracker); } /* * This routine is run via an vlan * unconfig EVENT */ static void lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct lagg_softc *sc = ifp->if_softc; struct lagg_port *lp; struct rm_priotracker tracker; if (ifp->if_softc != arg) /* Not our event */ return; LAGG_RLOCK(sc, &tracker); if (!SLIST_EMPTY(&sc->sc_ports)) { SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag); } LAGG_RUNLOCK(sc, &tracker); } static int lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct lagg_softc *sc; struct ifnet *ifp; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { free(sc, M_DEVBUF); return (ENOSPC); } if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; sc->flowid_shift = V_def_flowid_shift; /* Hash all layers by default */ sc->sc_flags = 
LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; lagg_proto_attach(sc, LAGG_PROTO_DEFAULT); LAGG_LOCK_INIT(sc); SLIST_INIT(&sc->sc_ports); TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc); /* Initialise pseudo media types */ ifmedia_init(&sc->sc_media, 0, lagg_media_change, lagg_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); if_initname(ifp, laggname, unit); ifp->if_softc = sc; ifp->if_transmit = lagg_transmit; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; ifp->if_get_counter = lagg_get_counter; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; /* * Attach as an ordinary ethernet device, children will be attached * as special device IFT_IEEE8023ADLAG. */ ether_ifattach(ifp, eaddr); sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); /* Insert into the global list of laggs */ LAGG_LIST_LOCK(); SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); LAGG_LIST_UNLOCK(); return (0); } static void lagg_clone_destroy(struct ifnet *ifp) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; LAGG_WLOCK(sc); lagg_stop(sc); ifp->if_flags &= ~IFF_UP; EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); /* Shutdown and remove lagg ports */ while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL) lagg_port_destroy(lp, 1); /* Unhook the aggregation protocol */ lagg_proto_detach(sc); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); LAGG_LIST_LOCK(); SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); LAGG_LIST_UNLOCK(); taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } static void lagg_lladdr(struct 
lagg_softc *sc, uint8_t *lladdr) { struct ifnet *ifp = sc->sc_ifp; if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); /* Let the protocol know the MAC has changed */ lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, ifp); } static void lagg_capabilities(struct lagg_softc *sc) { struct lagg_port *lp; int cap = ~0, ena = ~0; u_long hwa = ~0UL; struct ifnet_hw_tsomax hw_tsomax; LAGG_WLOCK_ASSERT(sc); memset(&hw_tsomax, 0, sizeof(hw_tsomax)); /* Get capabilities from the lagg ports */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { cap &= lp->lp_ifp->if_capabilities; ena &= lp->lp_ifp->if_capenable; hwa &= lp->lp_ifp->if_hwassist; if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax); } cap = (cap == ~0 ? 0 : cap); ena = (ena == ~0 ? 0 : ena); hwa = (hwa == ~0 ? 0 : hwa); if (sc->sc_ifp->if_capabilities != cap || sc->sc_ifp->if_capenable != ena || sc->sc_ifp->if_hwassist != hwa || if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) { sc->sc_ifp->if_capabilities = cap; sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_hwassist = hwa; getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) if_printf(sc->sc_ifp, "capabilities 0x%08x enabled 0x%08x\n", cap, ena); } } static void lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *ifp = lp->lp_ifp; struct lagg_llq *llq; int pending = 0; LAGG_WLOCK_ASSERT(sc); if (lp->lp_detaching || memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; /* Check to make sure its not already queued to be changed */ SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) { if (llq->llq_ifp == ifp) { pending = 1; break; } } if (!pending) { llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT); if (llq == NULL) /* XXX what to do */ return; } /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); if (!pending) 
SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries); taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task); } /* * Set the interface MAC address from a taskqueue to avoid a LOR. */ static void lagg_port_setlladdr(void *arg, int pending) { struct lagg_softc *sc = (struct lagg_softc *)arg; struct lagg_llq *llq, *head; struct ifnet *ifp; int error; /* Grab a local reference of the queue and remove it from the softc */ LAGG_WLOCK(sc); head = SLIST_FIRST(&sc->sc_llq_head); SLIST_FIRST(&sc->sc_llq_head) = NULL; LAGG_WUNLOCK(sc); /* * Traverse the queue and set the lladdr on each ifp. It is safe to do * unlocked as we have the only reference to it. */ for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); CURVNET_RESTORE(); if (error) printf("%s: setlladdr failed on %s\n", __func__, ifp->if_xname); head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } } static int lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) { struct lagg_softc *sc_ptr; struct lagg_port *lp, *tlp; int error, i; uint64_t *pval; LAGG_WLOCK_ASSERT(sc); /* Limit the maximal number of lagg ports */ if (sc->sc_count >= LAGG_MAX_PORTS) return (ENOSPC); /* Check if port has already been associated to a lagg */ if (ifp->if_lagg != NULL) { /* Port is already in the current lagg? */ lp = (struct lagg_port *)ifp->if_lagg; if (lp->lp_softc == sc) return (EEXIST); return (EBUSY); } /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ if (ifp->if_type != IFT_ETHER) return (EPROTONOSUPPORT); #ifdef INET6 /* * The member interface should not have inet6 address because * two interfaces with a valid link-local scope zone must not be * merged in any form. This restriction is needed to * prevent violation of link-local scope zone. Attempts to * add a member interface which has inet6 addresses triggers * removal of all inet6 addresses on the member interface. 
*/ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (in6ifa_llaonifp(lp->lp_ifp)) { in6_ifdetach(lp->lp_ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", lp->lp_ifp->if_xname); } } if (in6ifa_llaonifp(ifp)) { in6_ifdetach(ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifp->if_xname); } #endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; else if (sc->sc_ifp->if_mtu != ifp->if_mtu) { if_printf(sc->sc_ifp, "invalid MTU for %s\n", ifp->if_xname); return (EINVAL); } if ((lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return (ENOMEM); /* Check if port is a stacked lagg */ LAGG_LIST_LOCK(); SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (EINVAL); /* XXX disable stacking for the moment, its untested */ #ifdef LAGG_PORT_STACKING lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (E2BIG); } #endif } } LAGG_LIST_UNLOCK(); /* Change the interface type */ lp->lp_iftype = ifp->if_type; ifp->if_type = IFT_IEEE8023ADLAG; ifp->if_lagg = lp; lp->lp_ioctl = ifp->if_ioctl; ifp->if_ioctl = lagg_port_ioctl; lp->lp_output = ifp->if_output; ifp->if_output = lagg_port_output; lp->lp_ifp = ifp; lp->lp_softc = sc; /* Save port link layer address */ bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN); if (SLIST_EMPTY(&sc->sc_ports)) { sc->sc_primary = lp; lagg_lladdr(sc, IF_LLADDR(ifp)); } else { /* Update link layer address for this port */ lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp)); } /* Insert into the list of ports. Keep ports sorted by if_index. 
*/ SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) { if (tlp->lp_ifp->if_index < ifp->if_index && ( SLIST_NEXT(tlp, lp_entries) == NULL || SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index < ifp->if_index)) break; } if (tlp != NULL) SLIST_INSERT_AFTER(tlp, lp, lp_entries); else SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries); sc->sc_count++; /* Update lagg capabilities */ lagg_capabilities(sc); lagg_linkstate(sc); /* Read port counters */ pval = lp->port_counters.val; for (i = 0; i < IFCOUNTERS; i++, pval++) *pval = ifp->if_get_counter(ifp, i); /* Add multicast addresses and interface flags to this port */ lagg_ether_cmdmulti(lp, 1); lagg_setflags(lp, 1); if ((error = lagg_proto_addport(sc, lp)) != 0) { /* Remove the port, without calling pr_delport. */ lagg_port_destroy(lp, 0); return (error); } return (0); } #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *sc) { struct lagg_softc *sc_ptr; struct lagg_port *lp; int m = 0; LAGG_WLOCK_ASSERT(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_flags & LAGG_PORT_STACK) { sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc; m = MAX(m, lagg_port_checkstacking(sc_ptr)); } } return (m + 1); } #endif static int lagg_port_destroy(struct lagg_port *lp, int rundelport) { struct lagg_softc *sc = lp->lp_softc; struct lagg_port *lp_ptr; struct lagg_llq *llq; struct ifnet *ifp = lp->lp_ifp; uint64_t *pval, vdiff; int i; LAGG_WLOCK_ASSERT(sc); if (rundelport) lagg_proto_delport(sc, lp); /* * Remove multicast addresses and interface flags from this port and * reset the MAC address, skip if the interface is being detached. 
*/ if (!lp->lp_detaching) { lagg_ether_cmdmulti(lp, 0); lagg_setflags(lp, 0); lagg_port_lladdr(lp, lp->lp_lladdr); } /* Restore interface */ ifp->if_type = lp->lp_iftype; ifp->if_ioctl = lp->lp_ioctl; ifp->if_output = lp->lp_output; ifp->if_lagg = NULL; /* Update detached port counters */ pval = lp->port_counters.val; for (i = 0; i < IFCOUNTERS; i++, pval++) { vdiff = ifp->if_get_counter(ifp, i) - *pval; sc->detached_counters.val[i] += vdiff; } /* Finally, remove the port from the lagg */ SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries); sc->sc_count--; /* Update the primary interface */ if (lp == sc->sc_primary) { uint8_t lladdr[ETHER_ADDR_LEN]; if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) { bzero(&lladdr, ETHER_ADDR_LEN); } else { bcopy(lp_ptr->lp_lladdr, lladdr, ETHER_ADDR_LEN); } lagg_lladdr(sc, lladdr); sc->sc_primary = lp_ptr; /* Update link layer address for each port */ SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) lagg_port_lladdr(lp_ptr, lladdr); } /* Remove any pending lladdr changes from the queue */ if (lp->lp_detaching) { SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) { if (llq->llq_ifp == ifp) { SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq, llq_entries); free(llq, M_DEVBUF); break; /* Only appears once */ } } } if (lp->lp_ifflags) if_printf(ifp, "%s: lp_ifflags unclean\n", __func__); free(lp, M_DEVBUF); /* Update lagg capabilities */ lagg_capabilities(sc); lagg_linkstate(sc); return (0); } static int lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_reqport *rp = (struct lagg_reqport *)data; struct lagg_softc *sc; struct lagg_port *lp = NULL; int error = 0; struct rm_priotracker tracker; /* Should be checked by the caller */ if (ifp->if_type != IFT_IEEE8023ADLAG || (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) goto fallback; switch (cmd) { case SIOCGLAGGPORT: if (rp->rp_portname[0] == '\0' || ifunit(rp->rp_portname) != ifp) { error = EINVAL; break; } LAGG_RLOCK(sc, &tracker); if ((lp = 
ifp->if_lagg) == NULL || lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc, &tracker);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc, &tracker);
		break;

	case SIOCSIFCAP:
		if (lp->lp_ioctl == NULL) {
			error = EINVAL;
			break;
		}
		error = (*lp->lp_ioctl)(ifp, cmd, data);
		if (error)
			break;

		/* Update lagg interface capabilities */
		LAGG_WLOCK(sc);
		lagg_capabilities(sc);
		LAGG_WUNLOCK(sc);
		break;

	case SIOCSIFMTU:
		/* Do not allow the MTU to be changed once joined */
		error = EINVAL;
		break;

	default:
		goto fallback;
	}

	return (error);

fallback:
	/*
	 * NOTE(review): lp is initialised to NULL and the sanity check at the
	 * top jumps here when if_type does not match or if_lagg is NULL, so
	 * this dereference looks reachable with lp == NULL -- confirm that
	 * callers never invoke this handler on such an ifnet.
	 */
	if (lp->lp_ioctl != NULL)
		return ((*lp->lp_ioctl)(ifp, cmd, data));

	return (EINVAL);
}

/*
 * Requests counter @cnt data.
 *
 * Counter value is calculated the following way:
 * 1) for each port, sum difference between current and "initial" measurements.
 * 2) add lagg logical interface counters.
 * 3) add data from detached_counters array.
 *
 * We also do the following things on ports attach/detach:
 * 1) On port attach we store all counters it has into port_counter array.
 * 2) On port detach we add the difference between "initial" and
 *    current counters data to detached_counters array.
 *
 * @cnt must be below IFCOUNTERS (asserted).  The port list is walked
 * under the lagg read lock.
 */
static uint64_t
lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct lagg_softc *sc;
	struct lagg_port *lp;
	struct ifnet *lpifp;
	struct rm_priotracker tracker;
	uint64_t newval, oldval, vsum;

	/* Revise this when we've got non-generic counters. */
	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));

	sc = (struct lagg_softc *)ifp->if_softc;
	LAGG_RLOCK(sc, &tracker);

	vsum = 0;
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		/* Saved attached value */
		oldval = lp->port_counters.val[cnt];
		/* current value */
		lpifp = lp->lp_ifp;
		newval = lpifp->if_get_counter(lpifp, cnt);
		/* Accumulate the delta since the port was attached */
		vsum += newval - oldval;
	}

	/*
	 * Add counter data which might be added by upper
	 * layer protocols operating on logical interface.
*/
	vsum += if_get_counter_default(ifp, cnt);

	/*
	 * Add counter data from detached ports counters
	 */
	vsum += sc->detached_counters.val[cnt];

	LAGG_RUNLOCK(sc, &tracker);

	return (vsum);
}

/*
 * For direct output to child ports.
 *
 * Only frames whose destination family is pseudo_AF_HDRCMPLT or AF_UNSPEC
 * are handed to the port's saved lp_output routine; anything else is
 * freed and ENETDOWN is returned.
 */
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
	const struct sockaddr *dst, struct route *ro)
{
	struct lagg_port *lp = ifp->if_lagg;

	switch (dst->sa_family) {
		case pseudo_AF_HDRCMPLT:
		case AF_UNSPEC:
			return ((*lp->lp_output)(ifp, m, dst, ro));
	}

	/* drop any other frames */
	m_freem(m);
	return (ENETDOWN);
}

/*
 * If ifp is a lagg port (if_lagg set) and is not merely being renamed,
 * flag the port as detaching and destroy it under the lagg write lock.
 * arg is unused.
 */
static void
lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct lagg_port *lp;
	struct lagg_softc *sc;

	if ((lp = ifp->if_lagg) == NULL)
		return;
	/* If the ifnet is just being renamed, don't do anything. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	sc = lp->lp_softc;

	LAGG_WLOCK(sc);
	/* Tells lagg_port_destroy() to skip work on the departing ifnet. */
	lp->lp_detaching = 1;
	lagg_port_destroy(lp, 1);
	LAGG_WUNLOCK(sc);
}

/*
 * Fill request rp with the state of port lp: lagg and port names,
 * priority, flags, the protocol-specific data supplied by
 * lagg_proto_portreq(), and activity flags derived per protocol.
 */
static void
lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
{
	struct lagg_softc *sc = lp->lp_softc;

	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
	rp->rp_prio = lp->lp_prio;
	rp->rp_flags = lp->lp_flags;
	lagg_proto_portreq(sc, lp, &rp->rp_psc);

	/* Add protocol specific flags */
	switch (sc->sc_proto) {
		case LAGG_PROTO_FAILOVER:
			if (lp == sc->sc_primary)
				rp->rp_flags |= LAGG_PORT_MASTER;
			/* Active means: the port failover would transmit on. */
			if (lp == lagg_link_active(sc, sc->sc_primary))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			break;

		case LAGG_PROTO_ROUNDROBIN:
		case LAGG_PROTO_LOADBALANCE:
		case LAGG_PROTO_ETHERCHANNEL:
		case LAGG_PROTO_BROADCAST:
			if (LAGG_PORTACTIVE(lp))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			break;

		case LAGG_PROTO_LACP:
			/* LACP has a different definition of active */
			if (lacp_isactive(lp))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			if (lacp_iscollecting(lp))
				rp->rp_flags |= LAGG_PORT_COLLECTING;
			if (lacp_isdistributing(lp))
				rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
			break;
	}

}

/*
 * Mark the lagg interface running, push its link-layer address to every
 * port and run the protocol init hook.  Does nothing when the interface
 * is already running.
 */
static void
lagg_init(void *xsc)
{
	struct lagg_softc *sc
= (struct lagg_softc *)xsc;
	struct lagg_port *lp;
	struct ifnet *ifp = sc->sc_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	LAGG_WLOCK(sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Update the port lladdrs */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_port_lladdr(lp, IF_LLADDR(ifp));

	lagg_proto_init(sc);

	LAGG_WUNLOCK(sc);
}

/*
 * Clear IFF_DRV_RUNNING and run the protocol stop hook.  Does nothing
 * when the interface is not running.  The lagg write lock is asserted.
 */
static void
lagg_stop(struct lagg_softc *sc)
{
	struct ifnet *ifp = sc->sc_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	lagg_proto_stop(sc);
}

/*
 * ioctl handler of the lagg interface itself.  The same data pointer is
 * viewed through several request layouts (ra, ro, rp, rf, ifr); which
 * one applies depends on cmd.
 */
static int
lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_reqall *ra = (struct lagg_reqall *)data;
+	struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	struct lagg_port *lp;
	struct ifnet *tpif;
	struct thread *td = curthread;
	char *buf, *outbuf;
	int count, buflen, len, error = 0;
	struct rm_priotracker tracker;

	bzero(&rpbuf, sizeof(rpbuf));

	switch (cmd) {
	case SIOCGLAGG:
		LAGG_RLOCK(sc, &tracker);
		count = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			count++;
		buflen = count * sizeof(struct lagg_reqport);
		LAGG_RUNLOCK(sc, &tracker);

		/* Allocated with M_WAITOK while the read lock is not held. */
		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);

		LAGG_RLOCK(sc, &tracker);
		ra->ra_proto = sc->sc_proto;
		lagg_proto_request(sc, &ra->ra_psc);
-		ra->ra_opts = sc->sc_opts;
-		if (sc->sc_proto == LAGG_PROTO_LACP) {
-			struct lacp_softc *lsc;
-
-			lsc = (struct lacp_softc *)sc->sc_psc;
-			if (lsc->lsc_debug.lsc_tx_test != 0)
-				ra->ra_opts |= LAGG_OPT_LACP_TXTEST;
-			if (lsc->lsc_debug.lsc_rx_test != 0)
-				ra->ra_opts |= LAGG_OPT_LACP_RXTEST;
-			if (lsc->lsc_strict_mode != 0)
-				ra->ra_opts |= LAGG_OPT_LACP_STRICT;
-
-			ra->ra_active = sc->sc_active;
-		} else {
-			/*
-			 * LACP tracks active links automatically,
-			 * the others do not.
- */ - ra->ra_active = 0; - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) - ra->ra_active += LAGG_PORTACTIVE(lp); - } - ra->ra_flapping = sc->sc_flapping; - ra->ra_flowid_shift = sc->flowid_shift; - count = 0; buf = outbuf; len = min(ra->ra_size, buflen); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (len < sizeof(rpbuf)) break; lagg_port2req(lp, &rpbuf); memcpy(buf, &rpbuf, sizeof(rpbuf)); count++; buf += sizeof(rpbuf); len -= sizeof(rpbuf); } LAGG_RUNLOCK(sc, &tracker); ra->ra_ports = count; ra->ra_size = count * sizeof(rpbuf); error = copyout(outbuf, ra->ra_port, ra->ra_size); free(outbuf, M_TEMP); break; case SIOCSLAGG: - /* - * Set options or protocol depending on - * ra->ra_opts and ra->ra_proto. - */ error = priv_check(td, PRIV_NET_LAGG); if (error) break; - if (ra->ra_opts != 0) { - /* - * Set options. LACP options are stored in sc->sc_psc, - * not in sc_opts. - */ - int valid, lacp; + if (ra->ra_proto < 1 || ra->ra_proto >= LAGG_PROTO_MAX) { + error = EPROTONOSUPPORT; + break; + } - switch (ra->ra_opts) { - case LAGG_OPT_USE_FLOWID: - case -LAGG_OPT_USE_FLOWID: - case LAGG_OPT_FLOWIDSHIFT: - valid = 1; - lacp = 0; - break; + LAGG_WLOCK(sc); + lagg_proto_detach(sc); + lagg_proto_attach(sc, ra->ra_proto); + break; + case SIOCGLAGGOPTS: + ro->ro_opts = sc->sc_opts; + if (sc->sc_proto == LAGG_PROTO_LACP) { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + if (lsc->lsc_debug.lsc_tx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_TXTEST; + if (lsc->lsc_debug.lsc_rx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_RXTEST; + if (lsc->lsc_strict_mode != 0) + ro->ro_opts |= LAGG_OPT_LACP_STRICT; + + ro->ro_active = sc->sc_active; + } else { + ro->ro_active = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + ro->ro_active += LAGG_PORTACTIVE(lp); + } + ro->ro_flapping = sc->sc_flapping; + ro->ro_flowid_shift = sc->flowid_shift; + break; + case SIOCSLAGGOPTS: + error = priv_check(td, PRIV_NET_LAGG); + if (error) + break; + if (ro->ro_opts == 0) + break; + 
/* + * Set options. LACP options are stored in sc->sc_psc, + * not in sc_opts. + */ + int valid, lacp; + + switch (ro->ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_FLOWIDSHIFT: + valid = 1; + lacp = 0; + break; + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + valid = lacp = 1; + break; + default: + valid = lacp = 0; + break; + } + + LAGG_WLOCK(sc); + if (valid == 0 || + (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { + /* Invalid combination of options specified. */ + error = EINVAL; + LAGG_WUNLOCK(sc); + break; /* Return from SIOCSLAGGOPTS. */ + } + /* + * Store new options into sc->sc_opts except for + * FLOWIDSHIFT and LACP options. + */ + if (lacp == 0) { + if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) + sc->flowid_shift = ro->ro_flowid_shift; + else if (ro->ro_opts > 0) + sc->sc_opts |= ro->ro_opts; + else + sc->sc_opts &= ~ro->ro_opts; + } else { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + + switch (ro->ro_opts) { case LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 1; + break; case -LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 0; + break; case LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 1; + break; case -LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 0; + break; case LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 1; + break; case -LAGG_OPT_LACP_STRICT: - valid = lacp = 1; + lsc->lsc_strict_mode = 0; break; - default: - valid = lacp = 0; - break; } - - LAGG_WLOCK(sc); - if (valid == 0 || - (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { - /* Invalid combination of options specified. */ - error = EINVAL; - LAGG_WUNLOCK(sc); - break; /* Return from SIOCSLAGG. */ - } - /* - * Store new options into sc->sc_opts except for - * FLOWIDSHIFT and LACP options. 
- */ - if (lacp == 0) { - if (ra->ra_opts == LAGG_OPT_FLOWIDSHIFT) - sc->flowid_shift = ra->ra_flowid_shift; - else if (ra->ra_opts > 0) - sc->sc_opts |= ra->ra_opts; - else - sc->sc_opts &= ~ra->ra_opts; - } else { - struct lacp_softc *lsc; - - lsc = (struct lacp_softc *)sc->sc_psc; - - switch (ra->ra_opts) { - case LAGG_OPT_LACP_TXTEST: - lsc->lsc_debug.lsc_tx_test = 1; - break; - case -LAGG_OPT_LACP_TXTEST: - lsc->lsc_debug.lsc_tx_test = 0; - break; - case LAGG_OPT_LACP_RXTEST: - lsc->lsc_debug.lsc_rx_test = 1; - break; - case -LAGG_OPT_LACP_RXTEST: - lsc->lsc_debug.lsc_rx_test = 0; - break; - case LAGG_OPT_LACP_STRICT: - lsc->lsc_strict_mode = 1; - break; - case -LAGG_OPT_LACP_STRICT: - lsc->lsc_strict_mode = 0; - break; - } - } - LAGG_WUNLOCK(sc); - break; /* Return from SIOCSLAGG. */ } - if (ra->ra_proto < 1 || ra->ra_proto >= LAGG_PROTO_MAX) { - error = EPROTONOSUPPORT; - break; - } - - LAGG_WLOCK(sc); - lagg_proto_detach(sc); - lagg_proto_attach(sc, ra->ra_proto); + LAGG_WUNLOCK(sc); break; case SIOCGLAGGFLAGS: rf->rf_flags = sc->sc_flags; break; case SIOCSLAGGHASH: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) { error = EINVAL; break; } LAGG_WLOCK(sc); sc->sc_flags &= ~LAGG_F_HASHMASK; sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK; LAGG_WUNLOCK(sc); break; case SIOCGLAGGPORT: if (rp->rp_portname[0] == '\0' || (tpif = ifunit(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_RLOCK(sc, &tracker); if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_RUNLOCK(sc, &tracker); break; } lagg_port2req(lp, rp); LAGG_RUNLOCK(sc, &tracker); break; case SIOCSLAGGPORT: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (rp->rp_portname[0] == '\0' || (tpif = ifunit(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_WLOCK(sc); error = lagg_port_create(sc, tpif); LAGG_WUNLOCK(sc); break; case SIOCSLAGGDELPORT: error = priv_check(td, PRIV_NET_LAGG); 
if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_WLOCK(sc);
		/* The named interface must be a port of this very lagg. */
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_WUNLOCK(sc);
			break;
		}

		error = lagg_port_destroy(lp, 1);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		/* Set flags on ports too */
		LAGG_WLOCK(sc);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			lagg_setflags(lp, 1);
		}
		LAGG_WUNLOCK(sc);

		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			LAGG_WLOCK(sc);
			lagg_stop(sc);
			LAGG_WUNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			(*ifp->if_init)(sc);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		LAGG_WLOCK(sc);
		error = lagg_ether_setmulti(sc);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCSIFCAP:
	case SIOCSIFMTU:
		/* Do not allow the MTU or caps to be directly changed */
		error = EINVAL;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}

/*
 * Re-synchronise the multicast filter of every port with the lagg
 * interface: drop each port's current entries, then copy the lagg's
 * addresses to it again.  The lagg write lock is asserted.  Always
 * returns 0; the results of lagg_ether_cmdmulti() are not checked.
 */
static int
lagg_ether_setmulti(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		/* First, remove any existing filter entries.
*/
		lagg_ether_cmdmulti(lp, 0);
		/* copy all addresses from the lagg interface to the port */
		lagg_ether_cmdmulti(lp, 1);
	}
	return (0);
}

/*
 * Add or remove the lagg's link-layer multicast memberships on port lp.
 *
 * set != 0: for every AF_LINK multicast address of the lagg ifnet a
 *   lagg_mc record is allocated (M_NOWAIT), retargeted to the port's
 *   if_index and queued on lp_mc_head; afterwards each queued address is
 *   joined on the port with if_addmulti().
 * set == 0: lp_mc_head is emptied; each membership is dropped with
 *   if_delmulti_ifma() unless the port is detaching.
 *
 * Returns 0, ENOMEM when a record cannot be allocated, or the first
 * if_addmulti() error.  On either failure the records queued so far stay
 * on lp_mc_head.  The lagg write lock is asserted.
 */
static int
lagg_ether_cmdmulti(struct lagg_port *lp, int set)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct ifnet *scifp = sc->sc_ifp;
	struct lagg_mc *mc;
	struct ifmultiaddr *ifma;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	if (set) {
		IF_ADDR_WLOCK(scifp);
		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
			if (mc == NULL) {
				IF_ADDR_WUNLOCK(scifp);
				return (ENOMEM);
			}
			bcopy(ifma->ifma_addr, &mc->mc_addr,
			    ifma->ifma_addr->sa_len);
			mc->mc_addr.sdl_index = ifp->if_index;
			mc->mc_ifma = NULL;
			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
		}
		IF_ADDR_WUNLOCK(scifp);
		/* Join on the port only after the address lock is released. */
		SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
			error = if_addmulti(ifp,
			    (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
			if (error)
				return (error);
		}
	} else {
		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
			if (mc->mc_ifma && !lp->lp_detaching)
				if_delmulti_ifma(mc->mc_ifma);
			free(mc, M_DEVBUF);
		}
	}
	return (0);
}

/*
 * Handle a ref counted flag that should be set on the lagg port as well.
 *
 * flag is the interface flag bit; status says whether the port should
 * follow the lagg (non-zero) or have the flag released (zero); func is
 * called as func(ifp, status) to take or release the reference.
 * Returns 0 or the error from func.
 */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
	     int (*func)(struct ifnet *, int))
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct ifnet *ifp = lp->lp_ifp;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	status = status ? (scifp->if_flags & flag) : 0;
	/* Now "status" contains the flag value or 0 */

	/*
	 * See if recorded ports status is different from what
	 * we want it to be.  If it is, flip it.  We record ports
	 * status in lp_ifflags so that we won't clear ports flag
	 * we haven't set.  In fact, we don't clear or set ports
	 * flags directly, but get or release references to them.
* That's why we can be sure that recorded flags still are
	 * in accord with actual ports flags.
	 */
	if (status != (lp->lp_ifflags & flag)) {
		error = (*func)(ifp, status);
		if (error)
			return (error);
		lp->lp_ifflags &= ~flag;
		lp->lp_ifflags |= status;
	}
	return (0);
}

/*
 * Handle IFF_* flags that require certain changes on the lagg port
 * if "status" is true, update ports flags respective to the lagg
 * if "status" is false, forcedly clear the flags set on port.
 *
 * Walks the lagg_pflags table up to its zero-flag terminator and stops
 * at the first error, which is returned; returns 0 otherwise.
 */
static int
lagg_setflags(struct lagg_port *lp, int status)
{
	int error, i;

	for (i = 0; lagg_pflags[i].flag; i++) {
		error = lagg_setflag(lp, lagg_pflags[i].flag,
		    status, lagg_pflags[i].func);
		if (error)
			return (error);
	}
	return (0);
}

/*
 * Transmit routine of the lagg interface.  Under the read lock: when no
 * protocol is configured or the lagg has no ports, the mbuf is freed, an
 * output error is counted and ENXIO is returned.  Otherwise the frame is
 * tapped for BPF and passed to the protocol start hook; a non-zero
 * result from that hook is counted as an output error and returned.
 */
static int
lagg_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	int error, len, mcast;
	struct rm_priotracker tracker;

	/* NOTE(review): len and mcast are computed but not used below. */
	len = m->m_pkthdr.len;
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;

	LAGG_RLOCK(sc, &tracker);
	/* We need a Tx algorithm and at least one port */
	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
		LAGG_RUNLOCK(sc, &tracker);
		m_freem(m);
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENXIO);
	}

	ETHER_BPF_MTAP(ifp, m);

	error = lagg_proto_start(sc, m);
	LAGG_RUNLOCK(sc, &tracker);

	if (error != 0)
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
}

/*
 * The ifp->if_qflush entry point for lagg(4) is no-op.
*/
static void
lagg_qflush(struct ifnet *ifp __unused)
{
}

/*
 * Receive path for a frame that arrived on port ifp.  The frame is freed
 * and NULL returned when the lagg is not running, the port is disabled
 * or no protocol is configured.  Otherwise the frame is tapped for BPF on
 * the lagg ifnet and given to the protocol input hook; whatever that hook
 * returns is passed back, except that it is freed (NULL returned) when
 * the lagg ifnet has IFF_MONITOR set.
 */
static struct mbuf *
lagg_input(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct rm_priotracker tracker;

	LAGG_RLOCK(sc, &tracker);
	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
	    sc->sc_proto == LAGG_PROTO_NONE) {
		LAGG_RUNLOCK(sc, &tracker);
		m_freem(m);
		return (NULL);
	}

	ETHER_BPF_MTAP(scifp, m);

	m = lagg_proto_input(sc, lp, m);

	if (m != NULL) {
		if (scifp->if_flags & IFF_MONITOR) {
			m_freem(m);
			m = NULL;
		}
	}

	LAGG_RUNLOCK(sc, &tracker);
	return (m);
}

/*
 * Media change callback: requests are ignored.  Prints the function name
 * when IFF_DEBUG is set on the lagg and always returns 0.
 */
static int
lagg_media_change(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;

	if (sc->sc_ifflags & IFF_DEBUG)
		printf("%s\n", __func__);

	/* Ignore */
	return (0);
}

/*
 * Media status callback: reports IFM_ETHER | IFM_AUTO with a valid
 * status, and IFM_ACTIVE as soon as one port satisfies LAGG_PORTACTIVE.
 */
static void
lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;
	struct rm_priotracker tracker;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_active = IFM_ETHER | IFM_AUTO;

	LAGG_RLOCK(sc, &tracker);
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp))
			imr->ifm_status |= IFM_ACTIVE;
	}
	LAGG_RUNLOCK(sc, &tracker);
}

/*
 * Recompute the link state and if_baudrate of the lagg interface from
 * its ports.  Takes no lock itself.
 */
static void
lagg_linkstate(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int new_link = LINK_STATE_DOWN;
	uint64_t speed;

	/* Our link is considered up if at least one of our ports is active */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
			new_link = LINK_STATE_UP;
			break;
		}
	}
	if_link_state_change(sc->sc_ifp, new_link);

	/* Update if_baudrate to reflect the max possible speed */
	switch (sc->sc_proto) {
		case LAGG_PROTO_FAILOVER:
			sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
sc->sc_primary->lp_ifp->if_baudrate : 0;
			break;
		case LAGG_PROTO_ROUNDROBIN:
		case LAGG_PROTO_LOADBALANCE:
		case LAGG_PROTO_ETHERCHANNEL:
		case LAGG_PROTO_BROADCAST:
			/* These protocols report the sum over all ports. */
			speed = 0;
			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
				speed += lp->lp_ifp->if_baudrate;
			sc->sc_ifp->if_baudrate = speed;
			break;
		case LAGG_PROTO_LACP:
			/* LACP updates if_baudrate itself */
			break;
	}
}

/*
 * Link state notification for port ifp.  When ifp belongs to a lagg, the
 * lagg's own link state is recomputed and the protocol linkstate hook is
 * run, both under the write lock.  The state argument is not used.
 */
static void
lagg_port_state(struct ifnet *ifp, int state)
{
	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
	struct lagg_softc *sc = NULL;

	if (lp != NULL)
		sc = lp->lp_softc;
	if (sc == NULL)
		return;

	LAGG_WLOCK(sc);
	lagg_linkstate(sc);
	lagg_proto_linkstate(sc, lp);
	LAGG_WUNLOCK(sc);
}

/*
 * Return a port that satisfies LAGG_PORTACTIVE, trying in order: lp
 * itself, the port following lp in the list, then the first active port
 * found scanning the list from its head.  A NULL lp goes straight to the
 * scan.  Returns NULL when no port is active.  The lagg read lock is
 * asserted.
 */
struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_port *lp_next, *rval = NULL;
	// int new_link = LINK_STATE_DOWN;

	LAGG_RLOCK_ASSERT(sc);
	/*
	 * Search a port which reports an active link state.
	 */

	if (lp == NULL)
		goto search;
	if (LAGG_PORTACTIVE(lp)) {
		rval = lp;
		goto found;
	}
	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
	    LAGG_PORTACTIVE(lp_next)) {
		rval = lp_next;
		goto found;
	}

search:
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp_next)) {
			rval = lp_next;
			goto found;
		}
	}

found:
	if (rval != NULL) {
		/*
		 * The IEEE 802.1D standard assumes that a lagg with
		 * multiple ports is always full duplex. This is valid
		 * for load sharing laggs and if at least two links
		 * are active. Unfortunately, checking the latter would
		 * be too expensive at this point.
XXX
		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
		    (sc->sc_count > 1))
			new_link = LINK_STATE_FULL_DUPLEX;
		else
			new_link = rval->lp_link_state;
		 */
	}

	return (rval);
}

/*
 * Return a pointer to len bytes located off bytes into packet m.
 * Returns NULL when the packet (m_pkthdr.len) is shorter than off + len.
 * When the first mbuf (m_len) does not hold the whole range, the bytes
 * are copied into buf with m_copydata() and buf is returned; otherwise
 * the result points directly into the first mbuf's data.
 */
static const void *
lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
{
	if (m->m_pkthdr.len < (off + len)) {
		return (NULL);
	} else if (m->m_len < (off + len)) {
		m_copydata(m, off, len, buf);
		return (buf);
	}
	return (mtod(m, char *) + off);
}

/*
 * Fold header fields of frame m into a 32-bit hash seeded with key.
 * Which layers take part is selected by sc_flags:
 *   LAGG_F_HASHL2 - Ethernet source/destination address and VLAN tag
 *   LAGG_F_HASHL3 - IPv4 or IPv6 addresses, plus the IPv6 flow label
 *   LAGG_F_HASHL4 - for IPv4 TCP/UDP/SCTP, the 32-bit word that follows
 *                   the IP header (presumably the port pair)
 * When a header cannot be read, the hash accumulated so far is returned.
 */
uint32_t
lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
{
	uint16_t etype;
	uint32_t p = key;
	int off;
	struct ether_header *eh;
	const struct ether_vlan_header *vlan;
#ifdef INET
	const struct ip *ip;
	const uint32_t *ports;
	int iphlen;
#endif
#ifdef INET6
	const struct ip6_hdr *ip6;
	uint32_t flow;
#endif
	/* Scratch space lagg_gethdr() copies into for non-contiguous headers. */
	union {
#ifdef INET
		struct ip ip;
#endif
#ifdef INET6
		struct ip6_hdr ip6;
#endif
		struct ether_vlan_header vlan;
		uint32_t port;
	} buf;


	off = sizeof(*eh);
	if (m->m_len < off)
		goto out;
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	if (sc->sc_flags & LAGG_F_HASHL2) {
		p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
		p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
	}

	/* Special handling for encapsulating VLAN frames */
	if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
		p = hash32_buf(&m->m_pkthdr.ether_vtag,
		    sizeof(m->m_pkthdr.ether_vtag), p);
	} else if (etype == ETHERTYPE_VLAN) {
		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &buf);
		if (vlan == NULL)
			goto out;

		if (sc->sc_flags & LAGG_F_HASHL2)
			p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
		etype = ntohs(vlan->evl_proto);
		/* Skip the extra bytes the VLAN header adds. */
		off += sizeof(*vlan) - sizeof(*eh);
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
		if (ip == NULL)
			goto out;

		if (sc->sc_flags & LAGG_F_HASHL3) {
			p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
			p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
		}
		if (!(sc->sc_flags & LAGG_F_HASHL4))
			break;
		switch (ip->ip_p) {
			case IPPROTO_TCP:
			case IPPROTO_UDP:
			case IPPROTO_SCTP:
iphlen = ip->ip_hl << 2;
				/* Give up on L4 if the stated header length is bogus. */
				if (iphlen < sizeof(*ip))
					break;
				off += iphlen;
				ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
				if (ports == NULL)
					break;
				p = hash32_buf(ports, sizeof(*ports), p);
				break;
		}
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		/* For IPv6 the whole case is gated on the L3 flag. */
		if (!(sc->sc_flags & LAGG_F_HASHL3))
			break;
		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
		if (ip6 == NULL)
			goto out;

		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
		break;
#endif
	}
out:
	return (p);
}

/*
 * Hand mbuf m to port ifp through its if_transmit routine and return
 * that routine's result.
 */
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{

	return (ifp->if_transmit)(ifp, m);
}

/*
 * Simple round robin aggregation
 */

/* Attach hook: advertise full duplex and reset the sequence counter. */
static void
lagg_rr_attach(struct lagg_softc *sc)
{
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
	sc->sc_seq = 0;
}

/*
 * Start hook: pick the port at position (sc_seq mod sc_count) in list
 * order, incrementing sc_seq atomically, and transmit on it or on the
 * active port lagg_link_active() substitutes.  Frees the mbuf and
 * returns ENETDOWN when no port is active.  sc_count is used as a
 * divisor here; lagg_transmit() rejects sc_count == 0 before it calls
 * lagg_proto_start().
 */
static int
lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;
	uint32_t p;

	p = atomic_fetchadd_32(&sc->sc_seq, 1);
	p %= sc->sc_count;
	lp = SLIST_FIRST(&sc->sc_ports);
	while (p--)
		lp = SLIST_NEXT(lp, lp_entries);

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
*/
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

/* Input hook: accept every frame, re-homed onto the lagg interface. */
static struct mbuf *
lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * Broadcast mode
 */

/*
 * Start hook: send the frame on every active port.  Each active port but
 * the last gets a copy made with m_copym(); the original mbuf goes to the
 * last one.  Frees the mbuf and returns ENOENT when no port is active, or
 * ENETDOWN when lagg_link_active() finds none for the last port.
 * NOTE(review): once the final enqueue has run, 0 is returned whether or
 * not errors were counted -- confirm that per-port failures are meant to
 * be hidden from the caller.
 */
static int
lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
{
	int active_ports = 0;
	int errors = 0;
	int ret;
	struct lagg_port *lp, *last = NULL;
	struct mbuf *m0;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (!LAGG_PORTACTIVE(lp))
			continue;

		active_ports++;

		/* Send a copy on the previous active port, one step behind. */
		if (last != NULL) {
			m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
			if (m0 == NULL) {
				ret = ENOBUFS;
				errors++;
				break;
			}

			ret = lagg_enqueue(last->lp_ifp, m0);
			if (ret != 0)
				errors++;
		}
		last = lp;
	}
	if (last == NULL) {
		m_freem(m);
		return (ENOENT);
	}
	if ((last = lagg_link_active(sc, last)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	ret = lagg_enqueue(last->lp_ifp, m);
	if (ret != 0)
		errors++;

	if (errors == 0)
		return (ret);

	return (0);
}

/* Input hook: accept every frame, re-homed onto the lagg interface. */
static struct mbuf*
lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;
	return (m);
}

/*
 * Active failover
 */

/*
 * Start hook: transmit on the port lagg_link_active() selects starting
 * from the primary.  Frees the mbuf and returns ENETDOWN when none is
 * active.
 */
static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	/* Use the master port if active or the next available port */
	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

/*
 * Input hook: accept frames received on the primary port, or on any port
 * when V_lagg_failover_rx_all is set.  While the primary is not active,
 * frames are also accepted from the port lagg_link_active() currently
 * selects, or from any port if it selects none.  Other frames are freed
 * and NULL is returned.
 */
static struct mbuf *
lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct lagg_port *tmp_tp;

	if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
		m->m_pkthdr.rcvif = ifp;
		return (m);
	}

	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
		tmp_tp = lagg_link_active(sc, sc->sc_primary);
		/*
		 * If tmp_tp is null, we've
recieved a packet when all
		 * our links are down. Weird, but process it anyways.
		 */
		if ((tmp_tp == NULL || tmp_tp == lp)) {
			m->m_pkthdr.rcvif = ifp;
			return (m);
		}
	}

	m_freem(m);
	return (NULL);
}

/*
 * Loadbalancing
 */

/*
 * Attach hook: allocate the zeroed per-lagg loadbalance state
 * (M_WAITOK), seed its hash key from arc4random(), store it in sc_psc
 * and build the port table for the ports already present.
 */
static void
lagg_lb_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	struct lagg_lb *lb;

	lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;

	lb->lb_key = arc4random();
	sc->sc_psc = lb;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_lb_port_create(lp);
}

/*
 * Detach hook: release the lagg write lock, then free the loadbalance
 * state.  sc_psc itself is not cleared here.
 */
static void
lagg_lb_detach(struct lagg_softc *sc)
{
	struct lagg_lb *lb;

	lb = (struct lagg_lb *)sc->sc_psc;
	LAGG_WUNLOCK(sc);
	if (lb != NULL)
		free(lb, M_DEVBUF);
}

/*
 * Rebuild lb_ports from the lagg's port list, leaving out lp (pass NULL
 * to keep every port).  Returns EINVAL when more than LAGG_MAX_PORTS
 * ports would have to be stored, 0 otherwise.
 */
static int
lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp_next;
	int i = 0;

	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (lp_next == lp)
			continue;
		if (i >= LAGG_MAX_PORTS)
			return (EINVAL);
		if (sc->sc_ifflags & IFF_DEBUG)
			printf("%s: port %s at index %d\n",
			    sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
		lb->lb_ports[i++] = lp_next;
	}

	return (0);
}

/* Port-added hook: rebuild the table with every port included. */
static int
lagg_lb_port_create(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	return (lagg_lb_porttable(sc, NULL));
}

/* Port-removed hook: rebuild the table without lp. */
static void
lagg_lb_port_destroy(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	lagg_lb_porttable(sc, lp);
}

/*
 * Start hook: choose a port table slot from the mbuf's flowid (shifted
 * right by flowid_shift) when LAGG_OPT_USE_FLOWID is set and the mbuf
 * carries M_FLOWID, otherwise from lagg_hashmbuf(); the value is reduced
 * modulo sc_count.  Transmits on that port or on the active port
 * lagg_link_active() substitutes.
 */
static int
lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp = NULL;
	uint32_t p = 0;

	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && (m->m_flags & M_FLOWID))
		p = m->m_pkthdr.flowid >> sc->flowid_shift;
	else
		p = lagg_hashmbuf(sc, m, lb->lb_key);
	p %= sc->sc_count;
	lp = lb->lb_ports[p];

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
*/
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

/* Input hook: accept every frame, re-homed onto the lagg interface. */
static struct mbuf *
lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * 802.3ad LACP
 */

/* Attach hook: attach LACP to the lagg, then register existing ports. */
static void
lagg_lacp_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	lacp_attach(sc);
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);
}

/*
 * Detach hook: remove every port from LACP, clear sc_psc, release the
 * lagg write lock and only then call lacp_detach() on the saved state.
 */
static void
lagg_lacp_detach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	void *psc;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	psc = sc->sc_psc;
	sc->sc_psc = NULL;
	LAGG_WUNLOCK(sc);

	lacp_detach(psc);
}

/*
 * Link-layer address change hook: remove every port from LACP and add
 * it back.
 */
static void
lagg_lacp_lladdr(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	/* purge all the lacp ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* add them back in */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);
}

/*
 * Start hook: transmit on the port chosen by lacp_select_tx_port().
 * Frees the mbuf and returns ENETDOWN when it selects none.
 */
static int
lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	lp = lacp_select_tx_port(sc, m);
	if (lp == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

/*
 * Input hook: untagged frames of type ETHERTYPE_SLOW go through
 * lacp_input() first; if that returns NULL, so does this hook.
 */
static struct mbuf *
lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct ether_header *eh;
	u_short etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);

	/* Tap off LACP control messages */
	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
		m = lacp_input(lp, m);
		if (m == NULL)
			return (NULL);
	}

	/*
	 * If the port is not collecting or not in the active aggregator then
	 * free and return.
*/ if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) { m_freem(m); return (NULL); } m->m_pkthdr.rcvif = ifp; return (m); } Index: head/sys/net/if_lagg.h =================================================================== --- head/sys/net/if_lagg.h (revision 272445) +++ head/sys/net/if_lagg.h (revision 272446) @@ -1,276 +1,284 @@ /* $OpenBSD: if_trunk.h,v 1.11 2007/01/31 06:20:19 reyk Exp $ */ /* * Copyright (c) 2005, 2006 Reyk Floeter * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* * $FreeBSD$ */ #ifndef _NET_LAGG_H #define _NET_LAGG_H /* * Global definitions */ #define LAGG_MAX_PORTS 32 /* logically */ #define LAGG_MAX_NAMESIZE 32 /* name of a protocol */ #define LAGG_MAX_STACKING 4 /* maximum number of stacked laggs */ /* Lagg flags */ #define LAGG_F_HASHL2 0x00000001 /* hash layer 2 */ #define LAGG_F_HASHL3 0x00000002 /* hash layer 3 */ #define LAGG_F_HASHL4 0x00000004 /* hash layer 4 */ #define LAGG_F_HASHMASK 0x00000007 /* all LAGG_F_HASH* bits */ /* Port flags */ #define LAGG_PORT_SLAVE 0x00000000 /* normal enslaved port */ #define LAGG_PORT_MASTER 0x00000001 /* primary port */ #define LAGG_PORT_STACK 0x00000002 /* stacked lagg port */ #define LAGG_PORT_ACTIVE 0x00000004 /* port is active */ #define LAGG_PORT_COLLECTING 0x00000008 /* port is receiving frames */ #define LAGG_PORT_DISTRIBUTING 0x00000010 /* port is sending frames */ #define LAGG_PORT_DISABLED 0x00000020 /* port is disabled */ #define LAGG_PORT_BITS "\20\01MASTER\02STACK\03ACTIVE\04COLLECTING" \ "\05DISTRIBUTING\06DISABLED" /* Supported lagg PROTOs */ typedef enum { LAGG_PROTO_NONE = 0, /* no lagg protocol defined */ LAGG_PROTO_ROUNDROBIN, /* simple round robin */ LAGG_PROTO_FAILOVER, /* active failover */ LAGG_PROTO_LOADBALANCE, /* loadbalance */ LAGG_PROTO_LACP, /* 802.3ad lacp */ LAGG_PROTO_ETHERCHANNEL,/* Cisco FEC */ LAGG_PROTO_BROADCAST, /* broadcast */ LAGG_PROTO_MAX, /* end marker; not a selectable protocol */ } lagg_proto; /* Protocol name paired with its lagg_proto value */ struct lagg_protos { const char *lpr_name; lagg_proto lpr_proto; }; #define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER #define LAGG_PROTOS { \ { "failover", LAGG_PROTO_FAILOVER }, \ { "fec", LAGG_PROTO_ETHERCHANNEL }, \ { "lacp", LAGG_PROTO_LACP }, \ { "loadbalance", LAGG_PROTO_LOADBALANCE }, \ { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \ { "broadcast", LAGG_PROTO_BROADCAST }, \ { "none", LAGG_PROTO_NONE }, \ { "default", LAGG_PROTO_DEFAULT } \ } /* * lagg ioctls. */ /* * LACP current operational parameters structure. 
*/ struct lacp_opreq { uint16_t actor_prio; uint8_t actor_mac[ETHER_ADDR_LEN]; uint16_t actor_key; uint16_t actor_portprio; uint16_t actor_portno; uint8_t actor_state; uint16_t partner_prio; uint8_t partner_mac[ETHER_ADDR_LEN]; uint16_t partner_key; uint16_t partner_portprio; uint16_t partner_portno; uint8_t partner_state; }; /* lagg port settings */ struct lagg_reqport { char rp_ifname[IFNAMSIZ]; /* name of the lagg */ char rp_portname[IFNAMSIZ]; /* name of the port */ u_int32_t rp_prio; /* port priority */ u_int32_t rp_flags; /* port flags */ union { struct lacp_opreq rpsc_lacp; } rp_psc; #define rp_lacpreq rp_psc.rpsc_lacp }; #define SIOCGLAGGPORT _IOWR('i', 140, struct lagg_reqport) #define SIOCSLAGGPORT _IOW('i', 141, struct lagg_reqport) #define SIOCSLAGGDELPORT _IOW('i', 142, struct lagg_reqport) /* lagg, ports and options */ struct lagg_reqall { char ra_ifname[IFNAMSIZ]; /* name of the lagg */ u_int ra_proto; /* lagg protocol */ size_t ra_size; /* size of buffer */ struct lagg_reqport *ra_port; /* allocated buffer */ int ra_ports; /* total port count */ union { struct lacp_opreq rpsc_lacp; } ra_psc; #define ra_lacpreq ra_psc.rpsc_lacp - int ra_opts; /* Option bitmap */ -#define LAGG_OPT_NONE 0x00 -#define LAGG_OPT_USE_FLOWID 0x01 /* use M_FLOWID */ -/* Pseudo flags which are used in ra_opts but not stored into sc_opts. 
*/ -#define LAGG_OPT_FLOWIDSHIFT 0x02 /* Set flowid */ -#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ -#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ -#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ -#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */ - u_int ra_count; /* number of ports */ - u_int ra_active; /* active port count */ - u_int ra_flapping; /* number of flapping */ - int ra_flowid_shift; /* shift the flowid */ }; #define SIOCGLAGG _IOWR('i', 143, struct lagg_reqall) #define SIOCSLAGG _IOW('i', 144, struct lagg_reqall) struct lagg_reqflags { char rf_ifname[IFNAMSIZ]; /* name of the lagg */ uint32_t rf_flags; /* lagg flags (LAGG_F_*) */ }; #define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags) #define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags) + +struct lagg_reqopts { /* argument of SIOC[GS]LAGGOPTS */ + char ro_ifname[IFNAMSIZ]; /* name of the lagg */ + + int ro_opts; /* Option bitmap */ +#define LAGG_OPT_NONE 0x00 +#define LAGG_OPT_USE_FLOWID 0x01 /* use M_FLOWID */ +/* Pseudo flags which are used in ro_opts but not stored into sc_opts. 
*/ +#define LAGG_OPT_FLOWIDSHIFT 0x02 /* Set flowid shift */ +#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* valid shift bits; flowid is uint32_t */ +#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ +#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ +#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */ + u_int ro_count; /* number of ports */ + u_int ro_active; /* active port count */ + u_int ro_flapping; /* number of flapping events */ + int ro_flowid_shift; /* shift the flowid */ +}; + +#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) +#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) #ifdef _KERNEL /* * Internal kernel part */ /* Nonzero when the port's ifnet has link state up and IFF_UP set */ #define LAGG_PORTACTIVE(_tp) ( \ ((_tp)->lp_ifp->if_link_state == LINK_STATE_UP) && \ ((_tp)->lp_ifp->if_flags & IFF_UP) \ ) struct lagg_ifreq { union { struct ifreq ifreq; struct { char ifr_name[IFNAMSIZ]; struct sockaddr_storage ifr_ss; } ifreq_storage; } ifreq; }; #define sc_ifflags sc_ifp->if_flags /* flags */ #define sc_ifname sc_ifp->if_xname /* name */ #define sc_capabilities sc_ifp->if_capabilities /* capabilities */ #define IFCAP_LAGG_MASK 0xffff0000 /* private capabilities */ #define IFCAP_LAGG_FULLDUPLEX 0x00010000 /* full duplex with >1 ports */ /* Private data used by the loadbalancing protocol */ struct lagg_lb { u_int32_t lb_key; struct lagg_port *lb_ports[LAGG_MAX_PORTS]; }; struct lagg_mc { struct sockaddr_dl mc_addr; struct ifmultiaddr *mc_ifma; SLIST_ENTRY(lagg_mc) mc_entries; }; /* List of interfaces to have the MAC address modified */ struct lagg_llq { struct ifnet *llq_ifp; uint8_t llq_lladdr[ETHER_ADDR_LEN]; SLIST_ENTRY(lagg_llq) llq_entries; }; struct lagg_counters { uint64_t val[IFCOUNTERS]; }; struct lagg_softc { struct ifnet *sc_ifp; /* virtual interface */ struct rmlock sc_mtx; /* lock taken by the LAGG_*LOCK macros */ int sc_proto; /* lagg protocol */ u_int sc_count; /* number of ports */ u_int sc_active; /* active port count */ u_int sc_flapping; /* number of flapping * events */ struct lagg_port *sc_primary; /* primary port */ struct ifmedia 
sc_media; /* media config */ void *sc_psc; /* protocol data */ uint32_t sc_seq; /* sequence counter */ uint32_t sc_flags; SLIST_HEAD(__tplhd, lagg_port) sc_ports; /* list of interfaces */ SLIST_ENTRY(lagg_softc) sc_entries; struct task sc_lladdr_task; SLIST_HEAD(__llqhd, lagg_llq) sc_llq_head; /* interfaces to program the lladdr on */ eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout sc_callout; u_int sc_opts; int flowid_shift; /* shift the flowid */ struct lagg_counters detached_counters; /* detached ports sum */ }; struct lagg_port { struct ifnet *lp_ifp; /* physical interface */ struct lagg_softc *lp_softc; /* parent lagg */ uint8_t lp_lladdr[ETHER_ADDR_LEN]; u_char lp_iftype; /* interface type */ uint32_t lp_prio; /* port priority */ uint32_t lp_flags; /* port flags */ int lp_ifflags; /* saved ifp flags */ void *lh_cookie; /* if state hook */ void *lp_psc; /* protocol data */ int lp_detaching; /* ifnet is detaching */ SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */ /* Redirected callbacks */ int (*lp_ioctl)(struct ifnet *, u_long, caddr_t); int (*lp_output)(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); struct lagg_counters port_counters; /* ifp counters copy */ SLIST_ENTRY(lagg_port) lp_entries; }; /* Wrappers around the rm_*() calls operating on the softc's sc_mtx */ #define LAGG_LOCK_INIT(_sc) rm_init(&(_sc)->sc_mtx, "if_lagg rmlock") #define LAGG_LOCK_DESTROY(_sc) rm_destroy(&(_sc)->sc_mtx) #define LAGG_RLOCK(_sc, _p) rm_rlock(&(_sc)->sc_mtx, (_p)) #define LAGG_WLOCK(_sc) rm_wlock(&(_sc)->sc_mtx) #define LAGG_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->sc_mtx, (_p)) #define LAGG_WUNLOCK(_sc) rm_wunlock(&(_sc)->sc_mtx) #define LAGG_RLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_RLOCKED) #define LAGG_WLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_WLOCKED) extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int ); int lagg_enqueue(struct ifnet *, struct mbuf *); uint32_t lagg_hashmbuf(struct lagg_softc *, 
struct mbuf *, uint32_t); SYSCTL_DECL(_net_link_lagg); #endif /* _KERNEL */ #endif /* _NET_LAGG_H */