diff --git a/sys/net/bridgestp.c b/sys/net/bridgestp.c index c36dc61d1397..82524440c241 100644 --- a/sys/net/bridgestp.c +++ b/sys/net/bridgestp.c @@ -1,2280 +1,2289 @@ /* $NetBSD: bridgestp.c,v 1.5 2003/11/28 08:56:48 keihan Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 2000 Jason L. Wright (jason@thought.net) * Copyright (c) 2006 Andrew Thompson (thompsa@FreeBSD.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * OpenBSD: bridgestp.c,v 1.5 2001/03/22 03:48:29 jason Exp */ /* * Implementation of the spanning tree protocol as defined in * ISO/IEC 802.1D-2004, June 9, 2004. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef BRIDGESTP_DEBUG #define DPRINTF(fmt, arg...) printf("bstp: " fmt, ##arg) #else #define DPRINTF(fmt, arg...) (void)0 #endif #define PV2ADDR(pv, eaddr) do { \ eaddr[0] = pv >> 40; \ eaddr[1] = pv >> 32; \ eaddr[2] = pv >> 24; \ eaddr[3] = pv >> 16; \ eaddr[4] = pv >> 8; \ eaddr[5] = pv >> 0; \ } while (0) #define INFO_BETTER 1 #define INFO_SAME 0 #define INFO_WORSE -1 const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; LIST_HEAD(, bstp_state) bstp_list; static struct mtx bstp_list_mtx; static void bstp_transmit(struct bstp_state *, struct bstp_port *); static void bstp_transmit_bpdu(struct bstp_state *, struct bstp_port *); static void bstp_transmit_tcn(struct bstp_state *, struct bstp_port *); static void bstp_decode_bpdu(struct bstp_port *, struct bstp_cbpdu *, struct bstp_config_unit *); static void bstp_send_bpdu(struct bstp_state *, struct bstp_port *, struct bstp_cbpdu *); static int bstp_pdu_flags(struct bstp_port *); static void bstp_received_stp(struct bstp_state *, struct bstp_port *, struct mbuf **, struct bstp_tbpdu *); static void bstp_received_rstp(struct bstp_state *, struct bstp_port *, struct mbuf **, struct bstp_tbpdu *); static void bstp_received_tcn(struct bstp_state *, struct bstp_port *, struct bstp_tcn_unit *); static void bstp_received_bpdu(struct bstp_state *, struct bstp_port *, struct bstp_config_unit *); static int bstp_pdu_rcvtype(struct bstp_port *, struct bstp_config_unit *); static int bstp_pdu_bettersame(struct bstp_port *, int); static int bstp_info_cmp(struct bstp_pri_vector *, struct bstp_pri_vector *); static int bstp_info_superior(struct bstp_pri_vector *, struct bstp_pri_vector *); static void bstp_assign_roles(struct bstp_state *); static void 
bstp_update_roles(struct bstp_state *, struct bstp_port *); static void bstp_update_state(struct bstp_state *, struct bstp_port *); static void bstp_update_tc(struct bstp_port *); static void bstp_update_info(struct bstp_port *); static void bstp_set_other_tcprop(struct bstp_port *); static void bstp_set_all_reroot(struct bstp_state *); static void bstp_set_all_sync(struct bstp_state *); static void bstp_set_port_state(struct bstp_port *, int); static void bstp_set_port_role(struct bstp_port *, int); static void bstp_set_port_proto(struct bstp_port *, int); static void bstp_set_port_tc(struct bstp_port *, int); static void bstp_set_timer_tc(struct bstp_port *); static void bstp_set_timer_msgage(struct bstp_port *); static int bstp_rerooted(struct bstp_state *, struct bstp_port *); static uint32_t bstp_calc_path_cost(struct bstp_port *); static void bstp_notify_state(void *, int); static void bstp_notify_rtage(void *, int); static void bstp_ifupdstatus(void *, int); static void bstp_enable_port(struct bstp_state *, struct bstp_port *); static void bstp_disable_port(struct bstp_state *, struct bstp_port *); static void bstp_tick(void *); static void bstp_timer_start(struct bstp_timer *, uint16_t); static void bstp_timer_stop(struct bstp_timer *); static void bstp_timer_latch(struct bstp_timer *); static int bstp_timer_dectest(struct bstp_timer *); static void bstp_hello_timer_expiry(struct bstp_state *, struct bstp_port *); static void bstp_message_age_expiry(struct bstp_state *, struct bstp_port *); static void bstp_migrate_delay_expiry(struct bstp_state *, struct bstp_port *); static void bstp_edge_delay_expiry(struct bstp_state *, struct bstp_port *); static int bstp_addr_cmp(const uint8_t *, const uint8_t *); static int bstp_same_bridgeid(uint64_t, uint64_t); static void bstp_reinit(struct bstp_state *); static void bstp_transmit(struct bstp_state *bs, struct bstp_port *bp) { + NET_EPOCH_ASSERT(); + if (bs->bs_running == 0) return; /* * a PDU can only be sent if 
we have tx quota left and the * hello timer is running. */ if (bp->bp_hello_timer.active == 0) { /* Test if it needs to be reset */ bstp_hello_timer_expiry(bs, bp); return; } if (bp->bp_txcount > bs->bs_txholdcount) /* Ran out of karma */ return; if (bp->bp_protover == BSTP_PROTO_RSTP) { bstp_transmit_bpdu(bs, bp); bp->bp_tc_ack = 0; } else { /* STP */ switch (bp->bp_role) { case BSTP_ROLE_DESIGNATED: bstp_transmit_bpdu(bs, bp); bp->bp_tc_ack = 0; break; case BSTP_ROLE_ROOT: bstp_transmit_tcn(bs, bp); break; } } bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime); bp->bp_flags &= ~BSTP_PORT_NEWINFO; } static void bstp_transmit_bpdu(struct bstp_state *bs, struct bstp_port *bp) { struct bstp_cbpdu bpdu; BSTP_LOCK_ASSERT(bs); bpdu.cbu_rootpri = htons(bp->bp_desg_pv.pv_root_id >> 48); PV2ADDR(bp->bp_desg_pv.pv_root_id, bpdu.cbu_rootaddr); bpdu.cbu_rootpathcost = htonl(bp->bp_desg_pv.pv_cost); bpdu.cbu_bridgepri = htons(bp->bp_desg_pv.pv_dbridge_id >> 48); PV2ADDR(bp->bp_desg_pv.pv_dbridge_id, bpdu.cbu_bridgeaddr); bpdu.cbu_portid = htons(bp->bp_port_id); bpdu.cbu_messageage = htons(bp->bp_desg_msg_age); bpdu.cbu_maxage = htons(bp->bp_desg_max_age); bpdu.cbu_hellotime = htons(bp->bp_desg_htime); bpdu.cbu_forwarddelay = htons(bp->bp_desg_fdelay); bpdu.cbu_flags = bstp_pdu_flags(bp); switch (bp->bp_protover) { case BSTP_PROTO_STP: bpdu.cbu_bpdutype = BSTP_MSGTYPE_CFG; break; case BSTP_PROTO_RSTP: bpdu.cbu_bpdutype = BSTP_MSGTYPE_RSTP; break; } bstp_send_bpdu(bs, bp, &bpdu); } static void bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp) { struct bstp_tbpdu bpdu; struct ifnet *ifp = bp->bp_ifp; struct ether_header *eh; struct mbuf *m; KASSERT(bp == bs->bs_root_port, ("%s: bad root port\n", __func__)); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu); m->m_len = m->m_pkthdr.len; eh = mtod(m, struct ether_header *); 
memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
	memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
	/* 802.3 framing: the type/length field carries the payload length */
	eh->ether_type = htons(sizeof(bpdu));

	bpdu.tbu_ssap = bpdu.tbu_dsap = LLC_8021D_LSAP;
	bpdu.tbu_ctl = LLC_UI;
	bpdu.tbu_protoid = 0;
	bpdu.tbu_protover = 0;
	bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;

	/* payload goes directly after the Ethernet header */
	memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));

	bp->bp_txcount++;
	ifp->if_transmit(ifp, m);
}

/*
 * Convert a received configuration BPDU (network byte order) into the
 * host-order config unit 'cu'.
 *
 * The root and designated bridge ids are assembled as 64-bit values with
 * the 16-bit priority in the top bits and the six address bytes below it.
 * pv_port_id is taken from the receiving port, not from the PDU.
 *
 * NOTE(review): for an RSTP PDU cu_role is only assigned when the role
 * bits match ROOT, ALT or DESG; for any other value this function leaves
 * cu_role untouched, so it keeps whatever the caller's 'cu' already held.
 */
static void
bstp_decode_bpdu(struct bstp_port *bp, struct bstp_cbpdu *cpdu,
    struct bstp_config_unit *cu)
{
	int flags;

	cu->cu_pv.pv_root_id =
	    (((uint64_t)ntohs(cpdu->cbu_rootpri)) << 48) |
	    (((uint64_t)cpdu->cbu_rootaddr[0]) << 40) |
	    (((uint64_t)cpdu->cbu_rootaddr[1]) << 32) |
	    (((uint64_t)cpdu->cbu_rootaddr[2]) << 24) |
	    (((uint64_t)cpdu->cbu_rootaddr[3]) << 16) |
	    (((uint64_t)cpdu->cbu_rootaddr[4]) << 8) |
	    (((uint64_t)cpdu->cbu_rootaddr[5]) << 0);

	cu->cu_pv.pv_dbridge_id =
	    (((uint64_t)ntohs(cpdu->cbu_bridgepri)) << 48) |
	    (((uint64_t)cpdu->cbu_bridgeaddr[0]) << 40) |
	    (((uint64_t)cpdu->cbu_bridgeaddr[1]) << 32) |
	    (((uint64_t)cpdu->cbu_bridgeaddr[2]) << 24) |
	    (((uint64_t)cpdu->cbu_bridgeaddr[3]) << 16) |
	    (((uint64_t)cpdu->cbu_bridgeaddr[4]) << 8) |
	    (((uint64_t)cpdu->cbu_bridgeaddr[5]) << 0);

	cu->cu_pv.pv_cost = ntohl(cpdu->cbu_rootpathcost);
	cu->cu_message_age = ntohs(cpdu->cbu_messageage);
	cu->cu_max_age = ntohs(cpdu->cbu_maxage);
	cu->cu_hello_time = ntohs(cpdu->cbu_hellotime);
	cu->cu_forward_delay = ntohs(cpdu->cbu_forwarddelay);
	cu->cu_pv.pv_dport_id = ntohs(cpdu->cbu_portid);
	cu->cu_pv.pv_port_id = bp->bp_port_id;
	cu->cu_message_type = cpdu->cbu_bpdutype;

	/* Strip off unused flags in STP mode */
	flags = cpdu->cbu_flags;
	switch (cpdu->cbu_protover) {
	case BSTP_PROTO_STP:
		flags &= BSTP_PDU_STPMASK;
		/* A STP BPDU explicitly conveys a Designated Port */
		cu->cu_role = BSTP_ROLE_DESIGNATED;
		break;

	case BSTP_PROTO_RSTP:
		flags &= BSTP_PDU_RSTPMASK;
		break;
	}

	/* expand each flag bit into its own 0/1 field */
	cu->cu_topology_change_ack =
	    (flags & BSTP_PDU_F_TCA) ? 1 : 0;
	cu->cu_proposal =
	    (flags & BSTP_PDU_F_P) ?
1 : 0; cu->cu_agree = (flags & BSTP_PDU_F_A) ? 1 : 0; cu->cu_learning = (flags & BSTP_PDU_F_L) ? 1 : 0; cu->cu_forwarding = (flags & BSTP_PDU_F_F) ? 1 : 0; cu->cu_topology_change = (flags & BSTP_PDU_F_TC) ? 1 : 0; switch ((flags & BSTP_PDU_PRMASK) >> BSTP_PDU_PRSHIFT) { case BSTP_PDU_F_ROOT: cu->cu_role = BSTP_ROLE_ROOT; break; case BSTP_PDU_F_ALT: cu->cu_role = BSTP_ROLE_ALTERNATE; break; case BSTP_PDU_F_DESG: cu->cu_role = BSTP_ROLE_DESIGNATED; break; } } static void bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp, struct bstp_cbpdu *bpdu) { struct ifnet *ifp; struct mbuf *m; struct ether_header *eh; BSTP_LOCK_ASSERT(bs); + NET_EPOCH_ASSERT(); ifp = bp->bp_ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; eh = mtod(m, struct ether_header *); bpdu->cbu_ssap = bpdu->cbu_dsap = LLC_8021D_LSAP; bpdu->cbu_ctl = LLC_UI; bpdu->cbu_protoid = htons(BSTP_PROTO_ID); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN); switch (bpdu->cbu_bpdutype) { case BSTP_MSGTYPE_CFG: bpdu->cbu_protover = BSTP_PROTO_STP; m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_STP_LEN; eh->ether_type = htons(BSTP_BPDU_STP_LEN); memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu, BSTP_BPDU_STP_LEN); break; case BSTP_MSGTYPE_RSTP: bpdu->cbu_protover = BSTP_PROTO_RSTP; bpdu->cbu_versionlen = htons(0); m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_RSTP_LEN; eh->ether_type = htons(BSTP_BPDU_RSTP_LEN); memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu, BSTP_BPDU_RSTP_LEN); break; default: panic("not implemented"); } m->m_pkthdr.rcvif = ifp; m->m_len = m->m_pkthdr.len; bp->bp_txcount++; ifp->if_transmit(ifp, m); } static int bstp_pdu_flags(struct bstp_port *bp) { int flags = 0; if (bp->bp_proposing && bp->bp_state != BSTP_IFSTATE_FORWARDING) flags |= BSTP_PDU_F_P; if (bp->bp_agree) flags |= BSTP_PDU_F_A; if (bp->bp_tc_timer.active) flags |= BSTP_PDU_F_TC; if (bp->bp_tc_ack) flags |= 
BSTP_PDU_F_TCA;

	/* learning/forwarding bits follow the port state */
	switch (bp->bp_state) {
	case BSTP_IFSTATE_LEARNING:
		flags |= BSTP_PDU_F_L;
		break;

	case BSTP_IFSTATE_FORWARDING:
		flags |= (BSTP_PDU_F_L | BSTP_PDU_F_F);
		break;
	}

	/* encode the port role; backup ports are reported as alternate */
	switch (bp->bp_role) {
	case BSTP_ROLE_ROOT:
		flags |=
		    (BSTP_PDU_F_ROOT << BSTP_PDU_PRSHIFT);
		break;

	case BSTP_ROLE_ALTERNATE:
	case BSTP_ROLE_BACKUP:	/* fall through */
		flags |=
		    (BSTP_PDU_F_ALT << BSTP_PDU_PRSHIFT);
		break;

	case BSTP_ROLE_DESIGNATED:
		flags |=
		    (BSTP_PDU_F_DESG << BSTP_PDU_PRSHIFT);
		break;
	}

	/* Strip off unused flags in either mode */
	switch (bp->bp_protover) {
	case BSTP_PROTO_STP:
		flags &= BSTP_PDU_STPMASK;
		break;
	case BSTP_PROTO_RSTP:
		flags &= BSTP_PDU_RSTPMASK;
		break;
	}
	return (flags);
}

/*
 * Entry point for a received BPDU frame on a bridge member port.
 *
 * The mbuf is always consumed: it is freed immediately when the port is
 * not active, and otherwise freed at 'out' unless an m_pullup() failure
 * already left the pointer NULL.  The bstp lock is taken here and held
 * across the protocol handlers.  The 'ifp' argument is not referenced in
 * this function.
 */
void
bstp_input(struct bstp_port *bp, struct ifnet *ifp, struct mbuf *m)
{
	struct bstp_state *bs = bp->bp_bs;
	struct ether_header *eh;
	struct bstp_tbpdu tpdu;
	uint16_t len;

	if (bp->bp_active == 0) {
		m_freem(m);
		return;
	}

	BSTP_LOCK(bs);

	eh = mtod(m, struct ether_header *);

	/* the type/length field is read as the payload length */
	len = ntohs(eh->ether_type);
	if (len < sizeof(tpdu))
		goto out;

	m_adj(m, ETHER_HDR_LEN);

	/*
	 * The count passed below is negative; per mbuf(9) m_adj() then trims
	 * from the tail, dropping anything beyond the stated length.
	 */
	if (m->m_pkthdr.len > len)
		m_adj(m, len - m->m_pkthdr.len);
	if (m->m_len < sizeof(tpdu) &&
	    (m = m_pullup(m, sizeof(tpdu))) == NULL)
		goto out;

	memcpy(&tpdu, mtod(m, caddr_t), sizeof(tpdu));

	/* basic packet checks */
	if (tpdu.tbu_dsap != LLC_8021D_LSAP ||
	    tpdu.tbu_ssap != LLC_8021D_LSAP ||
	    tpdu.tbu_ctl != LLC_UI)
		goto out;
	if (tpdu.tbu_protoid != BSTP_PROTO_ID)
		goto out;

	/*
	 * We can treat later versions of the PDU as the same as the maximum
	 * version we implement. All additional parameters/flags are ignored.
	 */
	if (tpdu.tbu_protover > BSTP_PROTO_MAX)
		tpdu.tbu_protover = BSTP_PROTO_MAX;

	if (tpdu.tbu_protover != bp->bp_protover) {
		/*
		 * Wait for the migration delay timer to expire before changing
		 * protocol version to avoid flip-flops.
		 */
		if (bp->bp_flags & BSTP_PORT_CANMIGRATE)
			bstp_set_port_proto(bp, tpdu.tbu_protover);
		else
			goto out;
	}

	/* Clear operedge upon receiving a PDU on the port */
	bp->bp_operedge = 0;
	bstp_timer_start(&bp->bp_edge_delay_timer,
	    BSTP_DEFAULT_MIGRATE_DELAY);

	/* handlers take &m because their own m_pullup() may replace it */
	switch (tpdu.tbu_protover) {
	case BSTP_PROTO_STP:
		bstp_received_stp(bs, bp, &m, &tpdu);
		break;

	case BSTP_PROTO_RSTP:
		bstp_received_rstp(bs, bp, &m, &tpdu);
		break;
	}
out:
	BSTP_UNLOCK(bs);
	if (m)
		m_freem(m);
}

/*
 * Handle a PDU received in STP mode: a TCN is passed to
 * bstp_received_tcn(), a configuration BPDU is decoded into the port's
 * bp_msg_cu and passed to bstp_received_bpdu().  Other types are ignored.
 * On m_pullup() failure *mp is left NULL and the function returns.
 */
static void
bstp_received_stp(struct bstp_state *bs, struct bstp_port *bp,
    struct mbuf **mp, struct bstp_tbpdu *tpdu)
{
	struct bstp_cbpdu cpdu;
	struct bstp_config_unit *cu = &bp->bp_msg_cu;
	struct bstp_tcn_unit tu;

	switch (tpdu->tbu_bpdutype) {
	case BSTP_MSGTYPE_TCN:
		tu.tu_message_type = tpdu->tbu_bpdutype;
		bstp_received_tcn(bs, bp, &tu);
		break;
	case BSTP_MSGTYPE_CFG:
		if ((*mp)->m_len < BSTP_BPDU_STP_LEN &&
		    (*mp = m_pullup(*mp, BSTP_BPDU_STP_LEN)) == NULL)
			return;
		memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_STP_LEN);

		bstp_decode_bpdu(bp, &cpdu, cu);
		bstp_received_bpdu(bs, bp, cu);
		break;
	}
}

/*
 * Handle a PDU received in RSTP mode.  Only BSTP_MSGTYPE_RSTP is
 * processed; it is decoded into the port's bp_msg_cu and passed to
 * bstp_received_bpdu().  On m_pullup() failure *mp is left NULL.
 */
static void
bstp_received_rstp(struct bstp_state *bs, struct bstp_port *bp,
    struct mbuf **mp, struct bstp_tbpdu *tpdu)
{
	struct bstp_cbpdu cpdu;
	struct bstp_config_unit *cu = &bp->bp_msg_cu;

	if (tpdu->tbu_bpdutype != BSTP_MSGTYPE_RSTP)
		return;

	if ((*mp)->m_len < BSTP_BPDU_RSTP_LEN &&
	    (*mp = m_pullup(*mp, BSTP_BPDU_RSTP_LEN)) == NULL)
		return;
	memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_RSTP_LEN);

	bstp_decode_bpdu(bp, &cpdu, cu);
	bstp_received_bpdu(bs, bp, cu);
}

/*
 * Record that a TCN was received on the port and run the topology change
 * state machine.  The 'bs' and 'tcn' arguments are not used here.
 */
static void
bstp_received_tcn(struct bstp_state *bs, struct bstp_port *bp,
    struct bstp_tcn_unit *tcn)
{
	bp->bp_rcvdtcn = 1;
	bstp_update_tc(bp);
}

/*
 * Process a decoded BPDU on the port.  The PDU is classified by
 * bstp_pdu_rcvtype() and the port's received-info flags are updated per
 * class; a superior PDU also replaces the port priority vector and timers
 * and re-runs role assignment.  Except for BSTP_PDU_OTHER, the state
 * machines are then updated.  Requires the bstp lock (asserted).
 */
static void
bstp_received_bpdu(struct bstp_state *bs, struct bstp_port *bp,
    struct bstp_config_unit *cu)
{
	int type;

	BSTP_LOCK_ASSERT(bs);

	/* We need to have transitioned to INFO_MINE before proceeding */
	switch (bp->bp_infois) {
	case BSTP_INFO_DISABLED:
	case BSTP_INFO_AGED:
		return;
	}

	type = bstp_pdu_rcvtype(bp, cu);

	switch (type) {
	case BSTP_PDU_SUPERIOR:
		bs->bs_allsynced = 0;
		bp->bp_agreed = 0;
		bp->bp_proposing = 0;

		if (cu->cu_proposal && cu->cu_forwarding == 0)
			bp->bp_proposed = 1;
		if (cu->cu_topology_change)
			bp->bp_rcvdtc = 1;
		if (cu->cu_topology_change_ack)
			bp->bp_rcvdtca = 1;

		if (bp->bp_agree &&
		    !bstp_pdu_bettersame(bp, BSTP_INFO_RECEIVED))
			bp->bp_agree = 0;

		/* copy the received priority and timers to the port */
		bp->bp_port_pv = cu->cu_pv;
		bp->bp_port_msg_age = cu->cu_message_age;
		bp->bp_port_max_age = cu->cu_max_age;
		bp->bp_port_fdelay = cu->cu_forward_delay;
		/* never accept a hello time below the minimum */
		bp->bp_port_htime =
		    (cu->cu_hello_time > BSTP_MIN_HELLO_TIME ?
		     cu->cu_hello_time : BSTP_MIN_HELLO_TIME);

		/* set expiry for the new info */
		bstp_set_timer_msgage(bp);

		bp->bp_infois = BSTP_INFO_RECEIVED;
		bstp_assign_roles(bs);
		break;

	case BSTP_PDU_REPEATED:
		if (cu->cu_proposal && cu->cu_forwarding == 0)
			bp->bp_proposed = 1;
		if (cu->cu_topology_change)
			bp->bp_rcvdtc = 1;
		if (cu->cu_topology_change_ack)
			bp->bp_rcvdtca = 1;

		/* rearm the age timer */
		bstp_set_timer_msgage(bp);
		break;

	case BSTP_PDU_INFERIOR:
		if (cu->cu_learning) {
			bp->bp_agreed = 1;
			bp->bp_proposing = 0;
		}
		break;

	case BSTP_PDU_INFERIORALT:
		/*
		 * only point to point links are allowed fast
		 * transitions to forwarding.
		 */
		if (cu->cu_agree && bp->bp_ptp_link) {
			bp->bp_agreed = 1;
			bp->bp_proposing = 0;
		} else
			bp->bp_agreed = 0;

		if (cu->cu_topology_change)
			bp->bp_rcvdtc = 1;
		if (cu->cu_topology_change_ack)
			bp->bp_rcvdtca = 1;
		break;

	case BSTP_PDU_OTHER:
		return;	/* do nothing */
	}
	/* update the state machines with the new data */
	bstp_update_state(bs, bp);
}

/*
 * Classify a decoded BPDU against the port priority vector and timers.
 * Returns one of BSTP_PDU_SUPERIOR, BSTP_PDU_REPEATED, BSTP_PDU_INFERIOR,
 * BSTP_PDU_INFERIORALT or BSTP_PDU_OTHER (the default when no case
 * applies).
 */
static int
bstp_pdu_rcvtype(struct bstp_port *bp, struct bstp_config_unit *cu)
{
	int type;

	/* default return type */
	type = BSTP_PDU_OTHER;

	switch (cu->cu_role) {
	case BSTP_ROLE_DESIGNATED:
		if (bstp_info_superior(&bp->bp_port_pv, &cu->cu_pv))
			/* bpdu priority is superior */
			type = BSTP_PDU_SUPERIOR;
		else if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) ==
		    INFO_SAME) {
			if (bp->bp_port_msg_age != cu->cu_message_age ||
			    bp->bp_port_max_age != cu->cu_max_age ||
			    bp->bp_port_fdelay != cu->cu_forward_delay ||
			    bp->bp_port_htime != cu->cu_hello_time)
				/* bpdu priority is equal and timers differ */
				type = BSTP_PDU_SUPERIOR;
			else
				/* bpdu is equal */
				type = BSTP_PDU_REPEATED;
		} else
			/* bpdu priority is worse */
			type = BSTP_PDU_INFERIOR;

		break;

	case BSTP_ROLE_ROOT:
	case BSTP_ROLE_ALTERNATE:
	case BSTP_ROLE_BACKUP:
		if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) <= INFO_SAME)
			/*
			 * not a designated port and priority is the same or
			 * worse
			 */
			type = BSTP_PDU_INFERIORALT;
		break;
	}

	return (type);
}

/*
 * Return 1 when the port currently holds info of kind 'newinfo'
 * (BSTP_INFO_RECEIVED or BSTP_INFO_MINE) and the matching vector (the
 * last message's vector or the designated vector, respectively) compares
 * as INFO_SAME or INFO_BETTER against the port priority vector; 0
 * otherwise.
 */
static int
bstp_pdu_bettersame(struct bstp_port *bp, int newinfo)
{
	if (newinfo == BSTP_INFO_RECEIVED &&
	    bp->bp_infois == BSTP_INFO_RECEIVED &&
	    bstp_info_cmp(&bp->bp_port_pv, &bp->bp_msg_cu.cu_pv) >= INFO_SAME)
		return (1);

	if (newinfo == BSTP_INFO_MINE &&
	    bp->bp_infois == BSTP_INFO_MINE &&
	    bstp_info_cmp(&bp->bp_port_pv, &bp->bp_desg_pv) >= INFO_SAME)
		return (1);

	return (0);
}

/*
 * Compare priority vector 'cpv' against 'pv', field by field in the order
 * root id, path cost, designated bridge id, designated port id.  A
 * numerically lower field in 'cpv' makes it INFO_BETTER, a higher one
 * INFO_WORSE; INFO_SAME when all four are equal.
 */
static int
bstp_info_cmp(struct bstp_pri_vector *pv,
    struct bstp_pri_vector *cpv)
{
	if (cpv->pv_root_id < pv->pv_root_id)
		return (INFO_BETTER);
	if (cpv->pv_root_id > pv->pv_root_id)
		return (INFO_WORSE);

	if (cpv->pv_cost < pv->pv_cost)
		return (INFO_BETTER);
	if (cpv->pv_cost > pv->pv_cost)
return (INFO_WORSE); if (cpv->pv_dbridge_id < pv->pv_dbridge_id) return (INFO_BETTER); if (cpv->pv_dbridge_id > pv->pv_dbridge_id) return (INFO_WORSE); if (cpv->pv_dport_id < pv->pv_dport_id) return (INFO_BETTER); if (cpv->pv_dport_id > pv->pv_dport_id) return (INFO_WORSE); return (INFO_SAME); } /* * This message priority vector is superior to the port priority vector and * will replace it if, and only if, the message priority vector is better than * the port priority vector, or the message has been transmitted from the same * designated bridge and designated port as the port priority vector. */ static int bstp_info_superior(struct bstp_pri_vector *pv, struct bstp_pri_vector *cpv) { if (bstp_info_cmp(pv, cpv) == INFO_BETTER || (bstp_same_bridgeid(pv->pv_dbridge_id, cpv->pv_dbridge_id) && (cpv->pv_dport_id & 0xfff) == (pv->pv_dport_id & 0xfff))) return (1); return (0); } static void bstp_assign_roles(struct bstp_state *bs) { struct bstp_port *bp, *rbp = NULL; struct bstp_pri_vector pv; /* default to our priority vector */ bs->bs_root_pv = bs->bs_bridge_pv; bs->bs_root_msg_age = 0; bs->bs_root_max_age = bs->bs_bridge_max_age; bs->bs_root_fdelay = bs->bs_bridge_fdelay; bs->bs_root_htime = bs->bs_bridge_htime; bs->bs_root_port = NULL; /* check if any received info supersedes us */ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { if (bp->bp_infois != BSTP_INFO_RECEIVED) continue; pv = bp->bp_port_pv; pv.pv_cost += bp->bp_path_cost; /* * The root priority vector is the best of the set comprising * the bridge priority vector plus all root path priority * vectors whose bridge address is not equal to us. 
*/ if (bstp_same_bridgeid(pv.pv_dbridge_id, bs->bs_bridge_pv.pv_dbridge_id) == 0 && bstp_info_cmp(&bs->bs_root_pv, &pv) == INFO_BETTER) { /* the port vector replaces the root */ bs->bs_root_pv = pv; bs->bs_root_msg_age = bp->bp_port_msg_age + BSTP_MESSAGE_AGE_INCR; bs->bs_root_max_age = bp->bp_port_max_age; bs->bs_root_fdelay = bp->bp_port_fdelay; bs->bs_root_htime = bp->bp_port_htime; rbp = bp; } } LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { /* calculate the port designated vector */ bp->bp_desg_pv.pv_root_id = bs->bs_root_pv.pv_root_id; bp->bp_desg_pv.pv_cost = bs->bs_root_pv.pv_cost; bp->bp_desg_pv.pv_dbridge_id = bs->bs_bridge_pv.pv_dbridge_id; bp->bp_desg_pv.pv_dport_id = bp->bp_port_id; bp->bp_desg_pv.pv_port_id = bp->bp_port_id; /* calculate designated times */ bp->bp_desg_msg_age = bs->bs_root_msg_age; bp->bp_desg_max_age = bs->bs_root_max_age; bp->bp_desg_fdelay = bs->bs_root_fdelay; bp->bp_desg_htime = bs->bs_bridge_htime; switch (bp->bp_infois) { case BSTP_INFO_DISABLED: bstp_set_port_role(bp, BSTP_ROLE_DISABLED); break; case BSTP_INFO_AGED: bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED); bstp_update_info(bp); break; case BSTP_INFO_MINE: bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED); /* update the port info if stale */ if (bstp_info_cmp(&bp->bp_port_pv, &bp->bp_desg_pv) != INFO_SAME || (rbp != NULL && (bp->bp_port_msg_age != rbp->bp_port_msg_age || bp->bp_port_max_age != rbp->bp_port_max_age || bp->bp_port_fdelay != rbp->bp_port_fdelay || bp->bp_port_htime != rbp->bp_port_htime))) bstp_update_info(bp); break; case BSTP_INFO_RECEIVED: if (bp == rbp) { /* * root priority is derived from this * port, make it the root port. */ bstp_set_port_role(bp, BSTP_ROLE_ROOT); bs->bs_root_port = bp; } else if (bstp_info_cmp(&bp->bp_port_pv, &bp->bp_desg_pv) == INFO_BETTER) { /* * the port priority is lower than the root * port. 
*/ bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED); bstp_update_info(bp); } else { if (bstp_same_bridgeid( bp->bp_port_pv.pv_dbridge_id, bs->bs_bridge_pv.pv_dbridge_id)) { /* * the designated bridge refers to * another port on this bridge. */ bstp_set_port_role(bp, BSTP_ROLE_BACKUP); } else { /* * the port is an inferior path to the * root bridge. */ bstp_set_port_role(bp, BSTP_ROLE_ALTERNATE); } } break; } } } static void bstp_update_state(struct bstp_state *bs, struct bstp_port *bp) { struct bstp_port *bp2; int synced; BSTP_LOCK_ASSERT(bs); /* check if all the ports have syncronised again */ if (!bs->bs_allsynced) { synced = 1; LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) { if (!(bp2->bp_synced || bp2->bp_role == BSTP_ROLE_ROOT)) { synced = 0; break; } } bs->bs_allsynced = synced; } bstp_update_roles(bs, bp); bstp_update_tc(bp); } static void bstp_update_roles(struct bstp_state *bs, struct bstp_port *bp) { + NET_EPOCH_ASSERT(); + switch (bp->bp_role) { case BSTP_ROLE_DISABLED: /* Clear any flags if set */ if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) { bp->bp_sync = 0; bp->bp_synced = 1; bp->bp_reroot = 0; } break; case BSTP_ROLE_ALTERNATE: case BSTP_ROLE_BACKUP: if ((bs->bs_allsynced && !bp->bp_agree) || (bp->bp_proposed && bp->bp_agree)) { bp->bp_proposed = 0; bp->bp_agree = 1; bp->bp_flags |= BSTP_PORT_NEWINFO; DPRINTF("%s -> ALTERNATE_AGREED\n", bp->bp_ifp->if_xname); } if (bp->bp_proposed && !bp->bp_agree) { bstp_set_all_sync(bs); bp->bp_proposed = 0; DPRINTF("%s -> ALTERNATE_PROPOSED\n", bp->bp_ifp->if_xname); } /* Clear any flags if set */ if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) { bp->bp_sync = 0; bp->bp_synced = 1; bp->bp_reroot = 0; DPRINTF("%s -> ALTERNATE_PORT\n", bp->bp_ifp->if_xname); } break; case BSTP_ROLE_ROOT: if (bp->bp_state != BSTP_IFSTATE_FORWARDING && !bp->bp_reroot) { bstp_set_all_reroot(bs); DPRINTF("%s -> ROOT_REROOT\n", bp->bp_ifp->if_xname); } if ((bs->bs_allsynced && !bp->bp_agree) || (bp->bp_proposed && bp->bp_agree)) { 
bp->bp_proposed = 0;
			bp->bp_sync = 0;
			bp->bp_agree = 1;
			bp->bp_flags |= BSTP_PORT_NEWINFO;
			DPRINTF("%s -> ROOT_AGREED\n", bp->bp_ifp->if_xname);
		}

		if (bp->bp_proposed && !bp->bp_agree) {
			bstp_set_all_sync(bs);
			bp->bp_proposed = 0;
			DPRINTF("%s -> ROOT_PROPOSED\n", bp->bp_ifp->if_xname);
		}

		/*
		 * Step the root port one state towards forwarding once the
		 * forward delay timer is no longer active, or (RSTP only)
		 * when bstp_rerooted() holds and the recent backup timer is
		 * not active.
		 */
		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
		    (bp->bp_forward_delay_timer.active == 0 ||
		    (bstp_rerooted(bs, bp) &&
		    bp->bp_recent_backup_timer.active == 0 &&
		    bp->bp_protover == BSTP_PROTO_RSTP))) {
			switch (bp->bp_state) {
			case BSTP_IFSTATE_DISCARDING:
				bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
				break;
			case BSTP_IFSTATE_LEARNING:
				bstp_set_port_state(bp,
				    BSTP_IFSTATE_FORWARDING);
				break;
			}
		}

		if (bp->bp_state == BSTP_IFSTATE_FORWARDING && bp->bp_reroot) {
			bp->bp_reroot = 0;
			DPRINTF("%s -> ROOT_REROOTED\n", bp->bp_ifp->if_xname);
		}
		break;

	case BSTP_ROLE_DESIGNATED:
		if (bp->bp_recent_root_timer.active == 0 && bp->bp_reroot) {
			bp->bp_reroot = 0;
			DPRINTF("%s -> DESIGNATED_RETIRED\n",
			    bp->bp_ifp->if_xname);
		}

		if ((bp->bp_state == BSTP_IFSTATE_DISCARDING &&
		    !bp->bp_synced) || (bp->bp_agreed && !bp->bp_synced) ||
		    (bp->bp_operedge && !bp->bp_synced) ||
		    (bp->bp_sync && bp->bp_synced)) {
			bstp_timer_stop(&bp->bp_recent_root_timer);
			bp->bp_synced = 1;
			bp->bp_sync = 0;
			DPRINTF("%s -> DESIGNATED_SYNCED\n",
			    bp->bp_ifp->if_xname);
		}

		/* start proposing; the edge delay depends on the link type */
		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
		    !bp->bp_agreed && !bp->bp_proposing &&
		    !bp->bp_operedge) {
			bp->bp_proposing = 1;
			bp->bp_flags |= BSTP_PORT_NEWINFO;
			bstp_timer_start(&bp->bp_edge_delay_timer,
			    (bp->bp_ptp_link ? BSTP_DEFAULT_MIGRATE_DELAY :
			     bp->bp_desg_max_age));
			DPRINTF("%s -> DESIGNATED_PROPOSE\n",
			    bp->bp_ifp->if_xname);
		}

		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
		    (bp->bp_forward_delay_timer.active == 0 || bp->bp_agreed ||
		    bp->bp_operedge) &&
		    (bp->bp_recent_root_timer.active == 0 || !bp->bp_reroot) &&
		    !bp->bp_sync) {
			if (bp->bp_agreed)
				DPRINTF("%s -> AGREED\n", bp->bp_ifp->if_xname);
			/*
			 * If agreed|operedge then go straight to forwarding,
			 * otherwise follow discard -> learn -> forward.
			 */
			if (bp->bp_agreed || bp->bp_operedge ||
			    bp->bp_state == BSTP_IFSTATE_LEARNING) {
				bstp_set_port_state(bp,
				    BSTP_IFSTATE_FORWARDING);
				/*
				 * NOTE(review): bp_agreed is assigned the
				 * protocol version value here rather than
				 * 0/1 -- confirm this is intended.
				 */
				bp->bp_agreed = bp->bp_protover;
			} else if (bp->bp_state == BSTP_IFSTATE_DISCARDING)
				bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
		}

		/* fall back to discarding and restart the forward delay */
		if (((bp->bp_sync && !bp->bp_synced) ||
		    (bp->bp_reroot && bp->bp_recent_root_timer.active) ||
		    (bp->bp_flags & BSTP_PORT_DISPUTED)) && !bp->bp_operedge &&
		    bp->bp_state != BSTP_IFSTATE_DISCARDING) {
			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
			bp->bp_flags &= ~BSTP_PORT_DISPUTED;
			bstp_timer_start(&bp->bp_forward_delay_timer,
			    bp->bp_protover == BSTP_PROTO_RSTP ?
			    bp->bp_desg_htime : bp->bp_desg_fdelay);
			DPRINTF("%s -> DESIGNATED_DISCARD\n",
			    bp->bp_ifp->if_xname);
		}
		break;
	}

	/* send a PDU if any transition above flagged new info */
	if (bp->bp_flags & BSTP_PORT_NEWINFO)
		bstp_transmit(bs, bp);
}

/*
 * Run the topology change state machine for the port, moving it to a new
 * TC state via bstp_set_port_tc() according to its current TC state,
 * role, port state and received TC/TCN/TCA flags.
 */
static void
bstp_update_tc(struct bstp_port *bp)
{
	switch (bp->bp_tcstate) {
	case BSTP_TCSTATE_ACTIVE:
		if ((bp->bp_role != BSTP_ROLE_DESIGNATED &&
		    bp->bp_role != BSTP_ROLE_ROOT) || bp->bp_operedge)
			bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);

		if (bp->bp_rcvdtcn)
			bstp_set_port_tc(bp, BSTP_TCSTATE_TCN);
		if (bp->bp_rcvdtc)
			bstp_set_port_tc(bp, BSTP_TCSTATE_TC);

		if (bp->bp_tc_prop && !bp->bp_operedge)
			bstp_set_port_tc(bp, BSTP_TCSTATE_PROPAG);

		if (bp->bp_rcvdtca)
			bstp_set_port_tc(bp, BSTP_TCSTATE_ACK);
		break;

	case BSTP_TCSTATE_INACTIVE:
		if ((bp->bp_state == BSTP_IFSTATE_LEARNING ||
		    bp->bp_state == BSTP_IFSTATE_FORWARDING) &&
		    bp->bp_fdbflush == 0)
			bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
		break;

	case BSTP_TCSTATE_LEARNING:
		/* re-entering LEARNING clears the received flags */
		if (bp->bp_rcvdtc || bp->bp_rcvdtcn || bp->bp_rcvdtca ||
		    bp->bp_tc_prop)
			bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
		else if (bp->bp_role != BSTP_ROLE_DESIGNATED &&
		    bp->bp_role != BSTP_ROLE_ROOT &&
		    bp->bp_state == BSTP_IFSTATE_DISCARDING)
			bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);

		if ((bp->bp_role == BSTP_ROLE_DESIGNATED ||
		    bp->bp_role == BSTP_ROLE_ROOT) &&
		    bp->bp_state == BSTP_IFSTATE_FORWARDING &&
		    !bp->bp_operedge)
			bstp_set_port_tc(bp, BSTP_TCSTATE_DETECTED);
		break;

	/* these are transient states and go straight back to ACTIVE */
	case BSTP_TCSTATE_DETECTED:
	case BSTP_TCSTATE_TCN:
	case BSTP_TCSTATE_TC:
	case BSTP_TCSTATE_PROPAG:
	case BSTP_TCSTATE_ACK:
		DPRINTF("Invalid TC state for %s\n",
		    bp->bp_ifp->if_xname);
		break;
	}

}

/*
 * Make the port's own designated information its current port
 * information: clears proposing/proposed, possibly clears agreed and
 * synced, copies the designated vector and timers to the port, marks the
 * info as BSTP_INFO_MINE and flags new info for transmission.
 */
static void
bstp_update_info(struct bstp_port *bp)
{
	struct bstp_state *bs = bp->bp_bs;

	bp->bp_proposing = 0;
	bp->bp_proposed = 0;

	if (bp->bp_agreed && !bstp_pdu_bettersame(bp, BSTP_INFO_MINE))
		bp->bp_agreed = 0;

	if (bp->bp_synced && !bp->bp_agreed) {
		bp->bp_synced = 0;
		bs->bs_allsynced = 0;
	}

	/* copy the designated pv to the port */
	bp->bp_port_pv =
bp->bp_desg_pv;
	bp->bp_port_msg_age = bp->bp_desg_msg_age;
	bp->bp_port_max_age = bp->bp_desg_max_age;
	bp->bp_port_fdelay = bp->bp_desg_fdelay;
	bp->bp_port_htime = bp->bp_desg_htime;
	bp->bp_infois = BSTP_INFO_MINE;

	/* Set transmit flag but do not immediately send */
	bp->bp_flags |= BSTP_PORT_NEWINFO;
}

/*
 * Set tcprop on every port of the bridge other than the caller.
 * Requires the bstp lock (asserted).
 */
static void
bstp_set_other_tcprop(struct bstp_port *bp)
{
	struct bstp_state *bs = bp->bp_bs;
	struct bstp_port *bp2;

	BSTP_LOCK_ASSERT(bs);

	LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
		if (bp2 == bp)
			continue;
		bp2->bp_tc_prop = 1;
	}
}

/*
 * Set the reroot flag on every port of the bridge.
 * Requires the bstp lock (asserted).
 */
static void
bstp_set_all_reroot(struct bstp_state *bs)
{
	struct bstp_port *bp;

	BSTP_LOCK_ASSERT(bs);

	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
		bp->bp_reroot = 1;
}

/*
 * Request a sync on every port of the bridge: sets sync, clears synced on
 * each port and clears the bridge-wide allsynced flag.
 * Requires the bstp lock (asserted).
 */
static void
bstp_set_all_sync(struct bstp_state *bs)
{
	struct bstp_port *bp;

	BSTP_LOCK_ASSERT(bs);

	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
		bp->bp_sync = 1;
		bp->bp_synced = 0;	/* Not explicit in spec */
	}

	bs->bs_allsynced = 0;
}

/*
 * Change the port's forwarding state.  Does nothing if the state is
 * unchanged; otherwise performs the per-state timer handling and queues
 * the port's state task to notify the parent bridge.
 */
static void
bstp_set_port_state(struct bstp_port *bp, int state)
{
	if (bp->bp_state == state)
		return;

	bp->bp_state = state;

	switch (bp->bp_state) {
	case BSTP_IFSTATE_DISCARDING:
		DPRINTF("state changed to DISCARDING on %s\n",
		    bp->bp_ifp->if_xname);
		break;

	case BSTP_IFSTATE_LEARNING:
		DPRINTF("state changed to LEARNING on %s\n",
		    bp->bp_ifp->if_xname);

		/* the forward delay is the hello time in RSTP mode */
		bstp_timer_start(&bp->bp_forward_delay_timer,
		    bp->bp_protover == BSTP_PROTO_RSTP ?
bp->bp_desg_htime : bp->bp_desg_fdelay);
		break;

	case BSTP_IFSTATE_FORWARDING:
		DPRINTF("state changed to FORWARDING on %s\n",
		    bp->bp_ifp->if_xname);

		bstp_timer_stop(&bp->bp_forward_delay_timer);
		/* Record that we enabled forwarding */
		bp->bp_forward_transitions++;
		break;
	}

	/* notify the parent bridge */
	taskqueue_enqueue(taskqueue_swi, &bp->bp_statetask);
}

/*
 * Change the port's role.  Does nothing if the role is unchanged.
 * Otherwise: starts the timers tied to leaving the old role, stores the
 * new role, clears proposing and the bridge-wide allsynced flag,
 * initialises state for the new role and finally runs the topology change
 * state machine.
 */
static void
bstp_set_port_role(struct bstp_port *bp, int role)
{
	struct bstp_state *bs = bp->bp_bs;

	if (bp->bp_role == role)
		return;

	/* perform pre-change tasks */
	switch (bp->bp_role) {
	case BSTP_ROLE_DISABLED:
		bstp_timer_start(&bp->bp_forward_delay_timer,
		    bp->bp_desg_max_age);
		break;

	case BSTP_ROLE_BACKUP:
		bstp_timer_start(&bp->bp_recent_backup_timer,
		    bp->bp_desg_htime * 2);
		/* fall through */
	case BSTP_ROLE_ALTERNATE:
		bstp_timer_start(&bp->bp_forward_delay_timer,
		    bp->bp_desg_fdelay);
		bp->bp_sync = 0;
		bp->bp_synced = 1;
		bp->bp_reroot = 0;
		break;

	case BSTP_ROLE_ROOT:
		bstp_timer_start(&bp->bp_recent_root_timer,
		    BSTP_DEFAULT_FORWARD_DELAY);
		break;
	}

	bp->bp_role = role;
	/* clear values not carried between roles */
	bp->bp_proposing = 0;
	bs->bs_allsynced = 0;

	/* initialise the new role */
	switch (bp->bp_role) {
	case BSTP_ROLE_DISABLED:
	case BSTP_ROLE_ALTERNATE:
	case BSTP_ROLE_BACKUP:
		DPRINTF("%s role -> ALT/BACK/DISABLED\n",
		    bp->bp_ifp->if_xname);
		bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
		bstp_timer_stop(&bp->bp_recent_root_timer);
		bstp_timer_latch(&bp->bp_forward_delay_timer);
		bp->bp_sync = 0;
		bp->bp_synced = 1;
		bp->bp_reroot = 0;
		break;

	case BSTP_ROLE_ROOT:
		DPRINTF("%s role -> ROOT\n",
		    bp->bp_ifp->if_xname);
		bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
		bstp_timer_latch(&bp->bp_recent_root_timer);
		bp->bp_proposing = 0;
		break;

	case BSTP_ROLE_DESIGNATED:
		DPRINTF("%s role -> DESIGNATED\n",
		    bp->bp_ifp->if_xname);
		bstp_timer_start(&bp->bp_hello_timer,
		    bp->bp_desg_htime);
		bp->bp_agree = 0;
		break;
	}

	/* let the TC state know that the role changed */
	bstp_update_tc(bp);
}

/*
 * Switch the port to protocol version 'proto' (continued on the next
 * source line).
 */
static void
bstp_set_port_proto(struct
bstp_port *bp, int proto) { struct bstp_state *bs = bp->bp_bs; /* supported protocol versions */ switch (proto) { case BSTP_PROTO_STP: /* we can downgrade protocols only */ bstp_timer_stop(&bp->bp_migrate_delay_timer); /* clear unsupported features */ bp->bp_operedge = 0; /* STP compat mode only uses 16 bits of the 32 */ if (bp->bp_path_cost > 65535) bp->bp_path_cost = 65535; break; case BSTP_PROTO_RSTP: bstp_timer_start(&bp->bp_migrate_delay_timer, bs->bs_migration_delay); break; default: DPRINTF("Unsupported STP version %d\n", proto); /* bail out without touching bp_protover or the CANMIGRATE flag */ return; } bp->bp_protover = proto; bp->bp_flags &= ~BSTP_PORT_CANMIGRATE; } /* enter a topology change state and run its entry actions; the states marked UCT set bp_tcstate straight back to ACTIVE */ static void bstp_set_port_tc(struct bstp_port *bp, int state) { struct bstp_state *bs = bp->bp_bs; bp->bp_tcstate = state; /* initialise the new state */ switch (bp->bp_tcstate) { case BSTP_TCSTATE_ACTIVE: DPRINTF("%s -> TC_ACTIVE\n", bp->bp_ifp->if_xname); /* nothing to do */ break; case BSTP_TCSTATE_INACTIVE: bstp_timer_stop(&bp->bp_tc_timer); /* flush routes on the parent bridge */ bp->bp_fdbflush = 1; taskqueue_enqueue(taskqueue_swi, &bp->bp_rtagetask); bp->bp_tc_ack = 0; DPRINTF("%s -> TC_INACTIVE\n", bp->bp_ifp->if_xname); break; case BSTP_TCSTATE_LEARNING: bp->bp_rcvdtc = 0; bp->bp_rcvdtcn = 0; bp->bp_rcvdtca = 0; bp->bp_tc_prop = 0; DPRINTF("%s -> TC_LEARNING\n", bp->bp_ifp->if_xname); break; case BSTP_TCSTATE_DETECTED: bstp_set_timer_tc(bp); bstp_set_other_tcprop(bp); /* send out notification */ bp->bp_flags |= BSTP_PORT_NEWINFO; bstp_transmit(bs, bp); getmicrotime(&bs->bs_last_tc_time); DPRINTF("%s -> TC_DETECTED\n", bp->bp_ifp->if_xname); bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */ break; case BSTP_TCSTATE_TCN: bstp_set_timer_tc(bp); DPRINTF("%s -> TC_TCN\n", bp->bp_ifp->if_xname); /* fall through */ case BSTP_TCSTATE_TC: bp->bp_rcvdtc = 0; bp->bp_rcvdtcn = 0; if (bp->bp_role == BSTP_ROLE_DESIGNATED) bp->bp_tc_ack = 1; bstp_set_other_tcprop(bp); DPRINTF("%s -> TC_TC\n", bp->bp_ifp->if_xname); bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */ 
break; case BSTP_TCSTATE_PROPAG: /* flush routes on the parent bridge */ bp->bp_fdbflush = 1; taskqueue_enqueue(taskqueue_swi, &bp->bp_rtagetask); bp->bp_tc_prop = 0; bstp_set_timer_tc(bp); DPRINTF("%s -> TC_PROPAG\n", bp->bp_ifp->if_xname); bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */ break; case BSTP_TCSTATE_ACK: bstp_timer_stop(&bp->bp_tc_timer); bp->bp_rcvdtca = 0; DPRINTF("%s -> TC_ACK\n", bp->bp_ifp->if_xname); bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */ break; } } /* start the tc timer unless it is already active; the duration depends on the port's protocol version */ static void bstp_set_timer_tc(struct bstp_port *bp) { struct bstp_state *bs = bp->bp_bs; if (bp->bp_tc_timer.active) return; switch (bp->bp_protover) { case BSTP_PROTO_RSTP: bstp_timer_start(&bp->bp_tc_timer, bp->bp_desg_htime + BSTP_TICK_VAL); bp->bp_flags |= BSTP_PORT_NEWINFO; break; case BSTP_PROTO_STP: bstp_timer_start(&bp->bp_tc_timer, bs->bs_root_max_age + bs->bs_root_fdelay); break; } } /* (re)start the message age timer: three hello times while the incremented message age stays within max age, otherwise zero */ static void bstp_set_timer_msgage(struct bstp_port *bp) { if (bp->bp_port_msg_age + BSTP_MESSAGE_AGE_INCR <= bp->bp_port_max_age) { bstp_timer_start(&bp->bp_message_age_timer, bp->bp_port_htime * 3); } else /* expires immediately */ bstp_timer_start(&bp->bp_message_age_timer, 0); } /* return 1 if no port other than bp has an active recent root timer, 0 otherwise */ static int bstp_rerooted(struct bstp_state *bs, struct bstp_port *bp) { struct bstp_port *bp2; int rr_set = 0; LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) { if (bp2 == bp) continue; if (bp2->bp_recent_root_timer.active) { rr_set = 1; break; } } return (!rr_set); } /* set the bridge hello time (t in seconds); returns EPERM outside STP mode, EINVAL when out of range, 0 on success */ int bstp_set_htime(struct bstp_state *bs, int t) { /* convert seconds to ticks */ t *= BSTP_TICK_VAL; /* value can only be changed in legacy stp mode */ if (bs->bs_protover != BSTP_PROTO_STP) return (EPERM); if (t < BSTP_MIN_HELLO_TIME || t > BSTP_MAX_HELLO_TIME) return (EINVAL); BSTP_LOCK(bs); bs->bs_bridge_htime = t; bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* set the bridge forward delay (t in seconds); returns EINVAL when out of range */ int bstp_set_fdelay(struct bstp_state *bs, int t) { /* convert seconds to ticks */ t *= BSTP_TICK_VAL; if (t < BSTP_MIN_FORWARD_DELAY || t > BSTP_MAX_FORWARD_DELAY) return (EINVAL); BSTP_LOCK(bs); 
bs->bs_bridge_fdelay = t; bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* set the bridge max age (t in seconds); returns EINVAL when out of range */ int bstp_set_maxage(struct bstp_state *bs, int t) { /* convert seconds to ticks */ t *= BSTP_TICK_VAL; if (t < BSTP_MIN_MAX_AGE || t > BSTP_MAX_MAX_AGE) return (EINVAL); BSTP_LOCK(bs); bs->bs_bridge_max_age = t; bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* set the transmit hold count and reset every port's tx counter; returns EINVAL when out of range */ int bstp_set_holdcount(struct bstp_state *bs, int count) { struct bstp_port *bp; if (count < BSTP_MIN_HOLD_COUNT || count > BSTP_MAX_HOLD_COUNT) return (EINVAL); BSTP_LOCK(bs); bs->bs_txholdcount = count; LIST_FOREACH(bp, &bs->bs_bplist, bp_next) bp->bp_txcount = 0; BSTP_UNLOCK(bs); return (0); } /* switch the bridge protocol version, reset the hello time to its default and reinitialise every port; returns EINVAL for an unsupported version */ int bstp_set_protocol(struct bstp_state *bs, int proto) { struct bstp_port *bp; switch (proto) { /* Supported protocol versions */ case BSTP_PROTO_STP: case BSTP_PROTO_RSTP: break; default: return (EINVAL); } BSTP_LOCK(bs); bs->bs_protover = proto; bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME; LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { /* reinit state */ bp->bp_infois = BSTP_INFO_DISABLED; bp->bp_txcount = 0; bstp_set_port_proto(bp, bs->bs_protover); bstp_set_port_role(bp, BSTP_ROLE_DISABLED); bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE); bstp_timer_stop(&bp->bp_recent_backup_timer); } bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* set the bridge priority, rounded down to a multiple of 4096; returns EINVAL when out of range */ int bstp_set_priority(struct bstp_state *bs, int pri) { if (pri < 0 || pri > BSTP_MAX_PRIORITY) return (EINVAL); /* Limit to steps of 4096 */ pri -= pri % 4096; BSTP_LOCK(bs); bs->bs_bridge_priority = pri; bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* set a port's priority, rounded down to a multiple of 16; returns EINVAL when out of range */ int bstp_set_port_priority(struct bstp_port *bp, int pri) { struct bstp_state *bs = bp->bp_bs; if (pri < 0 || pri > BSTP_MAX_PORT_PRIORITY) return (EINVAL); /* Limit to steps of 16 */ pri -= pri % 16; BSTP_LOCK(bs); bp->bp_priority = pri; bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* set a port's path cost; 0 selects the automatically calculated cost, values above BSTP_MAX_PATH_COST return EINVAL */ int bstp_set_path_cost(struct bstp_port *bp, uint32_t path_cost) { struct bstp_state *bs = bp->bp_bs; if (path_cost > BSTP_MAX_PATH_COST) return (EINVAL); /* STP compat mode only uses 
16 bits of the 32 */ if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535) path_cost = 65535; BSTP_LOCK(bs); if (path_cost == 0) { /* use auto */ bp->bp_flags &= ~BSTP_PORT_ADMCOST; bp->bp_path_cost = bstp_calc_path_cost(bp); } else { bp->bp_path_cost = path_cost; bp->bp_flags |= BSTP_PORT_ADMCOST; } bstp_reinit(bs); BSTP_UNLOCK(bs); return (0); } /* store set in bp_operedge and mirror it in the ADMEDGE flag */ int bstp_set_edge(struct bstp_port *bp, int set) { struct bstp_state *bs = bp->bp_bs; BSTP_LOCK(bs); if ((bp->bp_operedge = set) == 0) bp->bp_flags &= ~BSTP_PORT_ADMEDGE; else bp->bp_flags |= BSTP_PORT_ADMEDGE; BSTP_UNLOCK(bs); return (0); } /* set or clear the AUTOEDGE flag; when setting it with no edge delay timer running, run the edge delay expiry handler right away */ int bstp_set_autoedge(struct bstp_port *bp, int set) { struct bstp_state *bs = bp->bp_bs; BSTP_LOCK(bs); if (set) { bp->bp_flags |= BSTP_PORT_AUTOEDGE; /* we may be able to transition straight to edge */ if (bp->bp_edge_delay_timer.active == 0) bstp_edge_delay_expiry(bs, bp); } else bp->bp_flags &= ~BSTP_PORT_AUTOEDGE; BSTP_UNLOCK(bs); return (0); } /* store set in the port's bp_ptp_link field under the bstp lock */ int bstp_set_ptp(struct bstp_port *bp, int set) { struct bstp_state *bs = bp->bp_bs; BSTP_LOCK(bs); bp->bp_ptp_link = set; BSTP_UNLOCK(bs); return (0); } /* set or clear the AUTOPTP flag; when setting it on a port whose role is not DISABLED, queue the media task */ int bstp_set_autoptp(struct bstp_port *bp, int set) { struct bstp_state *bs = bp->bp_bs; BSTP_LOCK(bs); if (set) { bp->bp_flags |= BSTP_PORT_AUTOPTP; if (bp->bp_role != BSTP_ROLE_DISABLED) taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask); } else bp->bp_flags &= ~BSTP_PORT_AUTOPTP; BSTP_UNLOCK(bs); return (0); } /* * Calculate the path cost according to the link speed. 
*/ static uint32_t bstp_calc_path_cost(struct bstp_port *bp) { struct ifnet *ifp = bp->bp_ifp; uint32_t path_cost; /* If the path cost has been manually set then retain the value */ if (bp->bp_flags & BSTP_PORT_ADMCOST) return bp->bp_path_cost; if (ifp->if_link_state == LINK_STATE_DOWN) { /* Recalc when the link comes up again */ bp->bp_flags |= BSTP_PORT_PNDCOST; return (BSTP_DEFAULT_PATH_COST); } /* below 1000 the divisor (if_baudrate / 1000) would be zero, so use the default */ if (ifp->if_baudrate < 1000) return (BSTP_DEFAULT_PATH_COST); /* formula from section 17.14, IEEE Std 802.1D-2004 */ /* NOTE(review): the 64-bit quotient is stored in a uint32_t before the clamp below is applied */ path_cost = 20000000000ULL / (ifp->if_baudrate / 1000); if (path_cost > BSTP_MAX_PATH_COST) path_cost = BSTP_MAX_PATH_COST; /* STP compat mode only uses 16 bits of the 32 */ if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535) path_cost = 65535; return (path_cost); } /* * Notify the bridge that a port state has changed, we need to do this from a * taskqueue to avoid a LOR. */ static void bstp_notify_state(void *arg, int pending) { struct bstp_port *bp = (struct bstp_port *)arg; struct bstp_state *bs = bp->bp_bs; /* only call back for an active port, and only if a state callback is registered */ if (bp->bp_active == 1 && bs->bs_state_cb != NULL) (*bs->bs_state_cb)(bp->bp_ifp, bp->bp_state); } /* * Flush the routes on the bridge port, we need to do this from a * taskqueue to avoid a LOR. 
*/ static void bstp_notify_rtage(void *arg, int pending) { struct bstp_port *bp = (struct bstp_port *)arg; struct bstp_state *bs = bp->bp_bs; int age = 0; BSTP_LOCK(bs); switch (bp->bp_protover) { case BSTP_PROTO_STP: /* convert to seconds */ age = bp->bp_desg_fdelay / BSTP_TICK_VAL; break; case BSTP_PROTO_RSTP: age = 0; break; } BSTP_UNLOCK(bs); /* the callback is invoked with the bstp lock dropped */ if (bp->bp_active == 1 && bs->bs_rtage_cb != NULL) (*bs->bs_rtage_cb)(bp->bp_ifp, age); /* flush is complete */ BSTP_LOCK(bs); bp->bp_fdbflush = 0; BSTP_UNLOCK(bs); } /* for an active port: refresh the media status, then run bstp_update_state() under the bstp lock */ void bstp_linkstate(struct bstp_port *bp) { struct bstp_state *bs = bp->bp_bs; if (!bp->bp_active) return; bstp_ifupdstatus(bp, 0); BSTP_LOCK(bs); bstp_update_state(bs, bp); BSTP_UNLOCK(bs); } /* query the interface media with SIOCGIFMEDIA and update the port's point-to-point flag, pending path cost and enabled/disabled info; reassigns roles if anything changed */ static void bstp_ifupdstatus(void *arg, int pending) { struct bstp_port *bp = (struct bstp_port *)arg; struct bstp_state *bs = bp->bp_bs; struct ifnet *ifp = bp->bp_ifp; struct ifmediareq ifmr; int error, changed; if (!bp->bp_active) return; bzero((char *)&ifmr, sizeof(ifmr)); /* the ioctl is issued before the bstp lock is taken */ error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); BSTP_LOCK(bs); changed = 0; if ((error == 0) && (ifp->if_flags & IFF_UP)) { if (ifmr.ifm_status & IFM_ACTIVE) { /* A full-duplex link is assumed to be point to point */ if (bp->bp_flags & BSTP_PORT_AUTOPTP) { int fdx; fdx = ifmr.ifm_active & IFM_FDX ? 
1 : 0; if (bp->bp_ptp_link ^ fdx) { bp->bp_ptp_link = fdx; changed = 1; } } /* Calc the cost if the link was down previously */ if (bp->bp_flags & BSTP_PORT_PNDCOST) { uint32_t cost; cost = bstp_calc_path_cost(bp); if (bp->bp_path_cost != cost) { bp->bp_path_cost = cost; changed = 1; } bp->bp_flags &= ~BSTP_PORT_PNDCOST; } if (bp->bp_role == BSTP_ROLE_DISABLED) { bstp_enable_port(bs, bp); changed = 1; } } else { /* media is not active: disable the port if it is not disabled already */ if (bp->bp_role != BSTP_ROLE_DISABLED) { bstp_disable_port(bs, bp); changed = 1; if ((bp->bp_flags & BSTP_PORT_ADMEDGE) && bp->bp_protover == BSTP_PROTO_RSTP) bp->bp_operedge = 1; } } } else if (bp->bp_infois != BSTP_INFO_DISABLED) { /* ioctl failed or the interface is not up */ bstp_disable_port(bs, bp); changed = 1; } if (changed) bstp_assign_roles(bs); BSTP_UNLOCK(bs); } /* mark the port's info as aged */ static void bstp_enable_port(struct bstp_state *bs, struct bstp_port *bp) { bp->bp_infois = BSTP_INFO_AGED; } /* mark the port's info as disabled */ static void bstp_disable_port(struct bstp_state *bs, struct bstp_port *bp) { bp->bp_infois = BSTP_INFO_DISABLED; } /* callout handler: polls link state, counts down the per-port timers, runs the port state machines and re-arms itself; returns at once if the instance is not running */ static void bstp_tick(void *arg) { + struct epoch_tracker et; struct bstp_state *bs = arg; struct bstp_port *bp; BSTP_LOCK_ASSERT(bs); if (bs->bs_running == 0) return; + NET_EPOCH_ENTER(et); CURVNET_SET(bs->bs_vnet); /* poll link events on interfaces that do not support linkstate */ if (bstp_timer_dectest(&bs->bs_link_timer)) { LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { if (!(bp->bp_ifp->if_capabilities & IFCAP_LINKSTATE)) taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask); } bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER); } LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { /* no events need to happen for these */ bstp_timer_dectest(&bp->bp_tc_timer); bstp_timer_dectest(&bp->bp_recent_root_timer); bstp_timer_dectest(&bp->bp_forward_delay_timer); bstp_timer_dectest(&bp->bp_recent_backup_timer); if (bstp_timer_dectest(&bp->bp_hello_timer)) bstp_hello_timer_expiry(bs, bp); if (bstp_timer_dectest(&bp->bp_message_age_timer)) bstp_message_age_expiry(bs, bp); if (bstp_timer_dectest(&bp->bp_migrate_delay_timer)) 
bstp_migrate_delay_expiry(bs, bp); if (bstp_timer_dectest(&bp->bp_edge_delay_timer)) bstp_edge_delay_expiry(bs, bp); /* update the various state machines for the port */ bstp_update_state(bs, bp); if (bp->bp_txcount > 0) bp->bp_txcount--; } CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); /* re-arm for the next tick, hz ticks from now */ callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs); } /* arm a timer with value v and clear any latch */ static void bstp_timer_start(struct bstp_timer *t, uint16_t v) { t->value = v; t->active = 1; t->latched = 0; } /* disarm a timer: zero its value and clear the active and latched flags */ static void bstp_timer_stop(struct bstp_timer *t) { t->value = 0; t->active = 0; t->latched = 0; } /* mark a timer active and latched; bstp_timer_dectest() does not count down a latched timer */ static void bstp_timer_latch(struct bstp_timer *t) { t->latched = 1; t->active = 1; } /* count an active, unlatched timer down by BSTP_TICK_VAL; returns 1 and stops the timer once the value reaches zero or below, otherwise 0 */ static int bstp_timer_dectest(struct bstp_timer *t) { if (t->active == 0 || t->latched) return (0); t->value -= BSTP_TICK_VAL; if (t->value <= 0) { bstp_timer_stop(t); return (1); } return (0); } /* hello timer expired: restart it and transmit if the port has new info, is designated, or is a root port with the tc timer active */ static void bstp_hello_timer_expiry(struct bstp_state *bs, struct bstp_port *bp) { if ((bp->bp_flags & BSTP_PORT_NEWINFO) || bp->bp_role == BSTP_ROLE_DESIGNATED || (bp->bp_role == BSTP_ROLE_ROOT && bp->bp_tc_timer.active == 1)) { bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime); bp->bp_flags |= BSTP_PORT_NEWINFO; bstp_transmit(bs, bp); } } /* message age timer expired: age out received info and reassign roles */ static void bstp_message_age_expiry(struct bstp_state *bs, struct bstp_port *bp) { if (bp->bp_infois == BSTP_INFO_RECEIVED) { bp->bp_infois = BSTP_INFO_AGED; bstp_assign_roles(bs); DPRINTF("aged info on %s\n", bp->bp_ifp->if_xname); } } /* migrate delay timer expired: set the CANMIGRATE flag on the port */ static void bstp_migrate_delay_expiry(struct bstp_state *bs, struct bstp_port *bp) { bp->bp_flags |= BSTP_PORT_CANMIGRATE; } /* edge delay timer expired: make the port an operational edge port if AUTOEDGE is set and it is a proposing designated RSTP port */ static void bstp_edge_delay_expiry(struct bstp_state *bs, struct bstp_port *bp) { if ((bp->bp_flags & BSTP_PORT_AUTOEDGE) && bp->bp_protover == BSTP_PROTO_RSTP && bp->bp_proposing && bp->bp_role == BSTP_ROLE_DESIGNATED) { bp->bp_operedge = 1; DPRINTF("%s -> edge port\n", bp->bp_ifp->if_xname); } } /* compare two Ethernet addresses byte by byte; returns the first non-zero byte difference, or 0 when all ETHER_ADDR_LEN bytes match */ static int bstp_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) { d = ((int)a[i]) - 
((int)b[i]); } return (d); } /* * compare the bridge address component of the bridgeid */ static int bstp_same_bridgeid(uint64_t id1, uint64_t id2) { u_char addr1[ETHER_ADDR_LEN]; u_char addr2[ETHER_ADDR_LEN]; /* extract the low 48 bits of each id as an address; the bits above are ignored */ PV2ADDR(id1, addr1); PV2ADDR(id2, addr2); if (bstp_addr_cmp(addr1, addr2) == 0) return (1); return (0); } /* rebuild the bridge id from the bridge priority and the lowest MAC address among the bridge's Ethernet members, then refresh the ports and reassign roles; with no ports or no usable address STP is disabled instead */ void bstp_reinit(struct bstp_state *bs) { struct epoch_tracker et; struct bstp_port *bp; struct ifnet *ifp, *mif; u_char *e_addr; void *bridgeptr; static const u_char llzero[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ BSTP_LOCK_ASSERT(bs); if (LIST_EMPTY(&bs->bs_bplist)) goto disablestp; mif = NULL; bridgeptr = LIST_FIRST(&bs->bs_bplist)->bp_ifp->if_bridge; KASSERT(bridgeptr != NULL, ("Invalid bridge pointer")); /* * Search through the Ethernet adapters and find the one with the * lowest value. Make sure the adapter which we take the MAC address * from is part of this bridge, so we can have more than one independent * bridges in the same STP domain. */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_type != IFT_ETHER) continue; /* Not Ethernet */ if (ifp->if_bridge != bridgeptr) continue; /* Not part of our bridge */ if (bstp_addr_cmp(IF_LLADDR(ifp), llzero) == 0) continue; /* No mac address set */ if (mif == NULL) { mif = ifp; continue; } if (bstp_addr_cmp(IF_LLADDR(ifp), IF_LLADDR(mif)) < 0) { mif = ifp; continue; } } NET_EPOCH_EXIT(et); if (mif == NULL) goto disablestp; /* NOTE(review): mif is dereferenced here after NET_EPOCH_EXIT */ e_addr = IF_LLADDR(mif); bs->bs_bridge_pv.pv_dbridge_id = (((uint64_t)bs->bs_bridge_priority) << 48) | (((uint64_t)e_addr[0]) << 40) | (((uint64_t)e_addr[1]) << 32) | (((uint64_t)e_addr[2]) << 24) | (((uint64_t)e_addr[3]) << 16) | (((uint64_t)e_addr[4]) << 8) | (((uint64_t)e_addr[5])); bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id; bs->bs_bridge_pv.pv_cost = 0; bs->bs_bridge_pv.pv_dport_id = 0; bs->bs_bridge_pv.pv_port_id = 0; if (bs->bs_running && callout_pending(&bs->bs_bstpcallout) == 0) callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs); 
LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { /* port id: priority shifted left by 8, OR'ed with the low 12 bits of the interface index */ bp->bp_port_id = (bp->bp_priority << 8) | (bp->bp_ifp->if_index & 0xfff); taskqueue_enqueue(taskqueue_swi, &bp->bp_mediatask); } bstp_assign_roles(bs); bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER); return; disablestp: /* Set the bridge and root id (lower bits) to zero */ bs->bs_bridge_pv.pv_dbridge_id = ((uint64_t)bs->bs_bridge_priority) << 48; bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id; bs->bs_root_pv = bs->bs_bridge_pv; /* Disable any remaining ports, they will have no MAC address */ LIST_FOREACH(bp, &bs->bs_bplist, bp_next) { bp->bp_infois = BSTP_INFO_DISABLED; bstp_set_port_role(bp, BSTP_ROLE_DISABLED); } callout_stop(&bs->bs_bstpcallout); } /* module event handler: initialise the global bstp_list and its mutex on load, destroy the mutex on unload; other events return EOPNOTSUPP */ static int bstp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: mtx_init(&bstp_list_mtx, "bridgestp list", NULL, MTX_DEF); LIST_INIT(&bstp_list); break; case MOD_UNLOAD: mtx_destroy(&bstp_list_mtx); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t bstp_mod = { "bridgestp", bstp_modevent, 0 }; DECLARE_MODULE(bridgestp, bstp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(bridgestp, 1); /* initialise a bstp_state: lock, callout, empty port list, protocol defaults (RSTP) and the caller's callbacks; then add it to the global bstp_list */ void bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb) { BSTP_LOCK_INIT(bs); callout_init_mtx(&bs->bs_bstpcallout, &bs->bs_mtx, 0); LIST_INIT(&bs->bs_bplist); bs->bs_bridge_max_age = BSTP_DEFAULT_MAX_AGE; bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME; bs->bs_bridge_fdelay = BSTP_DEFAULT_FORWARD_DELAY; bs->bs_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY; bs->bs_hold_time = BSTP_DEFAULT_HOLD_TIME; bs->bs_migration_delay = BSTP_DEFAULT_MIGRATE_DELAY; bs->bs_txholdcount = BSTP_DEFAULT_HOLD_COUNT; bs->bs_protover = BSTP_PROTO_RSTP; bs->bs_state_cb = cb->bcb_state; bs->bs_rtage_cb = cb->bcb_rtage; bs->bs_vnet = curvnet; getmicrotime(&bs->bs_last_tc_time); mtx_lock(&bstp_list_mtx); LIST_INSERT_HEAD(&bstp_list, bs, bs_list); mtx_unlock(&bstp_list_mtx); } /* remove a bstp_state from the global list, drain its callout and destroy its lock; the port list must already be empty */ void bstp_detach(struct bstp_state *bs) { 
KASSERT(LIST_EMPTY(&bs->bs_bplist), ("bstp still active")); mtx_lock(&bstp_list_mtx); LIST_REMOVE(bs, bs_list); mtx_unlock(&bstp_list_mtx); callout_drain(&bs->bs_bstpcallout); BSTP_LOCK_DESTROY(bs); } /* schedule the tick callout, mark the instance running and reinitialise the bridge state */ void bstp_init(struct bstp_state *bs) { BSTP_LOCK(bs); callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs); bs->bs_running = 1; bstp_reinit(bs); BSTP_UNLOCK(bs); } /* force every port to DISCARDING, clear the running flag and stop the tick callout */ void bstp_stop(struct bstp_state *bs) { struct bstp_port *bp; BSTP_LOCK(bs); LIST_FOREACH(bp, &bs->bs_bplist, bp_next) bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING); bs->bs_running = 0; callout_stop(&bs->bs_bstpcallout); BSTP_UNLOCK(bs); } /* zero and initialise a port structure for ifp: tasks, default flags, initial state/role/tc state and path cost; the port is not added to the bridge's port list here; always returns 0 */ int bstp_create(struct bstp_state *bs, struct bstp_port *bp, struct ifnet *ifp) { bzero(bp, sizeof(struct bstp_port)); BSTP_LOCK(bs); bp->bp_ifp = ifp; bp->bp_bs = bs; bp->bp_priority = BSTP_DEFAULT_PORT_PRIORITY; TASK_INIT(&bp->bp_statetask, 0, bstp_notify_state, bp); TASK_INIT(&bp->bp_rtagetask, 0, bstp_notify_rtage, bp); TASK_INIT(&bp->bp_mediatask, 0, bstp_ifupdstatus, bp); /* Init state */ bp->bp_infois = BSTP_INFO_DISABLED; bp->bp_flags = BSTP_PORT_AUTOEDGE|BSTP_PORT_AUTOPTP; bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING); bstp_set_port_proto(bp, bs->bs_protover); bstp_set_port_role(bp, BSTP_ROLE_DISABLED); bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE); bp->bp_path_cost = bstp_calc_path_cost(bp); BSTP_UNLOCK(bs); return (0); } /* add a created port to the bridge's port list and start STP on it; only IFT_ETHER and IFT_L2VLAN interfaces are accepted, anything else returns EINVAL */ int bstp_enable(struct bstp_port *bp) { struct bstp_state *bs = bp->bp_bs; struct ifnet *ifp = bp->bp_ifp; KASSERT(bp->bp_active == 0, ("already a bstp member")); + NET_EPOCH_ASSERT(); /* Because bstp_update_roles() causes traffic. */ switch (ifp->if_type) { case IFT_ETHER: /* These can do spanning tree. */ case IFT_L2VLAN: break; default: /* Nothing else can. 
*/ return (EINVAL); } BSTP_LOCK(bs); LIST_INSERT_HEAD(&bs->bs_bplist, bp, bp_next); bp->bp_active = 1; bp->bp_flags |= BSTP_PORT_NEWINFO; bstp_reinit(bs); bstp_update_roles(bs, bp); BSTP_UNLOCK(bs); return (0); } /* mark the port's info disabled, take it off the bridge's port list and reinitialise the bridge state */ void bstp_disable(struct bstp_port *bp) { struct bstp_state *bs = bp->bp_bs; KASSERT(bp->bp_active == 1, ("not a bstp member")); BSTP_LOCK(bs); bstp_disable_port(bs, bp); LIST_REMOVE(bp, bp_next); bp->bp_active = 0; bstp_reinit(bs); BSTP_UNLOCK(bs); } /* * The bstp_port structure is about to be freed by the parent bridge. */ void bstp_destroy(struct bstp_port *bp) { KASSERT(bp->bp_active == 0, ("port is still attached")); /* drain the three per-port tasks, each of which was initialised with bp as its argument */ taskqueue_drain(taskqueue_swi, &bp->bp_statetask); taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask); taskqueue_drain(taskqueue_swi, &bp->bp_mediatask); /* if this port is still recorded as the root port, reassign roles */ if (bp->bp_bs->bs_root_port == bp) bstp_assign_roles(bp->bp_bs); } diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 7c4e48ff04c6..3dba672aa0fe 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1,3679 +1,3686 @@ /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. 
The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp */ /* * Network interface bridge support. * * TODO: * * - Currently only supports Ethernet-like interfaces (Ethernet, * 802.11, VLANs on Ethernet, etc.) Figure out a nice way * to bridge other types of interfaces (maybe consider * heterogeneous bridges). */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include /* for net/if.h */ #include #include /* string functions */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #if defined(INET) || defined(INET6) #include #endif #include #include #include #include #include #include #include #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* * Size of the route hash table. Must be a power of two. */ #ifndef BRIDGE_RTHASH_SIZE #define BRIDGE_RTHASH_SIZE 1024 #endif #define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1) /* * Default maximum number of addresses to cache. */ #ifndef BRIDGE_RTABLE_MAX #define BRIDGE_RTABLE_MAX 2000 #endif /* * Timeout (in seconds) for entries learned dynamically. 
*/ #ifndef BRIDGE_RTABLE_TIMEOUT #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */ #endif /* * Number of seconds between walks of the route list. */ #ifndef BRIDGE_RTABLE_PRUNE_PERIOD #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60) #endif /* * List of capabilities to possibly mask on the member interface. */ #define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\ IFCAP_TXCSUM_IPV6) /* * List of capabilities to strip */ #define BRIDGE_IFCAPS_STRIP IFCAP_LRO /* * Bridge locking * * The bridge relies heavily on the epoch(9) system to protect its data * structures. This means we can safely use CK_LISTs while in NET_EPOCH, but we * must ensure there is only one writer at a time. * * That is: for read accesses we only need to be in NET_EPOCH, but for write * accesses we must hold: * * - BRIDGE_RT_LOCK, for any change to bridge_rtnodes * - BRIDGE_LOCK, for any other change * * The BRIDGE_LOCK is a sleepable lock, because it is held across ioctl() * calls to bridge member interfaces and these ioctl()s can sleep. * The BRIDGE_RT_LOCK is a non-sleepable mutex, because it is sometimes * required while we're in NET_EPOCH and then we're not allowed to sleep. 
*/ #define BRIDGE_LOCK_INIT(_sc) do { \ sx_init(&(_sc)->sc_sx, "if_bridge"); \ mtx_init(&(_sc)->sc_rt_mtx, "if_bridge rt", NULL, MTX_DEF); \ } while (0) #define BRIDGE_LOCK_DESTROY(_sc) do { \ sx_destroy(&(_sc)->sc_sx); \ mtx_destroy(&(_sc)->sc_rt_mtx); \ } while (0) #define BRIDGE_LOCK(_sc) sx_xlock(&(_sc)->sc_sx) #define BRIDGE_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_sx) #define BRIDGE_LOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SX_XLOCKED) #define BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(_sc) \ MPASS(in_epoch(net_epoch_preempt) || sx_xlocked(&(_sc)->sc_sx)) #define BRIDGE_UNLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SX_UNLOCKED) #define BRIDGE_RT_LOCK(_sc) mtx_lock(&(_sc)->sc_rt_mtx) #define BRIDGE_RT_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_rt_mtx) #define BRIDGE_RT_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_rt_mtx, MA_OWNED) #define BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(_sc) \ MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(_sc)->sc_rt_mtx)) /* * Bridge interface list entry. */ struct bridge_iflist { CK_LIST_ENTRY(bridge_iflist) bif_next; struct ifnet *bif_ifp; /* member if */ struct bstp_port bif_stp; /* STP state */ uint32_t bif_flags; /* member if flags */ int bif_savedcaps; /* saved capabilities */ uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; }; /* * Bridge route node. */ struct bridge_rtnode { CK_LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */ CK_LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */ struct bridge_iflist *brt_dst; /* destination if */ unsigned long brt_expire; /* expiration time */ uint8_t brt_flags; /* address flags */ uint8_t brt_addr[ETHER_ADDR_LEN]; uint16_t brt_vlan; /* vlan id */ struct vnet *brt_vnet; struct epoch_context brt_epoch_ctx; }; #define brt_ifp brt_dst->bif_ifp /* * Software state for each bridge. 
*/ struct bridge_softc { struct ifnet *sc_ifp; /* make this an interface */ LIST_ENTRY(bridge_softc) sc_list; struct sx sc_sx; struct mtx sc_rt_mtx; uint32_t sc_brtmax; /* max # of addresses */ uint32_t sc_brtcnt; /* cur. # of addresses */ uint32_t sc_brttimeout; /* rt timeout in seconds */ struct callout sc_brcallout; /* bridge callout */ CK_LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ CK_LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ CK_LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ uint32_t sc_rthash_key; /* key for hash */ CK_LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ uint32_t sc_brtexceeded; /* # of cache drops */ struct ifnet *sc_ifaddr; /* member mac copied from */ struct ether_addr sc_defaddr; /* Default MAC address */ struct epoch_context sc_epoch_ctx; }; VNET_DEFINE_STATIC(struct sx, bridge_list_sx); #define V_bridge_list_sx VNET(bridge_list_sx) static eventhandler_tag bridge_detach_cookie; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone); #define V_bridge_rtnode_zone VNET(bridge_rtnode_zone) static int bridge_clone_create(struct if_clone *, int, caddr_t); static void bridge_clone_destroy(struct ifnet *); static int bridge_ioctl(struct ifnet *, u_long, caddr_t); static void bridge_mutecaps(struct bridge_softc *); static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); static int bridge_transmit(struct ifnet *, struct mbuf *); static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int 
bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, struct mbuf *, int); static void bridge_span(struct bridge_softc *, struct mbuf *); static int bridge_rtupdate(struct bridge_softc *, const uint8_t *, uint16_t, struct bridge_iflist *, int, uint8_t); static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rttrim(struct bridge_softc *); static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *, const uint8_t *, uint16_t); static int bridge_rtnode_insert(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtnode_destroy(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtable_expire(struct ifnet *, int); static void bridge_state_change(struct ifnet *, int); static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *, const char *name); static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *, struct ifnet *ifp); static void bridge_delete_member(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_delete_span(struct bridge_softc *, struct bridge_iflist *); static int bridge_ioctl_add(struct bridge_softc *, void *); static int bridge_ioctl_del(struct bridge_softc *, void *); static int bridge_ioctl_gifflags(struct bridge_softc *, void *); static int bridge_ioctl_sifflags(struct 
bridge_softc *, void *); static int bridge_ioctl_scache(struct bridge_softc *, void *); static int bridge_ioctl_gcache(struct bridge_softc *, void *); static int bridge_ioctl_gifs(struct bridge_softc *, void *); static int bridge_ioctl_rts(struct bridge_softc *, void *); static int bridge_ioctl_saddr(struct bridge_softc *, void *); static int bridge_ioctl_sto(struct bridge_softc *, void *); static int bridge_ioctl_gto(struct bridge_softc *, void *); static int bridge_ioctl_daddr(struct bridge_softc *, void *); static int bridge_ioctl_flush(struct bridge_softc *, void *); static int bridge_ioctl_gpri(struct bridge_softc *, void *); static int bridge_ioctl_spri(struct bridge_softc *, void *); static int bridge_ioctl_ght(struct bridge_softc *, void *); static int bridge_ioctl_sht(struct bridge_softc *, void *); static int bridge_ioctl_gfd(struct bridge_softc *, void *); static int bridge_ioctl_sfd(struct bridge_softc *, void *); static int bridge_ioctl_gma(struct bridge_softc *, void *); static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); static int bridge_ioctl_grte(struct bridge_softc *, void *); static int bridge_ioctl_gifsstp(struct bridge_softc *, void *); static int bridge_ioctl_sproto(struct bridge_softc *, void *); static int bridge_ioctl_stxhc(struct bridge_softc *, void *); static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); static int bridge_ip_checkbasic(struct mbuf **mp); #ifdef INET6 static int bridge_ip6_checkbasic(struct mbuf **mp); #endif /* INET6 */ static int bridge_fragment(struct ifnet *, struct mbuf **mp, struct ether_header *, int, 
struct llc *); static void bridge_linkstate(struct ifnet *ifp); static void bridge_linkcheck(struct bridge_softc *sc); /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */ #define VLANTAGOF(_m) \ (_m->m_flags & M_VLANTAG) ? EVL_VLANOFTAG(_m->m_pkthdr.ether_vtag) : 1 static struct bstp_cb_ops bridge_ops = { .bcb_state = bridge_state_change, .bcb_rtage = bridge_rtable_expire }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bridge"); /* only pass IP[46] packets when pfil is enabled */ VNET_DEFINE_STATIC(int, pfil_onlyip) = 1; #define V_pfil_onlyip VNET(pfil_onlyip) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, "Only pass IP packets when pfil is enabled"); /* run pfil hooks on the bridge interface */ VNET_DEFINE_STATIC(int, pfil_bridge) = 1; #define V_pfil_bridge VNET(pfil_bridge) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, "Packet filter on the bridge interface"); /* layer2 filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw); #define V_pfil_ipfw VNET(pfil_ipfw) /* layer2 ARP filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw_arp); #define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, "Filter ARP packets through IPFW layer2"); /* run pfil hooks on the member interface */ VNET_DEFINE_STATIC(int, pfil_member) = 1; #define V_pfil_member VNET(pfil_member) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, "Packet filter on the member interface"); /* run pfil hooks on the physical interface for locally destined packets */ VNET_DEFINE_STATIC(int, pfil_local_phys); #define V_pfil_local_phys VNET(pfil_local_phys) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RWTUN | CTLFLAG_VNET, 
&VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); /* log STP state changes */ VNET_DEFINE_STATIC(int, log_stp); #define V_log_stp VNET(log_stp) SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0, "Log STP state changes"); /* share MAC with first bridge member */ VNET_DEFINE_STATIC(int, bridge_inherit_mac); #define V_bridge_inherit_mac VNET(bridge_inherit_mac) SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); struct bridge_control { int (*bc_func)(struct bridge_softc *, void *); int bc_argsize; int bc_flags; }; #define BC_F_COPYIN 0x01 /* copy arguments in */ #define BC_F_COPYOUT 0x02 /* copy arguments out */ #define BC_F_SUSER 0x04 /* do super-user check */ const struct bridge_control bridge_control_table[] = { { bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gto, 
sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_addspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_delspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gbparam, sizeof(struct ifbropreq), BC_F_COPYOUT }, { bridge_ioctl_grte, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifsstp, sizeof(struct ifbpstpconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_stxhc, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, }; const int bridge_control_table_size = nitems(bridge_control_table); VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list); #define V_bridge_list VNET(bridge_list) #define BRIDGE_LIST_LOCK_INIT(x) sx_init(&V_bridge_list_sx, \ "if_bridge list") #define BRIDGE_LIST_LOCK_DESTROY(x) sx_destroy(&V_bridge_list_sx) #define BRIDGE_LIST_LOCK(x) sx_xlock(&V_bridge_list_sx) #define BRIDGE_LIST_UNLOCK(x) sx_xunlock(&V_bridge_list_sx) VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner); #define V_bridge_cloner VNET(bridge_cloner) static const char 
bridge_name[] = "bridge";

/*
 * vnet_bridge_init:
 *
 *	Per-VNET initialization: create the UMA zone used for
 *	struct bridge_rtnode, set up the bridge list and its lock, and
 *	register the "bridge" interface cloner.
 */
static void
vnet_bridge_init(const void *unused __unused)
{

	V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
	    sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	BRIDGE_LIST_LOCK_INIT();
	LIST_INIT(&V_bridge_list);
	V_bridge_cloner = if_clone_simple(bridge_name,
	    bridge_clone_create, bridge_clone_destroy, 0);
}
VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_bridge_init, NULL);

/*
 * vnet_bridge_uninit:
 *
 *	Per-VNET teardown: detach the cloner, destroy the bridge list
 *	lock and release the rtnode UMA zone.
 */
static void
vnet_bridge_uninit(const void *unused __unused)
{

	if_clone_detach(V_bridge_cloner);
	/* bridge_ifdetach() tests this pointer for NULL and returns early. */
	V_bridge_cloner = NULL;
	BRIDGE_LIST_LOCK_DESTROY();

	/*
	 * Callbacks may use the UMA zone, so drain the pending epoch
	 * callbacks before the zone is destroyed.
	 */
	epoch_drain_callbacks(net_epoch_preempt);

	uma_zdestroy(V_bridge_rtnode_zone);
}
VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_bridge_uninit, NULL);

/*
 * bridge_modevent:
 *
 *	Module event handler.  On load it installs the bridge_dn_p hook
 *	and registers bridge_ifdetach() for ifnet_departure_event; on
 *	unload it undoes both.  Any other event type yields EOPNOTSUPP.
 */
static int
bridge_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		bridge_dn_p = bridge_dummynet;
		bridge_detach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, bridge_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    bridge_detach_cookie);
		bridge_dn_p = NULL;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t bridge_mod = {
	"if_bridge",
	bridge_modevent,
	0
};

DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_bridge, 1);
MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);

/*
 * handler for net.link.bridge.ipfw
 *
 * The value handled by sysctl_handle_int() is reduced to its low bit.
 * When it differs from the current V_pfil_ipfw it is stored, and if the
 * knob is now enabled the pfil_onlyip, pfil_bridge and pfil_member knobs
 * are cleared.  Returns whatever sysctl_handle_int() returned.
 */
static int
sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
{
	int enable = V_pfil_ipfw;
	int error;

	error = sysctl_handle_int(oidp, &enable, 0, req);
	/* Only 0 and 1 are meaningful values. */
	enable &= 1;

	if (enable != V_pfil_ipfw) {
		V_pfil_ipfw = enable;

		/*
		 * Disable pfil so that ipfw doesn't run twice; if the user
		 * really wants both then they can re-enable pfil_bridge and/or
		 * pfil_member. Also allow non-IP packets as ipfw can filter by
		 * layer2 type.
		 */
		if (V_pfil_ipfw) {
			V_pfil_onlyip = 0;
			V_pfil_bridge = 0;
			V_pfil_member = 0;
		}
	}

	return (error);
}
SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET | CTLFLAG_NEEDGIANT,
    &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
    "Layer2 filter with IPFW");

/*
 * bridge_clone_create:
 *
 *	Create a new bridge instance.
 *
 *	Allocates the softc and its ifnet, initializes the lock, the
 *	routing table, its callout and the member/span lists, wires up
 *	the ifnet methods, attaches spanning tree and Ethernet state, and
 *	finally links the bridge into the per-VNET bridge list.
 *
 *	Returns 0 on success, or ENOSPC if if_alloc() fails.
 */
static int
bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct bridge_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		/* Nothing else has been set up yet; just drop the softc. */
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	BRIDGE_LOCK_INIT(sc);
	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;

	/* Initialize our routing table. */
	bridge_rtable_init(sc);

	/* The routing table callout is tied to sc_rt_mtx. */
	callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0);

	CK_LIST_INIT(&sc->sc_iflist);
	CK_LIST_INIT(&sc->sc_spanlist);

	ifp->if_softc = sc;
	if_initname(ifp, bridge_name, unit);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = bridge_ioctl;
	ifp->if_transmit = bridge_transmit;
	ifp->if_qflush = bridge_qflush;
	ifp->if_init = bridge_init;
	ifp->if_type = IFT_BRIDGE;

	/* Generate the default address kept in sc_defaddr. */
	ether_gen_addr(ifp, &sc->sc_defaddr);

	bstp_attach(&sc->sc_stp, &bridge_ops);
	ether_ifattach(ifp, sc->sc_defaddr.octet);
	/*
	 * Now undo some of the damage: if_baudrate and if_type are
	 * reassigned after ether_ifattach().
	 */
	ifp->if_baudrate = 0;
	ifp->if_type = IFT_BRIDGE;

	BRIDGE_LIST_LOCK();
	LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
	BRIDGE_LIST_UNLOCK();

	return (0);
}

/*
 * bridge_clone_destroy_cb:
 *
 *	Epoch callback that performs the final release of a bridge softc:
 *	recovers the softc from its embedded sc_epoch_ctx, destroys the
 *	bridge lock and frees the memory.
 */
static void
bridge_clone_destroy_cb(struct epoch_context *ctx)
{
	struct bridge_softc *sc;

	sc = __containerof(ctx, struct bridge_softc, sc_epoch_ctx);

	BRIDGE_LOCK_DESTROY(sc);
	free(sc, M_DEVBUF);
}

/*
 * bridge_clone_destroy:
 *
 *	Destroy a bridge instance.
*/ static void bridge_clone_destroy(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_softc; struct bridge_iflist *bif; struct epoch_tracker et; BRIDGE_LOCK(sc); bridge_stop(ifp, 1); ifp->if_flags &= ~IFF_UP; while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL) bridge_delete_member(sc, bif, 0); while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) { bridge_delete_span(sc, bif); } /* Tear down the routing table. */ bridge_rtable_fini(sc); BRIDGE_UNLOCK(sc); NET_EPOCH_ENTER(et); callout_drain(&sc->sc_brcallout); BRIDGE_LIST_LOCK(); LIST_REMOVE(sc, sc_list); BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); NET_EPOCH_EXIT(et); ether_ifdetach(ifp); if_free(ifp); NET_EPOCH_CALL(bridge_clone_destroy_cb, &sc->sc_epoch_ctx); } /* * bridge_ioctl: * * Handle a control request from the operator. */ static int bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct bridge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bridge_iflist *bif; struct thread *td = curthread; union { struct ifbreq ifbreq; struct ifbifconf ifbifconf; struct ifbareq ifbareq; struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; int error = 0, oldmtu; BRIDGE_LOCK(sc); switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { error = EINVAL; break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { error = EINVAL; break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { error = EINVAL; break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); if (error) break; } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { error = EINVAL; break; } bzero(&args, sizeof(args)); if (bc->bc_flags & BC_F_COPYIN) { error = copyin(ifd->ifd_data, &args, 
ifd->ifd_len); if (error) break; } oldmtu = ifp->if_mtu; error = (*bc->bc_func)(sc, &args); if (error) break; /* * Bridge MTU may change during addition of the first port. * If it did, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } if (bc->bc_flags & BC_F_COPYOUT) error = copyout(&args, ifd->ifd_data, ifd->ifd_len); break; case SIOCSIFFLAGS: if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ bridge_stop(ifp, 1); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. */ BRIDGE_UNLOCK(sc); (*ifp->if_init)(sc); BRIDGE_LOCK(sc); } break; case SIOCSIFMTU: if (ifr->ifr_mtu < 576) { error = EINVAL; break; } if (CK_LIST_EMPTY(&sc->sc_iflist)) { sc->sc_ifp->if_mtu = ifr->ifr_mtu; break; } CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) { log(LOG_NOTICE, "%s: invalid MTU: %u(%s)" " != %d\n", sc->sc_ifp->if_xname, bif->bif_ifp->if_mtu, bif->bif_ifp->if_xname, ifr->ifr_mtu); error = EINVAL; break; } } if (!error) sc->sc_ifp->if_mtu = ifr->ifr_mtu; break; default: /* * drop the lock as ether_ioctl() will call bridge_start() and * cause the lock to be recursed. 
*/ BRIDGE_UNLOCK(sc); error = ether_ioctl(ifp, cmd, data); BRIDGE_LOCK(sc); break; } BRIDGE_UNLOCK(sc); return (error); } /* * bridge_mutecaps: * * Clear or restore unwanted capabilities on the member interface */ static void bridge_mutecaps(struct bridge_softc *sc) { struct bridge_iflist *bif; int enabled, mask; BRIDGE_LOCK_ASSERT(sc); /* Initial bitmask of capabilities to test */ mask = BRIDGE_IFCAPS_MASK; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { /* Every member must support it or its disabled */ mask &= bif->bif_savedcaps; } CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; /* strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; bridge_set_ifcap(sc, bif, enabled); } } static void bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) { struct ifnet *ifp = bif->bif_ifp; struct ifreq ifr; int error, mask, stuck; bzero(&ifr, sizeof(ifr)); ifr.ifr_reqcap = set; if (ifp->if_capenable != set) { error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr); if (error) if_printf(sc->sc_ifp, "error setting capabilities on %s: %d\n", ifp->if_xname, error); mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP; stuck = ifp->if_capenable & mask & ~set; if (stuck != 0) if_printf(sc->sc_ifp, "can't disable some capabilities on %s: 0x%x\n", ifp->if_xname, stuck); } } /* * bridge_lookup_member: * * Lookup a bridge member interface. */ static struct bridge_iflist * bridge_lookup_member(struct bridge_softc *sc, const char *name) { struct bridge_iflist *bif; struct ifnet *ifp; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { ifp = bif->bif_ifp; if (strcmp(ifp->if_xname, name) == 0) return (bif); } return (NULL); } /* * bridge_lookup_member_if: * * Lookup a bridge member interface by ifnet*. 
*/ static struct bridge_iflist * bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) { struct bridge_iflist *bif; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp == member_ifp) return (bif); } return (NULL); } static void bridge_delete_member_cb(struct epoch_context *ctx) { struct bridge_iflist *bif; bif = __containerof(ctx, struct bridge_iflist, bif_epoch_ctx); free(bif, M_DEVBUF); } /* * bridge_delete_member: * * Delete the specified member interface. */ static void bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, int gone) { struct ifnet *ifs = bif->bif_ifp; struct ifnet *fif = NULL; struct bridge_iflist *bifl; BRIDGE_LOCK_ASSERT(sc); if (bif->bif_flags & IFBIF_STP) bstp_disable(&bif->bif_stp); ifs->if_bridge = NULL; CK_LIST_REMOVE(bif, bif_next); /* * If removing the interface that gave the bridge its mac address, set * the mac address of the bridge to the address of the next member, or * to its default address if no members are left. */ if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) { if (CK_LIST_EMPTY(&sc->sc_iflist)) { bcopy(&sc->sc_defaddr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = NULL; } else { bifl = CK_LIST_FIRST(&sc->sc_iflist); fif = bifl->bif_ifp; bcopy(IF_LLADDR(fif), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = fif; } EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } bridge_linkcheck(sc); bridge_mutecaps(sc); /* recalcuate now this interface is removed */ BRIDGE_RT_LOCK(sc); bridge_rtdelete(sc, ifs, IFBF_FLUSHALL); BRIDGE_RT_UNLOCK(sc); KASSERT(bif->bif_addrcnt == 0, ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt)); ifs->if_bridge_output = NULL; ifs->if_bridge_input = NULL; ifs->if_bridge_linkstate = NULL; if (!gone) { switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: /* * Take the interface out of promiscuous mode, but only * if it was promiscuous in the first place. 
It might * not be if we're in the bridge_ioctl_add() error path. */ if (ifs->if_flags & IFF_PROMISC) (void) ifpromisc(ifs, 0); break; case IFT_GIF: break; default: #ifdef DIAGNOSTIC panic("bridge_delete_member: impossible"); #endif break; } /* reneable any interface capabilities */ bridge_set_ifcap(sc, bif, bif->bif_savedcaps); } bstp_destroy(&bif->bif_stp); /* prepare to free */ NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx); } /* * bridge_delete_span: * * Delete the specified span interface. */ static void bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif) { BRIDGE_LOCK_ASSERT(sc); KASSERT(bif->bif_ifp->if_bridge == NULL, ("%s: not a span interface", __func__)); CK_LIST_REMOVE(bif, bif_next); NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx); } static int bridge_ioctl_add(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; int error = 0; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); if (ifs->if_ioctl == NULL) /* must be supported */ return (EINVAL); /* If it's in the span list, it can't be a member. */ CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EBUSY); if (ifs->if_bridge == sc) return (EEXIST); if (ifs->if_bridge != NULL) return (EBUSY); switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_GIF: /* permitted interface types */ break; default: return (EINVAL); } #ifdef INET6 /* * Two valid inet6 addresses with link-local scope must not be * on the parent interface and the member interfaces at the * same time. This restriction is needed to prevent violation * of link-local scope zone. Attempts to add a member * interface which has inet6 addresses when the parent has * inet6 triggers removal of all inet6 addresses on the member * interface. */ /* Check if the parent interface has a link-local scope addr. 
*/ if (V_allow_llz_overlap == 0 && in6ifa_llaonifp(sc->sc_ifp) != NULL) { /* * If any, remove all inet6 addresses from the member * interfaces. */ CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (in6ifa_llaonifp(bif->bif_ifp)) { in6_ifdetach(bif->bif_ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", bif->bif_ifp->if_xname); } } if (in6ifa_llaonifp(ifs)) { in6_ifdetach(ifs); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifs->if_xname); } } #endif /* Allow the first Ethernet member to define the MTU */ if (CK_LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n", ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; /* * Assign the interface's MAC address to the bridge if it's the first * member and the MAC address of the bridge has not been changed from * the default randomly generated one. */ if (V_bridge_inherit_mac && CK_LIST_EMPTY(&sc->sc_iflist) && !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } ifs->if_bridge = sc; ifs->if_bridge_output = bridge_output; ifs->if_bridge_input = bridge_input; ifs->if_bridge_linkstate = bridge_linkstate; bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp); /* * XXX: XLOCK HERE!?! * * NOTE: insert_***HEAD*** should be safe for the traversals. 
*/ CK_LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); /* Set interface capabilities to the intersection set of all members */ bridge_mutecaps(sc); bridge_linkcheck(sc); /* Place the interface into promiscuous mode */ switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: error = ifpromisc(ifs, 1); break; } if (error) bridge_delete_member(sc, bif, 0); return (error); } static int bridge_ioctl_del(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bridge_delete_member(sc, bif, 0); return (0); } static int bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; req->ifbr_state = bp->bp_state; req->ifbr_priority = bp->bp_priority; req->ifbr_path_cost = bp->bp_path_cost; req->ifbr_portno = bif->bif_ifp->if_index & 0xfff; req->ifbr_proto = bp->bp_protover; req->ifbr_role = bp->bp_role; req->ifbr_stpflags = bp->bp_flags; req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; /* Copy STP state options as flags */ if (bp->bp_operedge) req->ifbr_ifsflags |= IFBIF_BSTP_EDGE; if (bp->bp_flags & BSTP_PORT_AUTOEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE; if (bp->bp_ptp_link) req->ifbr_ifsflags |= IFBIF_BSTP_PTP; if (bp->bp_flags & BSTP_PORT_AUTOPTP) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP; if (bp->bp_flags & BSTP_PORT_ADMEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE; if (bp->bp_flags & BSTP_PORT_ADMCOST) req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST; return (0); } static int bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) { + struct epoch_tracker et; struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; int error; bif = 
bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ return (EINVAL); + NET_EPOCH_ENTER(et); + if (req->ifbr_ifsflags & IFBIF_STP) { if ((bif->bif_flags & IFBIF_STP) == 0) { error = bstp_enable(&bif->bif_stp); - if (error) + if (error) { + NET_EPOCH_EXIT(et); return (error); + } } } else { if ((bif->bif_flags & IFBIF_STP) != 0) bstp_disable(&bif->bif_stp); } /* Pass on STP flags */ bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0); bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0); bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0); bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0); /* Save the bits relating to the bridge */ bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK; + NET_EPOCH_EXIT(et); + return (0); } static int bridge_ioctl_scache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brtmax = param->ifbrp_csize; bridge_rttrim(sc); return (0); } static int bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_csize = sc->sc_brtmax; return (0); } static int bridge_ioctl_gifs(struct bridge_softc *sc, void *arg) { struct ifbifconf *bifc = arg; struct bridge_iflist *bif; struct ifbreq breq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) count++; CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) count++; buflen = sizeof(breq) * count; if (bifc->ifbic_len == 0) { bifc->ifbic_len = buflen; return (0); } outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bifc->ifbic_len, buflen); bzero(&breq, sizeof(breq)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); /* Fill in the ifbreq 
structure */ error = bridge_ioctl_gifflags(sc, &breq); if (error) break; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); breq.ifbr_ifsflags = bif->bif_flags; breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } bifc->ifbic_len = sizeof(breq) * count; error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_rts(struct bridge_softc *sc, void *arg) { struct ifbaconf *bac = arg; struct bridge_rtnode *brt; struct ifbareq bareq; char *buf, *outbuf; int count, buflen, len, error = 0; if (bac->ifbac_len == 0) return (0); count = 0; CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) count++; buflen = sizeof(bareq) * count; outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bac->ifbac_len, buflen); bzero(&bareq, sizeof(bareq)); CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { if (len < sizeof(bareq)) goto out; strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname, sizeof(bareq.ifba_ifsname)); memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); bareq.ifba_vlan = brt->brt_vlan; if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && time_uptime < brt->brt_expire) bareq.ifba_expire = brt->brt_expire - time_uptime; else bareq.ifba_expire = 0; bareq.ifba_flags = brt->brt_flags; memcpy(buf, &bareq, sizeof(bareq)); count++; buf += sizeof(bareq); len -= sizeof(bareq); } out: bac->ifbac_len = sizeof(bareq) * count; error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; struct bridge_iflist *bif; struct epoch_tracker et; int 
error; NET_EPOCH_ENTER(et); bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) { NET_EPOCH_EXIT(et); return (ENOENT); } /* bridge_rtupdate() may acquire the lock. */ error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); NET_EPOCH_EXIT(et); return (error); } static int bridge_ioctl_sto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brttimeout = param->ifbrp_ctime; return (0); } static int bridge_ioctl_gto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_ctime = sc->sc_brttimeout; return (0); } static int bridge_ioctl_daddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan)); } static int bridge_ioctl_flush(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; BRIDGE_RT_LOCK(sc); bridge_rtflush(sc, req->ifbr_ifsflags); BRIDGE_RT_UNLOCK(sc); return (0); } static int bridge_ioctl_gpri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_prio = bs->bs_bridge_priority; return (0); } static int bridge_ioctl_spri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio)); } static int bridge_ioctl_ght(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_hellotime = bs->bs_bridge_htime >> 8; return (0); } static int bridge_ioctl_sht(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime)); } static int bridge_ioctl_gfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8; return (0); } static int bridge_ioctl_sfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return 
(bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay)); } static int bridge_ioctl_gma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_maxage = bs->bs_bridge_max_age >> 8; return (0); } static int bridge_ioctl_sma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage)); } static int bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } static int bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } static int bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EBUSY); if (ifs->if_bridge != NULL) return (EBUSY); switch (ifs->if_type) { case IFT_ETHER: case IFT_GIF: case IFT_L2VLAN: break; default: return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_SPAN; CK_LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next); return (0); } static int bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct 
bridge_iflist *bif; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) return (ENOENT); bridge_delete_span(sc, bif); return (0); } static int bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg) { struct ifbropreq *req = arg; struct bstp_state *bs = &sc->sc_stp; struct bstp_port *root_port; req->ifbop_maxage = bs->bs_bridge_max_age >> 8; req->ifbop_hellotime = bs->bs_bridge_htime >> 8; req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; root_port = bs->bs_root_port; if (root_port == NULL) req->ifbop_root_port = 0; else req->ifbop_root_port = root_port->bp_ifp->if_index; req->ifbop_holdcount = bs->bs_txholdcount; req->ifbop_priority = bs->bs_bridge_priority; req->ifbop_protocol = bs->bs_protover; req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; return (0); } static int bridge_ioctl_grte(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_cexceeded = sc->sc_brtexceeded; return (0); } static int bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg) { struct ifbpstpconf *bifstp = arg; struct bridge_iflist *bif; struct bstp_port *bp; struct ifbpstpreq bpreq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if ((bif->bif_flags & IFBIF_STP) != 0) count++; } buflen = sizeof(bpreq) * count; if (bifstp->ifbpstp_len == 0) { bifstp->ifbpstp_len = buflen; return (0); } outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bifstp->ifbpstp_len, buflen); bzero(&bpreq, sizeof(bpreq)); 
CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(bpreq)) break; if ((bif->bif_flags & IFBIF_STP) == 0) continue; bp = &bif->bif_stp; bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; memcpy(buf, &bpreq, sizeof(bpreq)); count++; buf += sizeof(bpreq); len -= sizeof(bpreq); } bifstp->ifbpstp_len = sizeof(bpreq) * count; error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_sproto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto)); } static int bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc)); } /* * bridge_ifdetach: * * Detach an interface from a bridge. Called when a member * interface is detaching. */ static void bridge_ifdetach(void *arg __unused, struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; if (ifp->if_flags & IFF_RENAMING) return; if (V_bridge_cloner == NULL) { /* * This detach handler can be called after * vnet_bridge_uninit(). Just return in that case. 
*/ return; } /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif != NULL) bridge_delete_member(sc, bif, 1); BRIDGE_UNLOCK(sc); return; } /* Check if the interface is a span port */ BRIDGE_LIST_LOCK(); LIST_FOREACH(sc, &V_bridge_list, sc_list) { BRIDGE_LOCK(sc); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifp == bif->bif_ifp) { bridge_delete_span(sc, bif); break; } BRIDGE_UNLOCK(sc); } BRIDGE_LIST_UNLOCK(); } /* * bridge_init: * * Initialize a bridge interface. */ static void bridge_init(void *xsc) { struct bridge_softc *sc = (struct bridge_softc *)xsc; struct ifnet *ifp = sc->sc_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; BRIDGE_LOCK(sc); callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */ BRIDGE_UNLOCK(sc); } /* * bridge_stop: * * Stop the bridge interface. */ static void bridge_stop(struct ifnet *ifp, int disable) { struct bridge_softc *sc = ifp->if_softc; BRIDGE_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; BRIDGE_RT_LOCK(sc); callout_stop(&sc->sc_brcallout); bstp_stop(&sc->sc_stp); bridge_rtflush(sc, IFBF_FLUSHDYN); BRIDGE_RT_UNLOCK(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * bridge_enqueue: * * Enqueue a packet on a bridge member interface. * */ static int bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) { int len, err = 0; short mflags; struct mbuf *m0; /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = NULL; len = m->m_pkthdr.len; mflags = m->m_flags; /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. 
*/ if ((m->m_flags & M_VLANTAG) && (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if (m == NULL) { if_printf(dst_ifp, "unable to prepend VLAN header\n"); if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1); continue; } m->m_flags &= ~M_VLANTAG; } M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */ if ((err = dst_ifp->if_transmit(dst_ifp, m))) { m_freem(m0); if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); break; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1); } return (err); } /* * bridge_dummynet: * * Receive a queued packet from dummynet and pass it on to the output * interface. * * The mbuf has the Ethernet header already attached. */ static void bridge_dummynet(struct mbuf *m, struct ifnet *ifp) { struct bridge_softc *sc; sc = ifp->if_bridge; /* * The packet didnt originate from a member interface. This should only * ever happen if a member interface is removed while packets are * queued for it. */ if (sc == NULL) { m_freem(m); return; } if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, ifp, m); } /* * bridge_output: * * Send output from a bridge member interface. This * performs the bridging function for locally originated * packets. * * The mbuf has the Ethernet header already attached. We must * enqueue or free the mbuf before returning. 
*/ static int bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { struct ether_header *eh; struct ifnet *bifp, *dst_if; struct bridge_softc *sc; uint16_t vlan; NET_EPOCH_ASSERT(); if (m->m_len < ETHER_HDR_LEN) { m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) return (0); } eh = mtod(m, struct ether_header *); sc = ifp->if_bridge; vlan = VLANTAGOF(m); bifp = sc->sc_ifp; /* * If bridge is down, but the original output interface is up, * go ahead and send out that interface. Otherwise, the packet * is dropped below. */ if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { dst_if = ifp; goto sendunicast; } /* * If the packet is a multicast, or we don't know a better way to * get there, send to all interfaces. */ if (ETHER_IS_MULTICAST(eh->ether_dhost)) dst_if = NULL; else dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan); /* Tap any traffic not passing back out the originating interface */ if (dst_if != ifp) ETHER_BPF_MTAP(bifp, m); if (dst_if == NULL) { struct bridge_iflist *bif; struct mbuf *mc; int used = 0; bridge_span(sc, m); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { dst_if = bif->bif_ifp; if (dst_if->if_type == IFT_GIF) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; /* * If this is not the original output interface, * and the interface is participating in spanning * tree, make sure the port is in a state that * allows forwarding. */ if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if (CK_LIST_NEXT(bif, bif_next) == NULL) { used = 1; mc = m; } else { mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(bifp, IFCOUNTER_OERRORS, 1); continue; } } bridge_enqueue(sc, dst_if, mc); } if (used == 0) m_freem(m); return (0); } sendunicast: /* * XXX Spanning tree consideration here? 
*/ bridge_span(sc, m); if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); return (0); } bridge_enqueue(sc, dst_if, m); return (0); } /* * bridge_transmit: * * Do output on a bridge. * */ static int bridge_transmit(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; struct ether_header *eh; struct ifnet *dst_if; int error = 0; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { error = bridge_enqueue(sc, dst_if, m); } else bridge_broadcast(sc, ifp, m, 0); return (error); } /* * The ifp->if_qflush entry point for if_bridge(4) is no-op. */ static void bridge_qflush(struct ifnet *ifp __unused) { } /* * bridge_forward: * * The forwarding function of the bridge. * * NOTE: Releases the lock on return. */ static void bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct mbuf *m) { struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; uint16_t vlan; uint8_t *dst; int error; NET_EPOCH_ASSERT(); src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; eh = mtod(m, struct ether_header *); dst = eh->ether_dhost; /* If the interface is learning, record the address. */ if (sbif->bif_flags & IFBIF_LEARNING) { error = bridge_rtupdate(sc, eh->ether_shost, vlan, sbif, 0, IFBAF_DYNAMIC); /* * If the interface has addresses limits then deny any source * that is not in the cache. */ if (error && sbif->bif_addrmax) goto drop; } if ((sbif->bif_flags & IFBIF_STP) != 0 && sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) goto drop; /* * At this point, the port either doesn't participate * in spanning tree or it is in the forwarding state. 
*/ /* * If the packet is unicast, destined for someone on * "this" side of the bridge, drop it. */ if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) { dst_if = bridge_rtlookup(sc, dst, vlan); if (src_if == dst_if) goto drop; } else { /* * Check if its a reserved multicast address, any address * listed in 802.1D section 7.12.6 may not be forwarded by the * bridge. * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F */ if (dst[0] == 0x01 && dst[1] == 0x80 && dst[2] == 0xc2 && dst[3] == 0x00 && dst[4] == 0x00 && dst[5] <= 0x0f) goto drop; /* ...forward it to all interfaces. */ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); dst_if = NULL; } /* * If we have a destination interface which is a member of our bridge, * OR this is a unicast packet, push it through the bpf(4) machinery. * For broadcast or multicast packets, don't bother because it will * be reinjected into ether_input. We do this before we pass the packets * through the pfil(9) framework, as it is possible that pfil(9) will * drop the packet, or possibly modify it, making it difficult to debug * firewall issues on the bridge. */ if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) ETHER_BPF_MTAP(ifp, m); /* run the packet filter */ if (PFIL_HOOKED_IN(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_IN(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) return; if (m == NULL) return; } if (dst_if == NULL) { bridge_broadcast(sc, src_if, m, 1); return; } /* * At this point, we're dealing with a unicast frame * going to a different interface. */ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) goto drop; dbif = bridge_lookup_member_if(sc, dst_if); if (dbif == NULL) /* Not a member of the bridge (anymore?) 
*/ goto drop; /* Private segments can not talk to each other */ if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, dst_if, m); return; drop: m_freem(m); } /* * bridge_input: * * Receive input from a member interface. Queue the packet for * bridging if it is not for us. */ static struct mbuf * bridge_input(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif, *bif2; struct ifnet *bifp; struct ether_header *eh; struct mbuf *mc, *mc2; uint16_t vlan; int error; NET_EPOCH_ASSERT(); if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (m); bifp = sc->sc_ifp; vlan = VLANTAGOF(m); /* * Implement support for bridge monitoring. If this flag has been * set on this interface, discard the packet once we push it through * the bpf(4) machinery, but before we do, increment the byte and * packet counters associated with this interface. */ if ((bifp->if_flags & IFF_MONITOR) != 0) { m->m_pkthdr.rcvif = bifp; ETHER_BPF_MTAP(bifp, m); if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return (NULL); } bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { return (m); } eh = mtod(m, struct ether_header *); bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { /* Tap off 802.1D packets; they do not get forwarded. 
*/ if (memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */ return (NULL); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { return (m); } /* * Make a deep copy of the packet and enqueue the copy * for bridge processing; return the original packet for * local processing. */ mc = m_dup(m, M_NOWAIT); if (mc == NULL) { return (m); } /* Perform the bridge forwarding function with the copy. */ bridge_forward(sc, bif, mc); /* * Reinject the mbuf as arriving on the bridge so we have a * chance at claiming multicast packets. We can not loop back * here from ether_input as a bridge is never a member of a * bridge. */ KASSERT(bifp->if_bridge == NULL, ("loop created in bridge_input")); mc2 = m_dup(m, M_NOWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); mc2 = m_copyup(mc2, i, ETHER_ALIGN); } if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; (*bifp->if_input)(bifp, mc2); } /* Return the original packet for local processing. */ return (m); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { return (m); } #if (defined(INET) || defined(INET6)) # define OR_CARP_CHECK_WE_ARE_DST(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_dhost)) # define OR_CARP_CHECK_WE_ARE_SRC(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_shost)) #else # define OR_CARP_CHECK_WE_ARE_DST(iface) # define OR_CARP_CHECK_WE_ARE_SRC(iface) #endif #ifdef INET6 # define OR_PFIL_HOOKED_INET6 \ || PFIL_HOOKED_IN(V_inet6_pfil_head) #else # define OR_PFIL_HOOKED_INET6 #endif #define GRAB_OUR_PACKETS(iface) \ if ((iface)->if_type == IFT_GIF) \ continue; \ /* It is destined for us. 
*/ \ if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_DST((iface)) \ ) { \ if (bif->bif_flags & IFBIF_LEARNING) { \ error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ m_freem(m); \ return (NULL); \ } \ } \ m->m_pkthdr.rcvif = iface; \ if ((iface) == ifp) { \ /* Skip bridge processing... src == dest */ \ return (m); \ } \ /* It's passing over or to the bridge, locally. */ \ ETHER_BPF_MTAP(bifp, m); \ if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); \ if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); \ /* Filter on the physical interface. */ \ if (V_pfil_local_phys && (PFIL_HOOKED_IN(V_inet_pfil_head) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ return (NULL); \ } \ } \ if ((iface) != bifp) \ ETHER_BPF_MTAP(iface, m); \ return (m); \ } \ \ /* We just received a packet that we sent out. */ \ if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_SRC((iface)) \ ) { \ m_freem(m); \ return (NULL); \ } /* * Unicast. Make sure it's not for the bridge. */ do { GRAB_OUR_PACKETS(bifp) } while (0); /* * Give a chance for ifp at first priority. This will help when the * packet comes through the interface like VLAN's with the same MACs * on several interfaces from the same bridge. This also will save * some CPU cycles in case the destination interface and the input * interface (eq ifp) are the same. */ do { GRAB_OUR_PACKETS(ifp) } while (0); /* Now check the all bridge members. */ CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) { GRAB_OUR_PACKETS(bif2->bif_ifp) } #undef OR_CARP_CHECK_WE_ARE_DST #undef OR_CARP_CHECK_WE_ARE_SRC #undef OR_PFIL_HOOKED_INET6 #undef GRAB_OUR_PACKETS /* Perform the bridge forwarding function. 
*/ bridge_forward(sc, bif, m); return (NULL); } /* * bridge_broadcast: * * Send a frame to all interfaces that are members of * the bridge, except for the one on which the packet * arrived. * * NOTE: Releases the lock on return. */ static void bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, struct mbuf *m, int runfilt) { struct bridge_iflist *dbif, *sbif; struct mbuf *mc; struct ifnet *dst_if; int used = 0, i; NET_EPOCH_ASSERT(); sbif = bridge_lookup_member_if(sc, src_if); /* Filter on the bridge interface before broadcasting */ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) return; if (m == NULL) return; } CK_LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) { dst_if = dbif->bif_ifp; if (dst_if == src_if) continue; /* Private segments can not talk to each other */ if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 && (m->m_flags & (M_BCAST|M_MCAST)) == 0) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; if (CK_LIST_NEXT(dbif, bif_next) == NULL) { mc = m; used = 1; } else { mc = m_dup(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } /* * Filter on the output interface. Pass a NULL bridge interface * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. 
*/ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (used == 0) { /* Keep the layer3 header aligned */ i = min(mc->m_pkthdr.len, max_protohdr); mc = m_copyup(mc, i, ETHER_ALIGN); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0) continue; if (mc == NULL) continue; } bridge_enqueue(sc, dst_if, mc); } if (used == 0) m_freem(m); } /* * bridge_span: * * Duplicate a packet out one or more interfaces that are in span mode, * the original mbuf is unmodified. */ static void bridge_span(struct bridge_softc *sc, struct mbuf *m) { struct bridge_iflist *bif; struct ifnet *dst_if; struct mbuf *mc; NET_EPOCH_ASSERT(); if (CK_LIST_EMPTY(&sc->sc_spanlist)) return; CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { dst_if = bif->bif_ifp; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } bridge_enqueue(sc, dst_if, mc); } } /* * bridge_rtupdate: * * Add a bridge routing entry. */ static int bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, struct bridge_iflist *bif, int setflags, uint8_t flags) { struct bridge_rtnode *brt; int error; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); /* Check the source address is valid and not multicast. */ if (ETHER_IS_MULTICAST(dst) || (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) return (EINVAL); /* 802.1p frames map to vlan 1 */ if (vlan == 0) vlan = 1; /* * A route for this destination might already exist. If so, * update it, otherwise create a new one. */ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) { BRIDGE_RT_LOCK(sc); /* Check again, now that we have the lock. There could have * been a race and we only want to insert this once. 
*/ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) != NULL) { BRIDGE_RT_UNLOCK(sc); return (0); } if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; BRIDGE_RT_UNLOCK(sc); return (ENOSPC); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; BRIDGE_RT_UNLOCK(sc); return (ENOSPC); } /* * Allocate a new bridge forwarding node, and * initialize the expiration time and Ethernet * address. */ brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO); if (brt == NULL) { BRIDGE_RT_UNLOCK(sc); return (ENOMEM); } brt->brt_vnet = curvnet; if (bif->bif_flags & IFBIF_STICKY) brt->brt_flags = IFBAF_STICKY; else brt->brt_flags = IFBAF_DYNAMIC; memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN); brt->brt_vlan = vlan; if ((error = bridge_rtnode_insert(sc, brt)) != 0) { uma_zfree(V_bridge_rtnode_zone, brt); BRIDGE_RT_UNLOCK(sc); return (error); } brt->brt_dst = bif; bif->bif_addrcnt++; BRIDGE_RT_UNLOCK(sc); } if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && brt->brt_dst != bif) { BRIDGE_RT_LOCK(sc); brt->brt_dst->bif_addrcnt--; brt->brt_dst = bif; brt->brt_dst->bif_addrcnt++; BRIDGE_RT_UNLOCK(sc); } if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + sc->sc_brttimeout; if (setflags) brt->brt_flags = flags; return (0); } /* * bridge_rtlookup: * * Lookup the destination interface for an address. */ static struct ifnet * bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; NET_EPOCH_ASSERT(); if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) return (NULL); return (brt->brt_ifp); } /* * bridge_rttrim: * * Trim the routine table so that we have a number * of routing entries less than or equal to the * maximum number. */ static void bridge_rttrim(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; NET_EPOCH_ASSERT(); BRIDGE_RT_LOCK_ASSERT(sc); /* Make sure we actually need to do this. 
*/ if (sc->sc_brtcnt <= sc->sc_brtmax) return; /* Force an aging cycle; this might trim enough addresses. */ bridge_rtage(sc); if (sc->sc_brtcnt <= sc->sc_brtmax) return; CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { bridge_rtnode_destroy(sc, brt); if (sc->sc_brtcnt <= sc->sc_brtmax) return; } } } /* * bridge_timer: * * Aging timer for the bridge. */ static void bridge_timer(void *arg) { struct bridge_softc *sc = arg; BRIDGE_RT_LOCK_ASSERT(sc); /* Destruction of rtnodes requires a proper vnet context */ CURVNET_SET(sc->sc_ifp->if_vnet); bridge_rtage(sc); if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); CURVNET_RESTORE(); } /* * bridge_rtage: * * Perform an aging cycle. */ static void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { if (time_uptime >= brt->brt_expire) bridge_rtnode_destroy(sc, brt); } } } /* * bridge_rtflush: * * Remove all dynamic addresses from the bridge. */ static void bridge_rtflush(struct bridge_softc *sc, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtdaddr: * * Remove an address from the table. */ static int bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; int found = 0; BRIDGE_RT_LOCK(sc); /* * If vlan is zero then we want to delete for all vlans so the lookup * may return more than one. */ while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) { bridge_rtnode_destroy(sc, brt); found = 1; } BRIDGE_RT_UNLOCK(sc); return (found ? 
0 : ENOENT); } /* * bridge_rtdelete: * * Delete routes to a speicifc member interface. */ static void bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (brt->brt_ifp == ifp && (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtable_init: * * Initialize the route table for this bridge. */ static void bridge_rtable_init(struct bridge_softc *sc) { int i; sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, M_DEVBUF, M_WAITOK); for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) CK_LIST_INIT(&sc->sc_rthash[i]); sc->sc_rthash_key = arc4random(); CK_LIST_INIT(&sc->sc_rtlist); } /* * bridge_rtable_fini: * * Deconstruct the route table for this bridge. */ static void bridge_rtable_fini(struct bridge_softc *sc) { KASSERT(sc->sc_brtcnt == 0, ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt)); free(sc->sc_rthash, M_DEVBUF); } /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 
*/ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (/*CONSTCOND*/0) static __inline uint32_t bridge_rthash(struct bridge_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; mix(a, b, c); return (c & BRIDGE_RTHASH_MASK); } #undef mix static int bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) { d = ((int)a[i]) - ((int)b[i]); } return (d); } /* * bridge_rtnode_lookup: * * Look up a bridge route node for the specified destination. Compare the * vlan id or if zero then just return the first match. */ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; uint32_t hash; int dir; BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(sc); hash = bridge_rthash(sc, addr); CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr); if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) return (brt); if (dir > 0) return (NULL); } return (NULL); } /* * bridge_rtnode_insert: * * Insert the specified bridge node into the route table. We * assume the entry is not already in the table. 
*/ static int bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) { struct bridge_rtnode *lbrt; uint32_t hash; int dir; BRIDGE_RT_LOCK_ASSERT(sc); hash = bridge_rthash(sc, brt->brt_addr); lbrt = CK_LIST_FIRST(&sc->sc_rthash[hash]); if (lbrt == NULL) { CK_LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash); goto out; } do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) return (EEXIST); if (dir > 0) { CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; } if (CK_LIST_NEXT(lbrt, brt_hash) == NULL) { CK_LIST_INSERT_AFTER(lbrt, brt, brt_hash); goto out; } lbrt = CK_LIST_NEXT(lbrt, brt_hash); } while (lbrt != NULL); #ifdef DIAGNOSTIC panic("bridge_rtnode_insert: impossible"); #endif out: CK_LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list); sc->sc_brtcnt++; return (0); } static void bridge_rtnode_destroy_cb(struct epoch_context *ctx) { struct bridge_rtnode *brt; brt = __containerof(ctx, struct bridge_rtnode, brt_epoch_ctx); CURVNET_SET(brt->brt_vnet); uma_zfree(V_bridge_rtnode_zone, brt); CURVNET_RESTORE(); } /* * bridge_rtnode_destroy: * * Destroy a bridge rtnode. */ static void bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) { BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_REMOVE(brt, brt_hash); CK_LIST_REMOVE(brt, brt_list); sc->sc_brtcnt--; brt->brt_dst->bif_addrcnt--; NET_EPOCH_CALL(bridge_rtnode_destroy_cb, &brt->brt_epoch_ctx); } /* * bridge_rtable_expire: * * Set the expiry time for all routes on an interface. 
*/ static void bridge_rtable_expire(struct ifnet *ifp, int age) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_rtnode *brt; CURVNET_SET(ifp->if_vnet); BRIDGE_RT_LOCK(sc); /* * If the age is zero then flush, otherwise set all the expiry times to * age for the interface */ if (age == 0) bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN); else { CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { /* Cap the expiry time to 'age' */ if (brt->brt_ifp == ifp && brt->brt_expire > time_uptime + age && (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + age; } } BRIDGE_RT_UNLOCK(sc); CURVNET_RESTORE(); } /* * bridge_state_change: * * Callback from the bridgestp code when a port changes states. */ static void bridge_state_change(struct ifnet *ifp, int state) { struct bridge_softc *sc = ifp->if_bridge; static const char *stpstates[] = { "disabled", "listening", "learning", "forwarding", "blocking", "discarding" }; CURVNET_SET(ifp->if_vnet); if (V_log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); CURVNET_RESTORE(); } /* * Send bridge packets through pfil if they are one of the types pfil can deal * with, or if they are ARP or REVARP. (pfil will pass ARP and REVARP without * question.) If *bifp or *ifp are NULL then packet filtering is skipped for * that interface. 
*/
static int
bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
{
	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;
	pfil_return_t rv;

	snap = 0;
	error = -1;	/* Default error if not error == 0 */

#if 0
	/* we may return with the IP fields swapped, ensure its not shared */
	KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
#endif

	if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0)
		return (0); /* filtering is disabled */

	/* Make the leading headers contiguous before looking at them. */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
	    *mp = m_pullup(*mp, i);
	    if (*mp == NULL) {
		printf("%s: m_pullup failed\n", __func__);
		return (-1);
	    }
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic.  If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
		case ETHERTYPE_ARP:
		case ETHERTYPE_REVARP:
			if (V_pfil_ipfw_arp == 0)
				return (0); /* Automatically pass */
			break;

		case ETHERTYPE_IP:
#ifdef INET6
		case ETHERTYPE_IPV6:
#endif /* INET6 */
			break;
		default:
			/*
			 * Check to see if the user wants to pass non-ip
			 * packets, these will not be checked by pfil(9) and
			 * passed unconditionally so the default is to drop.
			 */
			if (V_pfil_onlyip)
				goto bad;
	}

	/* Run the packet through pfil before stripping link headers */
	if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 &&
	    dir == PFIL_OUT && ifp != NULL) {
		switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) {
		case PFIL_DROPPED:
			return (EACCES);
		case PFIL_CONSUMED:
			return (0);
		}
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 */
	if (dir == PFIL_IN) {
		switch (ether_type) {
			case ETHERTYPE_IP:
				error = bridge_ip_checkbasic(mp);
				break;
#ifdef INET6
			case ETHERTYPE_IPV6:
				error = bridge_ip6_checkbasic(mp);
				break;
#endif /* INET6 */
			default:
				error = 0;
		}
		if (error)
			goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pfil
	 */
	rv = PFIL_PASS;
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * Run pfil on the member interface and the bridge, both can
		 * be skipped by clearing pfil_member or pfil_bridge.
		 *
		 * Keep the order:
		 *   in_if -> bridge_if -> out_if
		 */
		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
		    pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) !=
		    PFIL_PASS)
			break;

		if (V_pfil_member && ifp != NULL && (rv =
		    pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) !=
		    PFIL_PASS)
			break;

		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
		    pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) !=
		    PFIL_PASS)
			break;

		/* check if we need to fragment the packet */
		/* bridge_fragment generates a mbuf chain of packets */
		/* that already include eth headers */
		if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) {
			i = (*mp)->m_pkthdr.len;
			if (i > ifp->if_mtu) {
				error = bridge_fragment(ifp, mp, &eh2, snap,
					    &llc1);
				return (error);
			}
		}

		/* Recalculate the ip checksum. */
		ip = mtod(*mp, struct ip *);
		hlen = ip->ip_hl << 2;
		if (hlen < sizeof(struct ip))
			goto bad;
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == NULL)
				goto bad;
			ip = mtod(*mp, struct ip *);
			if (ip == NULL)
				goto bad;
		}
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip))
			ip->ip_sum = in_cksum_hdr(ip);
		else
			ip->ip_sum = in_cksum(*mp, hlen);

		break;
#ifdef INET6
	case ETHERTYPE_IPV6:
		/* Same in_if -> bridge_if -> out_if ordering as for IPv4. */
		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
		    pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) !=
		    PFIL_PASS)
			break;

		if (V_pfil_member && ifp != NULL && (rv =
		    pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) !=
		    PFIL_PASS)
			break;

		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
		    pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) !=
		    PFIL_PASS)
			break;
		break;
#endif
	}

	/* Translate the hook verdict; only PFIL_PASS falls through. */
	switch (rv) {
	case PFIL_CONSUMED:
		return (0);
	case PFIL_DROPPED:
		return (EACCES);
	default:
		break;
	}

	/* From here on a failed M_PREPEND leaves *mp NULL and returns -1. */
	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
		if (*mp == NULL)
			return (error);
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
	if (*mp == NULL)
		return (error);
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return (0);

bad:
	/* Drop: free the packet and clear the caller's pointer. */
	m_freem(*mp);
	*mp = NULL;
	return (error);
}

/*
 * Perform basic checks on header size since
 * pfil assumes ip_input has already processed
 * it for it.  Cut-and-pasted from ip_input.c.
 * Given how simple the IPv6 version is,
 * does the IPv4 version really need to be
 * this complicated?
 *
 * XXX Should we update ipstat here, or not?
 * XXX Right now we update ipstat but not
 * XXX csum_counter.
*/ static int bridge_ip_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip *ip; int len, hlen; u_short sum; if (*mp == NULL) return (-1); if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } else if (__predict_false(m->m_len < sizeof (struct ip))) { if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; if (ip->ip_v != IPVERSION) { KMOD_IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ KMOD_IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == NULL) { KMOD_IPSTAT_INC(ips_badhlen); goto bad; } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { KMOD_IPSTAT_INC(ips_badsum); goto bad; } /* Retrieve the packet length. */ len = ntohs(ip->ip_len); /* * Check for additional length bogosity */ if (len < hlen) { KMOD_IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { KMOD_IPSTAT_INC(ips_tooshort); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #ifdef INET6 /* * Same as above, but for IPv6. * Cut-and-pasted from ip6_input.c. * XXX Should we update ip6stat, or not? */ static int bridge_ip6_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip6_hdr *ip6; /* * If the IPv6 header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward * it. 
Otherwise, if it is aligned, make sure the entire base * IPv6 header is in the first mbuf of the chain. */ if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #endif /* INET6 */ /* * bridge_fragment: * * Fragment mbuf chain in multiple packets and prepend ethernet header. */ static int bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh, int snap, struct llc *llc) { struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL; struct ip *ip; int error = -1; if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) goto dropit; ip = mtod(m, struct ip *); m->m_pkthdr.csum_flags |= CSUM_IP; error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist); if (error) goto dropit; /* * Walk the chain and re-add the Ethernet header for * each mbuf packet. 
*/ for (mcur = m; mcur; mcur = mcur->m_nextpkt) { nextpkt = mcur->m_nextpkt; mcur->m_nextpkt = NULL; if (snap) { M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc)); } M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN); /* * The previous two M_PREPEND could have inserted one or two * mbufs in front so we have to update the previous packet's * m_nextpkt. */ mcur->m_nextpkt = nextpkt; if (mprev != NULL) mprev->m_nextpkt = mcur; else { /* The first mbuf in the original chain needs to be * updated. */ *mp = mcur; } mprev = mcur; } KMOD_IPSTAT_INC(ips_fragmented); return (error); dropit: for (mcur = *mp; mcur; mcur = m) { /* droping the full packet chain */ m = mcur->m_nextpkt; m_freem(mcur); } return (error); } static void bridge_linkstate(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; struct epoch_tracker et; NET_EPOCH_ENTER(et); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { NET_EPOCH_EXIT(et); return; } bridge_linkcheck(sc); bstp_linkstate(&bif->bif_stp); NET_EPOCH_EXIT(et); } static void bridge_linkcheck(struct bridge_softc *sc) { struct bridge_iflist *bif; int new_link, hasls; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); new_link = LINK_STATE_DOWN; hasls = 0; /* Our link is considered up if at least one of our ports is active */ CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE) hasls++; if (bif->bif_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } if (!CK_LIST_EMPTY(&sc->sc_iflist) && !hasls) { /* If no interfaces support link-state then we default to up */ new_link = LINK_STATE_UP; } if_link_state_change(sc->sc_ifp, new_link); }