Index: head/sys/net/ethernet.h
===================================================================
--- head/sys/net/ethernet.h	(revision 345138)
+++ head/sys/net/ethernet.h	(revision 345139)
@@ -1,451 +1,452 @@
/*
 * Fundamental constants relating to ethernet.
 *
 * $FreeBSD$
 *
 */

#ifndef _NET_ETHERNET_H_
#define _NET_ETHERNET_H_

/*
 * Some basic Ethernet constants.
 */
#define ETHER_ADDR_LEN		6	/* length of an Ethernet address */
#define ETHER_TYPE_LEN		2	/* length of the Ethernet type field */
#define ETHER_CRC_LEN		4	/* length of the Ethernet CRC */
#define ETHER_HDR_LEN		(ETHER_ADDR_LEN*2+ETHER_TYPE_LEN)
#define ETHER_MIN_LEN		64	/* minimum frame len, including CRC */
#define ETHER_MAX_LEN		1518	/* maximum frame len, including CRC */
#define ETHER_MAX_LEN_JUMBO	9018	/* max jumbo frame len, including CRC */
#define ETHER_VLAN_ENCAP_LEN	4	/* len of 802.1Q VLAN encapsulation */

/*
 * Mbuf adjust factor to force 32-bit alignment of IP header.
 * Drivers should do m_adj(m, ETHER_ALIGN) when setting up a
 * receive so the upper layers get the IP header properly aligned
 * past the 14-byte Ethernet header.
 */
#define ETHER_ALIGN		2	/* driver adjust for IP hdr alignment */

/*
 * Compute the maximum frame size based on ethertype (i.e. possible
 * encapsulation) and whether or not an FCS is present.
 */
#define ETHER_MAX_FRAME(ifp, etype, hasfcs) \
	((ifp)->if_mtu + ETHER_HDR_LEN + \
	((hasfcs) ? ETHER_CRC_LEN : 0) + \
	(((etype) == ETHERTYPE_VLAN) ? ETHER_VLAN_ENCAP_LEN : 0))

/*
 * Ethernet-specific mbuf flags.
 */
#define M_HASFCS	M_PROTO5	/* FCS included at end of frame */

/*
 * Ethernet CRC32 polynomials (big- and little-endian versions).
 */
#define ETHER_CRC_POLY_LE	0xedb88320
#define ETHER_CRC_POLY_BE	0x04c11db6

/*
 * A macro to validate a length against the minimum and maximum
 * allowed Ethernet frame sizes.
 */
#define ETHER_IS_VALID_LEN(foo) \
	((foo) >= ETHER_MIN_LEN && (foo) <= ETHER_MAX_LEN)

/*
 * Structure of a 10Mb/s Ethernet header.
 */
struct ether_header {
	u_char	ether_dhost[ETHER_ADDR_LEN];
	u_char	ether_shost[ETHER_ADDR_LEN];
	u_short	ether_type;
} __packed;

/*
 * Structure of a 48-bit Ethernet address.
 */
struct ether_addr {
	u_char octet[ETHER_ADDR_LEN];
} __packed;

#define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
#define ETHER_IS_BROADCAST(addr) \
	(((addr)[0] & (addr)[1] & (addr)[2] & \
	(addr)[3] & (addr)[4] & (addr)[5]) == 0xff)

/*
 * 802.1q Virtual LAN header.
 */
struct ether_vlan_header {
	uint8_t evl_dhost[ETHER_ADDR_LEN];
	uint8_t evl_shost[ETHER_ADDR_LEN];
	uint16_t evl_encap_proto;
	uint16_t evl_tag;
	uint16_t evl_proto;
} __packed;

#define EVL_VLID_MASK	0x0FFF
#define EVL_PRI_MASK	0xE000
#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1)
#define EVL_MAKETAG(vlid, pri, cfi) \
	((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))

/*
 * NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields.
 * However, there are some conflicts.
 */

#define ETHERTYPE_8023		0x0004	/* IEEE 802.3 packet */
		/* 0x0101 .. 0x1FF	Experimental */
#define ETHERTYPE_PUP		0x0200	/* Xerox PUP protocol - see 0A00 */
#define ETHERTYPE_PUPAT		0x0200	/* PUP Address Translation - see 0A01 */
#define ETHERTYPE_SPRITE	0x0500	/* ??? */
		/* 0x0400	Nixdorf */
#define ETHERTYPE_NS		0x0600	/* XNS */
#define ETHERTYPE_NSAT		0x0601	/* XNS Address Translation (3Mb only) */
#define ETHERTYPE_DLOG1		0x0660	/* DLOG (?) */
#define ETHERTYPE_DLOG2		0x0661	/* DLOG (?)
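 *
 * [Editor's aside, not part of the header] The EVL_* macros defined a
 * little way above pack and unpack an 802.1Q tag control field: the
 * VLAN ID sits in bits 0..11, CFI/DEI in bit 12 and the priority in
 * bits 13..15. A quick round-trip sanity sketch:
 *
 *	uint16_t tag = EVL_MAKETAG(42, 4, 0);	(vid 42, PCP 4, CFI 0)
 *	assert(EVL_VLANOFTAG(tag) == 42);
 *	assert(EVL_PRIOFTAG(tag) == 4);
 *	assert(EVL_CFIOFTAG(tag) == 0);
 *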
*/ #define ETHERTYPE_IP 0x0800 /* IP protocol */ #define ETHERTYPE_X75 0x0801 /* X.75 Internet */ #define ETHERTYPE_NBS 0x0802 /* NBS Internet */ #define ETHERTYPE_ECMA 0x0803 /* ECMA Internet */ #define ETHERTYPE_CHAOS 0x0804 /* CHAOSnet */ #define ETHERTYPE_X25 0x0805 /* X.25 Level 3 */ #define ETHERTYPE_ARP 0x0806 /* Address resolution protocol */ #define ETHERTYPE_NSCOMPAT 0x0807 /* XNS Compatibility */ #define ETHERTYPE_FRARP 0x0808 /* Frame Relay ARP (RFC1701) */ /* 0x081C Symbolics Private */ /* 0x0888 - 0x088A Xyplex */ #define ETHERTYPE_UBDEBUG 0x0900 /* Ungermann-Bass network debugger */ #define ETHERTYPE_IEEEPUP 0x0A00 /* Xerox IEEE802.3 PUP */ #define ETHERTYPE_IEEEPUPAT 0x0A01 /* Xerox IEEE802.3 PUP Address Translation */ #define ETHERTYPE_VINES 0x0BAD /* Banyan VINES */ #define ETHERTYPE_VINESLOOP 0x0BAE /* Banyan VINES Loopback */ #define ETHERTYPE_VINESECHO 0x0BAF /* Banyan VINES Echo */ /* 0x1000 - 0x100F Berkeley Trailer */ /* * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have * (type-ETHERTYPE_TRAIL)*512 bytes of data followed * by an ETHER type (as given above) and then the (variable-length) header. */ #define ETHERTYPE_TRAIL 0x1000 /* Trailer packet */ #define ETHERTYPE_NTRAILER 16 #define ETHERTYPE_DCA 0x1234 /* DCA - Multicast */ #define ETHERTYPE_VALID 0x1600 /* VALID system protocol */ #define ETHERTYPE_DOGFIGHT 0x1989 /* Artificial Horizons ("Aviator" dogfight simulator [on Sun]) */ #define ETHERTYPE_RCL 0x1995 /* Datapoint Corporation (RCL lan protocol) */ /* The following 3C0x types are unregistered: */ #define ETHERTYPE_NBPVCD 0x3C00 /* 3Com NBP virtual circuit datagram (like XNS SPP) not registered */ #define ETHERTYPE_NBPSCD 0x3C01 /* 3Com NBP System control datagram not registered */ #define ETHERTYPE_NBPCREQ 0x3C02 /* 3Com NBP Connect request (virtual cct) not registered */ #define ETHERTYPE_NBPCRSP 0x3C03 /* 3Com NBP Connect response not registered */ #define ETHERTYPE_NBPCC 0x3C04 /* 3Com NBP Connect complete not registered */ #define ETHERTYPE_NBPCLREQ 0x3C05 /* 3Com NBP Close request (virtual cct) not registered */ #define ETHERTYPE_NBPCLRSP 0x3C06 /* 3Com NBP Close response not registered */ #define ETHERTYPE_NBPDG 0x3C07 /* 3Com NBP Datagram (like XNS IDP) not registered */ #define ETHERTYPE_NBPDGB 0x3C08 /* 3Com NBP Datagram broadcast not registered */ #define ETHERTYPE_NBPCLAIM 0x3C09 /* 3Com NBP Claim NetBIOS name not registered */ #define ETHERTYPE_NBPDLTE 0x3C0A /* 3Com NBP Delete NetBIOS name not registered */ #define ETHERTYPE_NBPRAS 0x3C0B /* 3Com NBP Remote adaptor status request not registered */ #define ETHERTYPE_NBPRAR 0x3C0C /* 3Com NBP Remote adaptor response not registered */ #define ETHERTYPE_NBPRST 0x3C0D /* 3Com NBP Reset not registered */ #define ETHERTYPE_PCS 0x4242 /* PCS Basic Block Protocol */ #define ETHERTYPE_IMLBLDIAG 0x424C /* Information Modes Little Big LAN diagnostic */ #define ETHERTYPE_DIDDLE 0x4321 /* THD - Diddle */ #define ETHERTYPE_IMLBL 0x4C42 /* Information Modes Little Big LAN */ #define ETHERTYPE_SIMNET 0x5208 /* BBN Simnet Private */ #define ETHERTYPE_DECEXPER 0x6000 /* DEC Unassigned, experimental */ #define ETHERTYPE_MOPDL 0x6001 /* DEC MOP dump/load */ #define ETHERTYPE_MOPRC 0x6002 /* DEC MOP remote console */ #define ETHERTYPE_DECnet 0x6003 /* DEC DECNET Phase IV route */ #define ETHERTYPE_DN ETHERTYPE_DECnet /* libpcap, tcpdump */ #define ETHERTYPE_LAT 0x6004 /* DEC LAT */ #define ETHERTYPE_DECDIAG 0x6005 /* DEC diagnostic protocol (at interface initialization?) 
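 *
 * [Editor's aside] Worked example of the trailer encoding described
 * above: a frame whose type field is 0x1003 carries
 * (0x1003 - ETHERTYPE_TRAIL) * 512 = 3 * 512 = 1536 bytes of data ahead
 * of the real ethertype and header. A range test for trailer types:
 *
 *	int is_trailer = (etype >= ETHERTYPE_TRAIL &&
 *	    etype < ETHERTYPE_TRAIL + ETHERTYPE_NTRAILER);
 *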
*/ #define ETHERTYPE_DECCUST 0x6006 /* DEC customer protocol */ #define ETHERTYPE_SCA 0x6007 /* DEC LAVC, SCA */ #define ETHERTYPE_AMBER 0x6008 /* DEC AMBER */ #define ETHERTYPE_DECMUMPS 0x6009 /* DEC MUMPS */ /* 0x6010 - 0x6014 3Com Corporation */ #define ETHERTYPE_TRANSETHER 0x6558 /* Trans Ether Bridging (RFC1701)*/ #define ETHERTYPE_RAWFR 0x6559 /* Raw Frame Relay (RFC1701) */ #define ETHERTYPE_UBDL 0x7000 /* Ungermann-Bass download */ #define ETHERTYPE_UBNIU 0x7001 /* Ungermann-Bass NIUs */ #define ETHERTYPE_UBDIAGLOOP 0x7002 /* Ungermann-Bass diagnostic/loopback */ #define ETHERTYPE_UBNMC 0x7003 /* Ungermann-Bass ??? (NMC to/from UB Bridge) */ #define ETHERTYPE_UBBST 0x7005 /* Ungermann-Bass Bridge Spanning Tree */ #define ETHERTYPE_OS9 0x7007 /* OS/9 Microware */ #define ETHERTYPE_OS9NET 0x7009 /* OS/9 Net? */ /* 0x7020 - 0x7029 LRT (England) (now Sintrom) */ #define ETHERTYPE_RACAL 0x7030 /* Racal-Interlan */ #define ETHERTYPE_PRIMENTS 0x7031 /* Prime NTS (Network Terminal Service) */ #define ETHERTYPE_CABLETRON 0x7034 /* Cabletron */ #define ETHERTYPE_CRONUSVLN 0x8003 /* Cronus VLN */ #define ETHERTYPE_CRONUS 0x8004 /* Cronus Direct */ #define ETHERTYPE_HP 0x8005 /* HP Probe */ #define ETHERTYPE_NESTAR 0x8006 /* Nestar */ #define ETHERTYPE_ATTSTANFORD 0x8008 /* AT&T/Stanford (local use) */ #define ETHERTYPE_EXCELAN 0x8010 /* Excelan */ #define ETHERTYPE_SG_DIAG 0x8013 /* SGI diagnostic type */ #define ETHERTYPE_SG_NETGAMES 0x8014 /* SGI network games */ #define ETHERTYPE_SG_RESV 0x8015 /* SGI reserved type */ #define ETHERTYPE_SG_BOUNCE 0x8016 /* SGI bounce server */ #define ETHERTYPE_APOLLODOMAIN 0x8019 /* Apollo DOMAIN */ #define ETHERTYPE_TYMSHARE 0x802E /* Tymeshare */ #define ETHERTYPE_TIGAN 0x802F /* Tigan, Inc. */ #define ETHERTYPE_REVARP 0x8035 /* Reverse addr resolution protocol */ #define ETHERTYPE_AEONIC 0x8036 /* Aeonic Systems */ #define ETHERTYPE_IPXNEW 0x8037 /* IPX (Novell Netware?) */ #define ETHERTYPE_LANBRIDGE 0x8038 /* DEC LANBridge */ #define ETHERTYPE_DSMD 0x8039 /* DEC DSM/DDP */ #define ETHERTYPE_ARGONAUT 0x803A /* DEC Argonaut Console */ #define ETHERTYPE_VAXELN 0x803B /* DEC VAXELN */ #define ETHERTYPE_DECDNS 0x803C /* DEC DNS Naming Service */ #define ETHERTYPE_ENCRYPT 0x803D /* DEC Ethernet Encryption */ #define ETHERTYPE_DECDTS 0x803E /* DEC Distributed Time Service */ #define ETHERTYPE_DECLTM 0x803F /* DEC LAN Traffic Monitor */ #define ETHERTYPE_DECNETBIOS 0x8040 /* DEC PATHWORKS DECnet NETBIOS Emulation */ #define ETHERTYPE_DECLAST 0x8041 /* DEC Local Area System Transport */ /* 0x8042 DEC Unassigned */ #define ETHERTYPE_PLANNING 0x8044 /* Planning Research Corp. */ /* 0x8046 - 0x8047 AT&T */ #define ETHERTYPE_DECAM 0x8048 /* DEC Availability Manager for Distributed Systems DECamds (but someone at DEC says not) */ #define ETHERTYPE_EXPERDATA 0x8049 /* ExperData */ #define ETHERTYPE_VEXP 0x805B /* Stanford V Kernel exp. */ #define ETHERTYPE_VPROD 0x805C /* Stanford V Kernel prod. */ #define ETHERTYPE_ES 0x805D /* Evans & Sutherland */ #define ETHERTYPE_LITTLE 0x8060 /* Little Machines */ #define ETHERTYPE_COUNTERPOINT 0x8062 /* Counterpoint Computers */ /* 0x8065 - 0x8066 Univ. of Mass @ Amherst */ #define ETHERTYPE_VEECO 0x8067 /* Veeco Integrated Auto. 
*/ #define ETHERTYPE_GENDYN 0x8068 /* General Dynamics */ #define ETHERTYPE_ATT 0x8069 /* AT&T */ #define ETHERTYPE_AUTOPHON 0x806A /* Autophon */ #define ETHERTYPE_COMDESIGN 0x806C /* ComDesign */ #define ETHERTYPE_COMPUGRAPHIC 0x806D /* Compugraphic Corporation */ /* 0x806E - 0x8077 Landmark Graphics Corp. */ #define ETHERTYPE_MATRA 0x807A /* Matra */ #define ETHERTYPE_DDE 0x807B /* Dansk Data Elektronik */ #define ETHERTYPE_MERIT 0x807C /* Merit Internodal (or Univ of Michigan?) */ /* 0x807D - 0x807F Vitalink Communications */ #define ETHERTYPE_VLTLMAN 0x8080 /* Vitalink TransLAN III Management */ /* 0x8081 - 0x8083 Counterpoint Computers */ /* 0x8088 - 0x808A Xyplex */ #define ETHERTYPE_ATALK 0x809B /* AppleTalk */ #define ETHERTYPE_AT ETHERTYPE_ATALK /* old NetBSD */ #define ETHERTYPE_APPLETALK ETHERTYPE_ATALK /* HP-UX */ /* 0x809C - 0x809E Datability */ #define ETHERTYPE_SPIDER 0x809F /* Spider Systems Ltd. */ /* 0x80A3 Nixdorf */ /* 0x80A4 - 0x80B3 Siemens Gammasonics Inc. */ /* 0x80C0 - 0x80C3 DCA (Digital Comm. Assoc.) Data Exchange Cluster */ /* 0x80C4 - 0x80C5 Banyan Systems */ #define ETHERTYPE_PACER 0x80C6 /* Pacer Software */ #define ETHERTYPE_APPLITEK 0x80C7 /* Applitek Corporation */ /* 0x80C8 - 0x80CC Intergraph Corporation */ /* 0x80CD - 0x80CE Harris Corporation */ /* 0x80CF - 0x80D2 Taylor Instrument */ /* 0x80D3 - 0x80D4 Rosemount Corporation */ #define ETHERTYPE_SNA 0x80D5 /* IBM SNA Services over Ethernet */ #define ETHERTYPE_VARIAN 0x80DD /* Varian Associates */ /* 0x80DE - 0x80DF TRFS (Integrated Solutions Transparent Remote File System) */ /* 0x80E0 - 0x80E3 Allen-Bradley */ /* 0x80E4 - 0x80F0 Datability */ #define ETHERTYPE_RETIX 0x80F2 /* Retix */ #define ETHERTYPE_AARP 0x80F3 /* AppleTalk AARP */ /* 0x80F4 - 0x80F5 Kinetics */ #define ETHERTYPE_APOLLO 0x80F7 /* Apollo Computer */ #define ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging (XXX conflicts) */ /* 0x80FF - 0x8101 Wellfleet Communications (XXX conflicts) */ #define ETHERTYPE_BOFL 0x8102 /* Wellfleet; BOFL (Breath OF Life) pkts [every 5-10 secs.] */ #define ETHERTYPE_WELLFLEET 0x8103 /* Wellfleet Communications */ /* 0x8107 - 0x8109 Symbolics Private */ #define ETHERTYPE_TALARIS 0x812B /* Talaris */ #define ETHERTYPE_WATERLOO 0x8130 /* Waterloo Microsystems Inc. (XXX which?) */ #define ETHERTYPE_HAYES 0x8130 /* Hayes Microcomputers (XXX which?) */ #define ETHERTYPE_VGLAB 0x8131 /* VG Laboratory Systems */ /* 0x8132 - 0x8137 Bridge Communications */ #define ETHERTYPE_IPX 0x8137 /* Novell (old) NetWare IPX (ECONFIG E option) */ #define ETHERTYPE_NOVELL 0x8138 /* Novell, Inc. 
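 *
 * [Editor's sketch, not in the original header] How a receive path
 * might combine ETHERTYPE_VLAN (0x8100, defined above) with the
 * ether_vlan_header layout from earlier in this file; the variable
 * names here are hypothetical:
 *
 *	struct ether_vlan_header *evl = mtod(m, struct ether_vlan_header *);
 *	if (ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN) {
 *		uint16_t tci = ntohs(evl->evl_tag);
 *		uint16_t vid = EVL_VLANOFTAG(tci);
 *		uint16_t nxt = ntohs(evl->evl_proto);
 *		... dispatch on nxt, e.g. ETHERTYPE_IP ...
 *	}
 *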
*/ /* 0x8139 - 0x813D KTI */ #define ETHERTYPE_MUMPS 0x813F /* M/MUMPS data sharing */ #define ETHERTYPE_AMOEBA 0x8145 /* Vrije Universiteit (NL) Amoeba 4 RPC (obsolete) */ #define ETHERTYPE_FLIP 0x8146 /* Vrije Universiteit (NL) FLIP (Fast Local Internet Protocol) */ #define ETHERTYPE_VURESERVED 0x8147 /* Vrije Universiteit (NL) [reserved] */ #define ETHERTYPE_LOGICRAFT 0x8148 /* Logicraft */ #define ETHERTYPE_NCD 0x8149 /* Network Computing Devices */ #define ETHERTYPE_ALPHA 0x814A /* Alpha Micro */ #define ETHERTYPE_SNMP 0x814C /* SNMP over Ethernet (see RFC1089) */ /* 0x814D - 0x814E BIIN */ #define ETHERTYPE_TEC 0x814F /* Technically Elite Concepts */ #define ETHERTYPE_RATIONAL 0x8150 /* Rational Corp */ /* 0x8151 - 0x8153 Qualcomm */ /* 0x815C - 0x815E Computer Protocol Pty Ltd */ /* 0x8164 - 0x8166 Charles River Data Systems */ #define ETHERTYPE_XTP 0x817D /* Protocol Engines XTP */ #define ETHERTYPE_SGITW 0x817E /* SGI/Time Warner prop. */ #define ETHERTYPE_HIPPI_FP 0x8180 /* HIPPI-FP encapsulation */ #define ETHERTYPE_STP 0x8181 /* Scheduled Transfer STP, HIPPI-ST */ /* 0x8182 - 0x8183 Reserved for HIPPI-6400 */ /* 0x8184 - 0x818C SGI prop. */ #define ETHERTYPE_MOTOROLA 0x818D /* Motorola */ #define ETHERTYPE_NETBEUI 0x8191 /* PowerLAN NetBIOS/NetBEUI (PC) */ /* 0x819A - 0x81A3 RAD Network Devices */ /* 0x81B7 - 0x81B9 Xyplex */ /* 0x81CC - 0x81D5 Apricot Computers */ /* 0x81D6 - 0x81DD Artisoft Lantastic */ /* 0x81E6 - 0x81EF Polygon */ /* 0x81F0 - 0x81F2 Comsat Labs */ /* 0x81F3 - 0x81F5 SAIC */ /* 0x81F6 - 0x81F8 VG Analytical */ /* 0x8203 - 0x8205 QNX Software Systems Ltd. */ /* 0x8221 - 0x8222 Ascom Banking Systems */ /* 0x823E - 0x8240 Advanced Encryption Systems */ /* 0x8263 - 0x826A Charles River Data Systems */ /* 0x827F - 0x8282 Athena Programming */ /* 0x829A - 0x829B Inst Ind Info Tech */ /* 0x829C - 0x82AB Taurus Controls */ /* 0x82AC - 0x8693 Walker Richer & Quinn */ #define ETHERTYPE_ACCTON 0x8390 /* Accton Technologies (unregistered) */ #define ETHERTYPE_TALARISMC 0x852B /* Talaris multicast */ #define ETHERTYPE_KALPANA 0x8582 /* Kalpana */ /* 0x8694 - 0x869D Idea Courier */ /* 0x869E - 0x86A1 Computer Network Tech */ /* 0x86A3 - 0x86AC Gateway Communications */ #define ETHERTYPE_SECTRA 0x86DB /* SECTRA */ #define ETHERTYPE_IPV6 0x86DD /* IP protocol version 6 */ #define ETHERTYPE_DELTACON 0x86DE /* Delta Controls */ #define ETHERTYPE_ATOMIC 0x86DF /* ATOMIC */ /* 0x86E0 - 0x86EF Landis & Gyr Powers */ /* 0x8700 - 0x8710 Motorola */ #define ETHERTYPE_RDP 0x8739 /* Control Technology Inc. RDP Without IP */ #define ETHERTYPE_MICP 0x873A /* Control Technology Inc. Mcast Industrial Ctrl Proto. */ /* 0x873B - 0x873C Control Technology Inc. 
Proprietary */
#define ETHERTYPE_TCPCOMP	0x876B	/* TCP/IP Compression (RFC1701) */
#define ETHERTYPE_IPAS		0x876C	/* IP Autonomous Systems (RFC1701) */
#define ETHERTYPE_SECUREDATA	0x876D	/* Secure Data (RFC1701) */
#define ETHERTYPE_FLOWCONTROL	0x8808	/* 802.3x flow control packet */
#define ETHERTYPE_SLOW		0x8809	/* 802.3ad link aggregation (LACP) */
#define ETHERTYPE_PPP		0x880B	/* PPP (obsoleted by PPPoE) */
#define ETHERTYPE_HITACHI	0x8820	/* Hitachi Cable (Optoelectronic Systems Laboratory) */
#define ETHERTYPE_TEST		0x8822	/* Network Conformance Testing */
#define ETHERTYPE_MPLS		0x8847	/* MPLS Unicast */
#define ETHERTYPE_MPLS_MCAST	0x8848	/* MPLS Multicast */
#define ETHERTYPE_AXIS		0x8856	/* Axis Communications AB proprietary bootstrap/config */
#define ETHERTYPE_PPPOEDISC	0x8863	/* PPP Over Ethernet Discovery Stage */
#define ETHERTYPE_PPPOE		0x8864	/* PPP Over Ethernet Session Stage */
#define ETHERTYPE_LANPROBE	0x8888	/* HP LanProbe test? */
#define ETHERTYPE_PAE		0x888e	/* EAPOL PAE/802.1x */
#define ETHERTYPE_QINQ		0x88A8	/* 802.1ad VLAN stacking */
#define ETHERTYPE_LOOPBACK	0x9000	/* Loopback: used to test interfaces */
#define ETHERTYPE_LBACK		ETHERTYPE_LOOPBACK	/* DEC MOP loopback */
#define ETHERTYPE_XNSSM		0x9001	/* 3Com (Formerly Bridge Communications), XNS Systems Management */
#define ETHERTYPE_TCPSM		0x9002	/* 3Com (Formerly Bridge Communications), TCP/IP Systems Management */
#define ETHERTYPE_BCLOOP	0x9003	/* 3Com (Formerly Bridge Communications), loopback detection */
#define ETHERTYPE_DEBNI		0xAAAA	/* DECNET? Used by VAX 6220 DEBNI */
#define ETHERTYPE_SONIX		0xFAF5	/* Sonix Arpeggio */
#define ETHERTYPE_VITAL		0xFF00	/* BBN VITAL-LanBridge cache wakeups */
		/* 0xFF00 - 0xFF0F	ISC Bunker Ramo */

#define ETHERTYPE_MAX		0xFFFF	/* Maximum valid ethernet type, reserved */

/*
 * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have
 * (type-ETHERTYPE_TRAIL)*512 bytes of data followed
 * by an ETHER type (as given above) and then the (variable-length) header.
 */
#define ETHERTYPE_TRAIL		0x1000	/* Trailer packet */
#define ETHERTYPE_NTRAILER	16

#define ETHERMTU	(ETHER_MAX_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
#define ETHERMIN	(ETHER_MIN_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
#define ETHERMTU_JUMBO	(ETHER_MAX_LEN_JUMBO - ETHER_HDR_LEN - ETHER_CRC_LEN)

/*
 * The ETHER_BPF_MTAP macro should be used by drivers which support hardware
 * offload for VLAN tag processing. It will check the mbuf to see if it has
 * M_VLANTAG set, and if it does, will pass the packet along to
 * ether_vlan_mtap. This function will re-insert VLAN tags for the duration
 * of the tap, so they show up properly for network analyzers.
 */
#define ETHER_BPF_MTAP(_ifp, _m) do { \
	if (bpf_peers_present((_ifp)->if_bpf)) { \
		M_ASSERTVALID(_m); \
		if (((_m)->m_flags & M_VLANTAG) != 0) \
			ether_vlan_mtap((_ifp)->if_bpf, (_m), NULL, 0); \
		else \
			bpf_mtap((_ifp)->if_bpf, (_m)); \
	} \
} while (0)

/*
 * Names for 802.1q priorities ("802.1p"). Notice that in this scheme
 * priority 1 (background) sorts below priority 0 (best effort), allowing
 * default 0-tagged traffic to take priority over background
 * tagged traffic.
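 *
 * [Editor's aside] Worked numbers for the MTU macros above:
 *	ETHERMTU       = 1518 - 14 - 4 = 1500
 *	ETHERMIN       =   64 - 14 - 4 =   46
 *	ETHERMTU_JUMBO = 9018 - 14 - 4 = 9000
 * And a minimal sketch of ETHER_BPF_MTAP in a hypothetical driver's
 * transmit routine, so BPF taps see frames with their VLAN tags
 * re-inserted even when tagging is offloaded to hardware:
 *
 *	static int
 *	foo_transmit(struct ifnet *ifp, struct mbuf *m)
 *	{
 *		ETHER_BPF_MTAP(ifp, m);	   tap before handing m to hw
 *		return (foo_encap_and_send(ifp, m));
 *	}
 *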
*/
#define IEEE8021Q_PCP_BK	1	/* Background (lowest) */
#define IEEE8021Q_PCP_BE	0	/* Best effort (default) */
#define IEEE8021Q_PCP_EE	2	/* Excellent effort */
#define IEEE8021Q_PCP_CA	3	/* Critical applications */
#define IEEE8021Q_PCP_VI	4	/* Video, < 100ms latency */
#define IEEE8021Q_PCP_VO	5	/* Voice, < 10ms latency */
#define IEEE8021Q_PCP_IC	6	/* Internetwork control */
#define IEEE8021Q_PCP_NC	7	/* Network control (highest) */

#ifdef _KERNEL

struct ifnet;
struct mbuf;
struct route;
struct sockaddr;
struct bpf_if;

extern uint32_t ether_crc32_le(const uint8_t *, size_t);
extern uint32_t ether_crc32_be(const uint8_t *, size_t);
extern void ether_demux(struct ifnet *, struct mbuf *);
extern void ether_ifattach(struct ifnet *, const u_int8_t *);
extern void ether_ifdetach(struct ifnet *);
extern int ether_ioctl(struct ifnet *, u_long, caddr_t);
extern int ether_output(struct ifnet *, struct mbuf *,
	const struct sockaddr *, struct route *);
extern int ether_output_frame(struct ifnet *, struct mbuf *);
extern char *ether_sprintf(const u_int8_t *);
void	ether_vlan_mtap(struct bpf_if *, struct mbuf *, void *, u_int);
struct mbuf *ether_vlanencap(struct mbuf *, uint16_t);
bool	ether_8021q_frame(struct mbuf **mp, struct ifnet *ife,
	struct ifnet *p, uint16_t vid, uint8_t pcp);
+void	ether_fakeaddr(struct ether_addr *hwaddr);

#ifdef _SYS_EVENTHANDLER_H_
/* new ethernet interface attached event */
typedef void (*ether_ifattach_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ether_ifattach_event, ether_ifattach_event_handler_t);
#endif

#else /* _KERNEL */

#include <sys/cdefs.h>

/*
 * Ethernet address conversion/parsing routines.
 */
__BEGIN_DECLS
struct ether_addr *ether_aton(const char *);
struct ether_addr *ether_aton_r(const char *, struct ether_addr *);
int ether_hostton(const char *, struct ether_addr *);
int ether_line(const char *, struct ether_addr *, char *);
char *ether_ntoa(const struct ether_addr *);
char *ether_ntoa_r(const struct ether_addr *, char *);
int ether_ntohost(char *, const struct ether_addr *);
__END_DECLS

#endif /* !_KERNEL */

#endif /* !_NET_ETHERNET_H_ */

Index: head/sys/net/if_bridge.c
===================================================================
--- head/sys/net/if_bridge.c	(revision 345138)
+++ head/sys/net/if_bridge.c	(revision 345139)
@@ -1,3606 +1,3604 @@
/* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed for the NetBSD Project by
 *    Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC.
``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp */ /* * Network interface bridge support. * * TODO: * * - Currently only supports Ethernet-like interfaces (Ethernet, * 802.11, VLANs on Ethernet, etc.) Figure out a nice way * to bridge other types of interfaces (maybe consider * heterogeneous bridges). */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include /* for net/if.h */ #include #include /* string functions */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #if defined(INET) || defined(INET6) #include #endif #include #include #include #include #include #include #include /* * Size of the route hash table. Must be a power of two. */ #ifndef BRIDGE_RTHASH_SIZE #define BRIDGE_RTHASH_SIZE 1024 #endif #define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1) /* * Default maximum number of addresses to cache. */ #ifndef BRIDGE_RTABLE_MAX #define BRIDGE_RTABLE_MAX 2000 #endif /* * Timeout (in seconds) for entries learned dynamically. */ #ifndef BRIDGE_RTABLE_TIMEOUT #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */ #endif /* * Number of seconds between walks of the route list. 
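 *
 * [Editor's note] BRIDGE_RTHASH_SIZE must be a power of two because the
 * forwarding table is indexed with a bitwise AND of the hash and
 * BRIDGE_RTHASH_MASK (SIZE - 1), along the lines of:
 *
 *	idx = hash & BRIDGE_RTHASH_MASK;
 *
 * which only matches hash % BRIDGE_RTHASH_SIZE when SIZE is a power
 * of two.
 *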
*/ #ifndef BRIDGE_RTABLE_PRUNE_PERIOD #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60) #endif /* * List of capabilities to possibly mask on the member interface. */ #define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\ IFCAP_TXCSUM_IPV6) /* * List of capabilities to strip */ #define BRIDGE_IFCAPS_STRIP IFCAP_LRO /* * Bridge interface list entry. */ struct bridge_iflist { LIST_ENTRY(bridge_iflist) bif_next; struct ifnet *bif_ifp; /* member if */ struct bstp_port bif_stp; /* STP state */ uint32_t bif_flags; /* member if flags */ int bif_savedcaps; /* saved capabilities */ uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ }; /* * Bridge route node. */ struct bridge_rtnode { LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */ LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */ struct bridge_iflist *brt_dst; /* destination if */ unsigned long brt_expire; /* expiration time */ uint8_t brt_flags; /* address flags */ uint8_t brt_addr[ETHER_ADDR_LEN]; uint16_t brt_vlan; /* vlan id */ }; #define brt_ifp brt_dst->bif_ifp /* * Software state for each bridge. */ struct bridge_softc { struct ifnet *sc_ifp; /* make this an interface */ LIST_ENTRY(bridge_softc) sc_list; struct mtx sc_mtx; struct cv sc_cv; uint32_t sc_brtmax; /* max # of addresses */ uint32_t sc_brtcnt; /* cur. # of addresses */ uint32_t sc_brttimeout; /* rt timeout in seconds */ struct callout sc_brcallout; /* bridge callout */ uint32_t sc_iflist_ref; /* refcount for sc_iflist */ uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */ LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ uint32_t sc_rthash_key; /* key for hash */ LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ uint32_t sc_brtexceeded; /* # of cache drops */ struct ifnet *sc_ifaddr; /* member mac copied from */ - u_char sc_defaddr[6]; /* Default MAC address */ + struct ether_addr sc_defaddr; /* Default MAC address */ }; VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx); #define V_bridge_list_mtx VNET(bridge_list_mtx) static eventhandler_tag bridge_detach_cookie; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; uma_zone_t bridge_rtnode_zone; static int bridge_clone_create(struct if_clone *, int, caddr_t); static void bridge_clone_destroy(struct ifnet *); static int bridge_ioctl(struct ifnet *, u_long, caddr_t); static void bridge_mutecaps(struct bridge_softc *); static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); static int bridge_transmit(struct ifnet *, struct mbuf *); static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, struct mbuf 
*, int); static void bridge_span(struct bridge_softc *, struct mbuf *); static int bridge_rtupdate(struct bridge_softc *, const uint8_t *, uint16_t, struct bridge_iflist *, int, uint8_t); static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rttrim(struct bridge_softc *); static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *, const uint8_t *, uint16_t); static int bridge_rtnode_insert(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtnode_destroy(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtable_expire(struct ifnet *, int); static void bridge_state_change(struct ifnet *, int); static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *, const char *name); static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *, struct ifnet *ifp); static void bridge_delete_member(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_delete_span(struct bridge_softc *, struct bridge_iflist *); static int bridge_ioctl_add(struct bridge_softc *, void *); static int bridge_ioctl_del(struct bridge_softc *, void *); static int bridge_ioctl_gifflags(struct bridge_softc *, void *); static int bridge_ioctl_sifflags(struct bridge_softc *, void *); static int bridge_ioctl_scache(struct bridge_softc *, void *); static int bridge_ioctl_gcache(struct bridge_softc *, void *); static int bridge_ioctl_gifs(struct bridge_softc *, void *); static int bridge_ioctl_rts(struct bridge_softc *, void *); static int bridge_ioctl_saddr(struct bridge_softc *, void *); static int bridge_ioctl_sto(struct bridge_softc *, void *); static int bridge_ioctl_gto(struct bridge_softc *, void *); static int bridge_ioctl_daddr(struct bridge_softc *, void *); static int bridge_ioctl_flush(struct bridge_softc *, void *); static int bridge_ioctl_gpri(struct bridge_softc *, void *); static int bridge_ioctl_spri(struct bridge_softc *, void *); static int bridge_ioctl_ght(struct bridge_softc *, void *); static int bridge_ioctl_sht(struct bridge_softc *, void *); static int bridge_ioctl_gfd(struct bridge_softc *, void *); static int bridge_ioctl_sfd(struct bridge_softc *, void *); static int bridge_ioctl_gma(struct bridge_softc *, void *); static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); static int bridge_ioctl_grte(struct bridge_softc *, void *); static int bridge_ioctl_gifsstp(struct bridge_softc *, void *); static int bridge_ioctl_sproto(struct bridge_softc *, void *); static int bridge_ioctl_stxhc(struct bridge_softc *, void *); static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); static int bridge_ip_checkbasic(struct mbuf **mp); #ifdef INET6 static int bridge_ip6_checkbasic(struct mbuf **mp); #endif /* INET6 */ static 
int bridge_fragment(struct ifnet *, struct mbuf **mp, struct ether_header *, int, struct llc *); static void bridge_linkstate(struct ifnet *ifp); static void bridge_linkcheck(struct bridge_softc *sc); /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */ #define VLANTAGOF(_m) \ (_m->m_flags & M_VLANTAG) ? EVL_VLANOFTAG(_m->m_pkthdr.ether_vtag) : 1 static struct bstp_cb_ops bridge_ops = { .bcb_state = bridge_state_change, .bcb_rtage = bridge_rtable_expire }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); /* only pass IP[46] packets when pfil is enabled */ VNET_DEFINE_STATIC(int, pfil_onlyip) = 1; #define V_pfil_onlyip VNET(pfil_onlyip) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, "Only pass IP packets when pfil is enabled"); /* run pfil hooks on the bridge interface */ VNET_DEFINE_STATIC(int, pfil_bridge) = 1; #define V_pfil_bridge VNET(pfil_bridge) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, "Packet filter on the bridge interface"); /* layer2 filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw); #define V_pfil_ipfw VNET(pfil_ipfw) /* layer2 ARP filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw_arp); #define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, "Filter ARP packets through IPFW layer2"); /* run pfil hooks on the member interface */ VNET_DEFINE_STATIC(int, pfil_member) = 1; #define V_pfil_member VNET(pfil_member) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, "Packet filter on the member interface"); /* run pfil hooks on the physical interface for locally destined packets */ VNET_DEFINE_STATIC(int, pfil_local_phys); #define V_pfil_local_phys VNET(pfil_local_phys) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); /* log STP state changes */ VNET_DEFINE_STATIC(int, log_stp); #define V_log_stp VNET(log_stp) SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0, "Log STP state changes"); /* share MAC with first bridge member */ VNET_DEFINE_STATIC(int, bridge_inherit_mac); #define V_bridge_inherit_mac VNET(bridge_inherit_mac) SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); struct bridge_control { int (*bc_func)(struct bridge_softc *, void *); int bc_argsize; int bc_flags; }; #define BC_F_COPYIN 0x01 /* copy arguments in */ #define BC_F_COPYOUT 0x02 /* copy arguments out */ #define BC_F_SUSER 0x04 /* do super-user check */ const struct bridge_control bridge_control_table[] = { { bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sifflags, 
sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_addspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_delspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gbparam, sizeof(struct ifbropreq), BC_F_COPYOUT }, { bridge_ioctl_grte, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifsstp, sizeof(struct ifbpstpconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_stxhc, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, }; const int bridge_control_table_size = nitems(bridge_control_table); VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list); #define V_bridge_list VNET(bridge_list) #define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \ "if_bridge list", NULL, MTX_DEF) #define BRIDGE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_bridge_list_mtx) #define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx) #define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx) VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner); #define V_bridge_cloner VNET(bridge_cloner) static const char bridge_name[] = "bridge"; static void vnet_bridge_init(const void *unused __unused) { BRIDGE_LIST_LOCK_INIT(); LIST_INIT(&V_bridge_list); V_bridge_cloner = if_clone_simple(bridge_name, bridge_clone_create, bridge_clone_destroy, 0); } VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_bridge_init, NULL); static void vnet_bridge_uninit(const void *unused __unused) { if_clone_detach(V_bridge_cloner); V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_bridge_uninit, NULL); static int bridge_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); bridge_dn_p = bridge_dummynet; bridge_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, bridge_ifdetach, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, bridge_detach_cookie); 
uma_zdestroy(bridge_rtnode_zone);
		bridge_dn_p = NULL;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t bridge_mod = {
	"if_bridge",
	bridge_modevent,
	0
};

DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_bridge, 1);
MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);

/*
 * handler for net.link.bridge.ipfw
 */
static int
sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
{
	int enable = V_pfil_ipfw;
	int error;

	error = sysctl_handle_int(oidp, &enable, 0, req);
	enable &= 1;

	if (enable != V_pfil_ipfw) {
		V_pfil_ipfw = enable;

		/*
		 * Disable pfil so that ipfw doesn't run twice; if the user
		 * really wants both then they can re-enable pfil_bridge and/or
		 * pfil_member. Also allow non-ip packets as ipfw can filter by
		 * layer2 type.
		 */
		if (V_pfil_ipfw) {
			V_pfil_onlyip = 0;
			V_pfil_bridge = 0;
			V_pfil_member = 0;
		}
	}

	return (error);
}
SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw), 0,
	&sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");

/*
 * bridge_clone_create:
 *
 *	Create a new bridge instance.
 */
static int
bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct bridge_softc *sc, *sc2;
	struct ifnet *bifp, *ifp;
	int fb, retry;
	unsigned long hostid;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	BRIDGE_LOCK_INIT(sc);
	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;

	/* Initialize our routing table. */
	bridge_rtable_init(sc);

	callout_init_mtx(&sc->sc_brcallout, &sc->sc_mtx, 0);

	LIST_INIT(&sc->sc_iflist);
	LIST_INIT(&sc->sc_spanlist);

	ifp->if_softc = sc;
	if_initname(ifp, bridge_name, unit);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = bridge_ioctl;
	ifp->if_transmit = bridge_transmit;
	ifp->if_qflush = bridge_qflush;
	ifp->if_init = bridge_init;
	ifp->if_type = IFT_BRIDGE;

	/*
	 * Generate an ethernet address in the locally administered range.
	 *
	 * Since we are using random ethernet addresses for the bridge, it is
	 * possible that we might have address collisions, so make sure that
	 * this hardware address isn't already in use on another bridge.
	 * The first try uses the hostid and falls back to arc4rand().
	 */
	fb = 0;
	getcredhostid(curthread->td_ucred, &hostid);
	do {
		if (fb || hostid == 0) {
-			arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
-			sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
-			sc->sc_defaddr[0] |= 2;	/* set the LAA bit */
+			ether_fakeaddr(&sc->sc_defaddr);
		} else {
-			sc->sc_defaddr[0] = 0x2;
-			sc->sc_defaddr[1] = (hostid >> 24) & 0xff;
-			sc->sc_defaddr[2] = (hostid >> 16) & 0xff;
-			sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff;
-			sc->sc_defaddr[4] = hostid & 0xff;
-			sc->sc_defaddr[5] = ifp->if_dunit & 0xff;
+			sc->sc_defaddr.octet[0] = 0x2;
+			sc->sc_defaddr.octet[1] = (hostid >> 24) & 0xff;
+			sc->sc_defaddr.octet[2] = (hostid >> 16) & 0xff;
+			sc->sc_defaddr.octet[3] = (hostid >> 8 ) & 0xff;
+			sc->sc_defaddr.octet[4] = hostid & 0xff;
+			sc->sc_defaddr.octet[5] = ifp->if_dunit & 0xff;
		}

		fb = 1;
		retry = 0;
		BRIDGE_LIST_LOCK();
		LIST_FOREACH(sc2, &V_bridge_list, sc_list) {
			bifp = sc2->sc_ifp;
-			if (memcmp(sc->sc_defaddr,
+			if (memcmp(sc->sc_defaddr.octet,
			    IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
				retry = 1;
				break;
			}
		}
		BRIDGE_LIST_UNLOCK();
	} while (retry == 1);

	bstp_attach(&sc->sc_stp, &bridge_ops);
-	ether_ifattach(ifp, sc->sc_defaddr);
+	ether_ifattach(ifp, sc->sc_defaddr.octet);
	/* Now undo some of the damage...
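	 *
	 * [Editor's note] The ether_fakeaddr() body itself is not part of
	 * the hunks shown here; a sketch consistent with the inline code
	 * it replaces above (random address, multicast bit cleared,
	 * locally-administered bit set) would be:
	 *
	 *	void
	 *	ether_fakeaddr(struct ether_addr *hwaddr)
	 *	{
	 *		arc4rand(hwaddr->octet, ETHER_ADDR_LEN, 1);
	 *		hwaddr->octet[0] &= ~1;	   clear multicast bit
	 *		hwaddr->octet[0] |= 2;	   set the LAA bit
	 *	}
	 *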
*/ ifp->if_baudrate = 0; ifp->if_type = IFT_BRIDGE; BRIDGE_LIST_LOCK(); LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list); BRIDGE_LIST_UNLOCK(); return (0); } /* * bridge_clone_destroy: * * Destroy a bridge instance. */ static void bridge_clone_destroy(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_softc; struct bridge_iflist *bif; BRIDGE_LOCK(sc); bridge_stop(ifp, 1); ifp->if_flags &= ~IFF_UP; while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL) bridge_delete_member(sc, bif, 0); while ((bif = LIST_FIRST(&sc->sc_spanlist)) != NULL) { bridge_delete_span(sc, bif); } BRIDGE_UNLOCK(sc); callout_drain(&sc->sc_brcallout); BRIDGE_LIST_LOCK(); LIST_REMOVE(sc, sc_list); BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); ether_ifdetach(ifp); if_free(ifp); /* Tear down the routing table. */ bridge_rtable_fini(sc); BRIDGE_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } /* * bridge_ioctl: * * Handle a control request from the operator. */ static int bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct bridge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bridge_iflist *bif; struct thread *td = curthread; union { struct ifbreq ifbreq; struct ifbifconf ifbifconf; struct ifbareq ifbareq; struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; int error = 0; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { error = EINVAL; break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { error = EINVAL; break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { error = EINVAL; break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); if (error) break; } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { error = EINVAL; break; } bzero(&args, sizeof(args)); if (bc->bc_flags & BC_F_COPYIN) { error = copyin(ifd->ifd_data, &args, ifd->ifd_len); if (error) break; } BRIDGE_LOCK(sc); error = (*bc->bc_func)(sc, &args); BRIDGE_UNLOCK(sc); if (error) break; if (bc->bc_flags & BC_F_COPYOUT) error = copyout(&args, ifd->ifd_data, ifd->ifd_len); break; case SIOCSIFFLAGS: if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ BRIDGE_LOCK(sc); bridge_stop(ifp, 1); BRIDGE_UNLOCK(sc); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. */ (*ifp->if_init)(sc); } break; case SIOCSIFMTU: if (ifr->ifr_mtu < 576) { error = EINVAL; break; } if (LIST_EMPTY(&sc->sc_iflist)) { sc->sc_ifp->if_mtu = ifr->ifr_mtu; break; } BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) { log(LOG_NOTICE, "%s: invalid MTU: %u(%s)" " != %d\n", sc->sc_ifp->if_xname, bif->bif_ifp->if_mtu, bif->bif_ifp->if_xname, ifr->ifr_mtu); error = EINVAL; break; } } if (!error) sc->sc_ifp->if_mtu = ifr->ifr_mtu; BRIDGE_UNLOCK(sc); break; default: /* * drop the lock as ether_ioctl() will call bridge_start() and * cause the lock to be recursed. 
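	 *
	 * [Editor's illustration] The SIOC[GS]DRVSPEC dispatch above is
	 * how userland (e.g. ifconfig(8)) reaches bridge_control_table;
	 * a hypothetical call for a BC_F_COPYOUT entry such as
	 * bridge_ioctl_gcache might look like:
	 *
	 *	struct ifbrparam param;
	 *	struct ifdrv ifd;
	 *
	 *	memset(&ifd, 0, sizeof(ifd));
	 *	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
	 *	ifd.ifd_cmd = BRDGGCACHE;	   index into the table
	 *	ifd.ifd_len = sizeof(param);	   must equal bc_argsize
	 *	ifd.ifd_data = &param;
	 *	ioctl(sock, SIOCGDRVSPEC, &ifd);   param.ifbrp_csize is set
	 *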
*/ error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * bridge_mutecaps: * * Clear or restore unwanted capabilities on the member interface */ static void bridge_mutecaps(struct bridge_softc *sc) { struct bridge_iflist *bif; int enabled, mask; /* Initial bitmask of capabilities to test */ mask = BRIDGE_IFCAPS_MASK; LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { /* Every member must support it or its disabled */ mask &= bif->bif_savedcaps; } BRIDGE_XLOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; /* strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; BRIDGE_UNLOCK(sc); bridge_set_ifcap(sc, bif, enabled); BRIDGE_LOCK(sc); } BRIDGE_XDROP(sc); } static void bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) { struct ifnet *ifp = bif->bif_ifp; struct ifreq ifr; int error, mask, stuck; BRIDGE_UNLOCK_ASSERT(sc); bzero(&ifr, sizeof(ifr)); ifr.ifr_reqcap = set; if (ifp->if_capenable != set) { error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr); if (error) if_printf(sc->sc_ifp, "error setting capabilities on %s: %d\n", ifp->if_xname, error); mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP; stuck = ifp->if_capenable & mask & ~set; if (stuck != 0) if_printf(sc->sc_ifp, "can't disable some capabilities on %s: 0x%x\n", ifp->if_xname, stuck); } } /* * bridge_lookup_member: * * Lookup a bridge member interface. */ static struct bridge_iflist * bridge_lookup_member(struct bridge_softc *sc, const char *name) { struct bridge_iflist *bif; struct ifnet *ifp; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { ifp = bif->bif_ifp; if (strcmp(ifp->if_xname, name) == 0) return (bif); } return (NULL); } /* * bridge_lookup_member_if: * * Lookup a bridge member interface by ifnet*. */ static struct bridge_iflist * bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) { struct bridge_iflist *bif; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp == member_ifp) return (bif); } return (NULL); } /* * bridge_delete_member: * * Delete the specified member interface. */ static void bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, int gone) { struct ifnet *ifs = bif->bif_ifp; struct ifnet *fif = NULL; BRIDGE_LOCK_ASSERT(sc); if (bif->bif_flags & IFBIF_STP) bstp_disable(&bif->bif_stp); ifs->if_bridge = NULL; BRIDGE_XLOCK(sc); LIST_REMOVE(bif, bif_next); BRIDGE_XDROP(sc); /* * If removing the interface that gave the bridge its mac address, set * the mac address of the bridge to the address of the next member, or * to its default address if no members are left. 
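	 *
	 * [Editor's worked example for bridge_mutecaps() above] A
	 * capability in BRIDGE_IFCAPS_MASK stays enabled only if every
	 * member supports it; with two members such as
	 *	em0: IFCAP_TXCSUM | IFCAP_TSO
	 *	em1: IFCAP_TXCSUM
	 * the intersection loop leaves mask == IFCAP_TXCSUM, so TSO is
	 * switched off on em0 and only TXCSUM remains enabled bridge-wide.
	 *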
*/
	if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		if (LIST_EMPTY(&sc->sc_iflist)) {
-			bcopy(sc->sc_defaddr,
+			bcopy(&sc->sc_defaddr,
			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			fif = LIST_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif),
			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
		}
		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
	}

	bridge_linkcheck(sc);
	bridge_mutecaps(sc);	/* recalculate now that this interface is removed */
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	ifs->if_bridge_output = NULL;
	ifs->if_bridge_input = NULL;
	ifs->if_bridge_linkstate = NULL;
	BRIDGE_UNLOCK(sc);
	if (!gone) {
		switch (ifs->if_type) {
		case IFT_ETHER:
		case IFT_L2VLAN:
			/*
			 * Take the interface out of promiscuous mode, but only
			 * if it was promiscuous in the first place. It might
			 * not be if we're in the bridge_ioctl_add() error path.
			 */
			if (ifs->if_flags & IFF_PROMISC)
				(void) ifpromisc(ifs, 0);
			break;

		case IFT_GIF:
			break;

		default:
#ifdef DIAGNOSTIC
			panic("bridge_delete_member: impossible");
#endif
			break;
		}
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
	}
	bstp_destroy(&bif->bif_stp);	/* prepare to free */
	BRIDGE_LOCK(sc);
	free(bif, M_DEVBUF);
}

/*
 * bridge_delete_span:
 *
 *	Delete the specified span interface.
 */
static void
bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
{
	BRIDGE_LOCK_ASSERT(sc);

	KASSERT(bif->bif_ifp->if_bridge == NULL,
	    ("%s: not a span interface", __func__));

	LIST_REMOVE(bif, bif_next);
	free(bif, M_DEVBUF);
}

static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif = NULL;
	struct ifnet *ifs;
	int error = 0;

	ifs = ifunit(req->ifbr_ifsname);
	if (ifs == NULL)
		return (ENOENT);
	if (ifs->if_ioctl == NULL)	/* must be supported */
		return (EINVAL);

	/* If it's in the span list, it can't be a member. */
	LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
		if (ifs == bif->bif_ifp)
			return (EBUSY);

	if (ifs->if_bridge == sc)
		return (EEXIST);

	if (ifs->if_bridge != NULL)
		return (EBUSY);

	switch (ifs->if_type) {
	case IFT_ETHER:
	case IFT_L2VLAN:
	case IFT_GIF:
		/* permitted interface types */
		break;
	default:
		return (EINVAL);
	}

#ifdef INET6
	/*
	 * Two valid inet6 addresses with link-local scope must not be
	 * on the parent interface and the member interfaces at the
	 * same time. This restriction is needed to prevent violation
	 * of link-local scope zone. Attempts to add a member
	 * interface which has inet6 addresses when the parent has
	 * inet6 trigger removal of all inet6 addresses on the member
	 * interface.
	 */

	/* Check if the parent interface has a link-local scope addr. */
	if (V_allow_llz_overlap == 0 &&
	    in6ifa_llaonifp(sc->sc_ifp) != NULL) {
		/*
		 * If any, remove all inet6 addresses from the member
		 * interfaces.
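		 *
		 * [Editor's note on the bcopy() hunk in
		 * bridge_delete_member() above] With sc_defaddr now a
		 * struct ether_addr rather than a u_char array, the copy
		 * source needs an explicit '&'; an equivalent and perhaps
		 * more explicit form would be:
		 *
		 *	bcopy(sc->sc_defaddr.octet, IF_LLADDR(sc->sc_ifp),
		 *	    ETHER_ADDR_LEN);
		 *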
*/ BRIDGE_XLOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (in6ifa_llaonifp(bif->bif_ifp)) { BRIDGE_UNLOCK(sc); in6_ifdetach(bif->bif_ifp); BRIDGE_LOCK(sc); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", bif->bif_ifp->if_xname); } } BRIDGE_XDROP(sc); if (in6ifa_llaonifp(ifs)) { BRIDGE_UNLOCK(sc); in6_ifdetach(ifs); BRIDGE_LOCK(sc); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifs->if_xname); } } #endif /* Allow the first Ethernet member to define the MTU */ if (LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n", ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; /* * Assign the interface's MAC address to the bridge if it's the first * member and the MAC address of the bridge has not been changed from * the default randomly generated one. */ if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) && - !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) { + !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } ifs->if_bridge = sc; ifs->if_bridge_output = bridge_output; ifs->if_bridge_input = bridge_input; ifs->if_bridge_linkstate = bridge_linkstate; bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp); /* * XXX: XLOCK HERE!?! * * NOTE: insert_***HEAD*** should be safe for the traversals. 
*/ LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); /* Set interface capabilities to the intersection set of all members */ bridge_mutecaps(sc); bridge_linkcheck(sc); /* Place the interface into promiscuous mode */ switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: BRIDGE_UNLOCK(sc); error = ifpromisc(ifs, 1); BRIDGE_LOCK(sc); break; } if (error) bridge_delete_member(sc, bif, 0); return (error); } static int bridge_ioctl_del(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bridge_delete_member(sc, bif, 0); return (0); } static int bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; req->ifbr_state = bp->bp_state; req->ifbr_priority = bp->bp_priority; req->ifbr_path_cost = bp->bp_path_cost; req->ifbr_portno = bif->bif_ifp->if_index & 0xfff; req->ifbr_proto = bp->bp_protover; req->ifbr_role = bp->bp_role; req->ifbr_stpflags = bp->bp_flags; req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; /* Copy STP state options as flags */ if (bp->bp_operedge) req->ifbr_ifsflags |= IFBIF_BSTP_EDGE; if (bp->bp_flags & BSTP_PORT_AUTOEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE; if (bp->bp_ptp_link) req->ifbr_ifsflags |= IFBIF_BSTP_PTP; if (bp->bp_flags & BSTP_PORT_AUTOPTP) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP; if (bp->bp_flags & BSTP_PORT_ADMEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE; if (bp->bp_flags & BSTP_PORT_ADMCOST) req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST; return (0); } static int bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; int error; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ return (EINVAL); if (req->ifbr_ifsflags & IFBIF_STP) { if ((bif->bif_flags & IFBIF_STP) == 0) { error = bstp_enable(&bif->bif_stp); if (error) return (error); } } else { if ((bif->bif_flags & IFBIF_STP) != 0) bstp_disable(&bif->bif_stp); } /* Pass on STP flags */ bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0); bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0); bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0); bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 
1 : 0); /* Save the bits relating to the bridge */ bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK; return (0); } static int bridge_ioctl_scache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brtmax = param->ifbrp_csize; bridge_rttrim(sc); return (0); } static int bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_csize = sc->sc_brtmax; return (0); } static int bridge_ioctl_gifs(struct bridge_softc *sc, void *arg) { struct ifbifconf *bifc = arg; struct bridge_iflist *bif; struct ifbreq breq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; LIST_FOREACH(bif, &sc->sc_iflist, bif_next) count++; LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) count++; buflen = sizeof(breq) * count; if (bifc->ifbic_len == 0) { bifc->ifbic_len = buflen; return (0); } BRIDGE_UNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); BRIDGE_LOCK(sc); count = 0; buf = outbuf; len = min(bifc->ifbic_len, buflen); bzero(&breq, sizeof(breq)); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); /* Fill in the ifbreq structure */ error = bridge_ioctl_gifflags(sc, &breq); if (error) break; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); breq.ifbr_ifsflags = bif->bif_flags; breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } BRIDGE_UNLOCK(sc); bifc->ifbic_len = sizeof(breq) * count; error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); BRIDGE_LOCK(sc); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_rts(struct bridge_softc *sc, void *arg) { struct ifbaconf *bac = arg; struct bridge_rtnode *brt; struct ifbareq bareq; char *buf, *outbuf; int count, buflen, len, error = 0; if (bac->ifbac_len == 0) return (0); count = 0; LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) count++; buflen = sizeof(bareq) * count; BRIDGE_UNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); BRIDGE_LOCK(sc); count = 0; buf = outbuf; len = min(bac->ifbac_len, buflen); bzero(&bareq, sizeof(bareq)); LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { if (len < sizeof(bareq)) goto out; strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname, sizeof(bareq.ifba_ifsname)); memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); bareq.ifba_vlan = brt->brt_vlan; if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && time_uptime < brt->brt_expire) bareq.ifba_expire = brt->brt_expire - time_uptime; else bareq.ifba_expire = 0; bareq.ifba_flags = brt->brt_flags; memcpy(buf, &bareq, sizeof(bareq)); count++; buf += sizeof(bareq); len -= sizeof(bareq); } out: BRIDGE_UNLOCK(sc); bac->ifbac_len = sizeof(bareq) * count; error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); BRIDGE_LOCK(sc); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; struct bridge_iflist *bif; int error; bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) return (ENOENT); error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); return (error); } static int bridge_ioctl_sto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brttimeout = 
param->ifbrp_ctime; return (0); } static int bridge_ioctl_gto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_ctime = sc->sc_brttimeout; return (0); } static int bridge_ioctl_daddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan)); } static int bridge_ioctl_flush(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; bridge_rtflush(sc, req->ifbr_ifsflags); return (0); } static int bridge_ioctl_gpri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_prio = bs->bs_bridge_priority; return (0); } static int bridge_ioctl_spri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio)); } static int bridge_ioctl_ght(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_hellotime = bs->bs_bridge_htime >> 8; return (0); } static int bridge_ioctl_sht(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime)); } static int bridge_ioctl_gfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8; return (0); } static int bridge_ioctl_sfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay)); } static int bridge_ioctl_gma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_maxage = bs->bs_bridge_max_age >> 8; return (0); } static int bridge_ioctl_sma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage)); } static int bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } static int bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } static int bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EBUSY); if (ifs->if_bridge != NULL) return (EBUSY); switch (ifs->if_type) { case IFT_ETHER: case IFT_GIF: case IFT_L2VLAN: break; default: return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_SPAN; LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next); return (0); } static int bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct ifnet *ifs; ifs = 
ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) return (ENOENT); bridge_delete_span(sc, bif); return (0); } static int bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg) { struct ifbropreq *req = arg; struct bstp_state *bs = &sc->sc_stp; struct bstp_port *root_port; req->ifbop_maxage = bs->bs_bridge_max_age >> 8; req->ifbop_hellotime = bs->bs_bridge_htime >> 8; req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; root_port = bs->bs_root_port; if (root_port == NULL) req->ifbop_root_port = 0; else req->ifbop_root_port = root_port->bp_ifp->if_index; req->ifbop_holdcount = bs->bs_txholdcount; req->ifbop_priority = bs->bs_bridge_priority; req->ifbop_protocol = bs->bs_protover; req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; return (0); } static int bridge_ioctl_grte(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_cexceeded = sc->sc_brtexceeded; return (0); } static int bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg) { struct ifbpstpconf *bifstp = arg; struct bridge_iflist *bif; struct bstp_port *bp; struct ifbpstpreq bpreq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if ((bif->bif_flags & IFBIF_STP) != 0) count++; } buflen = sizeof(bpreq) * count; if (bifstp->ifbpstp_len == 0) { bifstp->ifbpstp_len = buflen; return (0); } BRIDGE_UNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); BRIDGE_LOCK(sc); count = 0; buf = outbuf; len = min(bifstp->ifbpstp_len, buflen); bzero(&bpreq, sizeof(bpreq)); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(bpreq)) break; if ((bif->bif_flags & IFBIF_STP) == 0) continue; bp = &bif->bif_stp; bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; memcpy(buf, &bpreq, sizeof(bpreq)); count++; buf += sizeof(bpreq); len -= sizeof(bpreq); } BRIDGE_UNLOCK(sc); bifstp->ifbpstp_len = sizeof(bpreq) * count; error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); BRIDGE_LOCK(sc); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_sproto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto)); } static int bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc)); } /* * bridge_ifdetach: * * Detach an interface from a bridge. Called when a member * interface is detaching. */ static void bridge_ifdetach(void *arg __unused, struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; if (ifp->if_flags & IFF_RENAMING) return; if (V_bridge_cloner == NULL) { /* * This detach handler can be called after * vnet_bridge_uninit(). Just return in that case. 
*/ return; } /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif != NULL) bridge_delete_member(sc, bif, 1); BRIDGE_UNLOCK(sc); return; } /* Check if the interface is a span port */ BRIDGE_LIST_LOCK(); LIST_FOREACH(sc, &V_bridge_list, sc_list) { BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifp == bif->bif_ifp) { bridge_delete_span(sc, bif); break; } BRIDGE_UNLOCK(sc); } BRIDGE_LIST_UNLOCK(); } /* * bridge_init: * * Initialize a bridge interface. */ static void bridge_init(void *xsc) { struct bridge_softc *sc = (struct bridge_softc *)xsc; struct ifnet *ifp = sc->sc_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; BRIDGE_LOCK(sc); callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */ BRIDGE_UNLOCK(sc); } /* * bridge_stop: * * Stop the bridge interface. */ static void bridge_stop(struct ifnet *ifp, int disable) { struct bridge_softc *sc = ifp->if_softc; BRIDGE_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; callout_stop(&sc->sc_brcallout); bstp_stop(&sc->sc_stp); bridge_rtflush(sc, IFBF_FLUSHDYN); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * bridge_enqueue: * * Enqueue a packet on a bridge member interface. * */ static int bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) { int len, err = 0; short mflags; struct mbuf *m0; /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = NULL; len = m->m_pkthdr.len; mflags = m->m_flags; /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. */ if ((m->m_flags & M_VLANTAG) && (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if (m == NULL) { if_printf(dst_ifp, "unable to prepend VLAN header\n"); if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1); continue; } m->m_flags &= ~M_VLANTAG; } M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */ if ((err = dst_ifp->if_transmit(dst_ifp, m))) { m_freem(m0); if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); break; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1); } return (err); } /* * bridge_dummynet: * * Receive a queued packet from dummynet and pass it on to the output * interface. * * The mbuf has the Ethernet header already attached. */ static void bridge_dummynet(struct mbuf *m, struct ifnet *ifp) { struct bridge_softc *sc; sc = ifp->if_bridge; /* * The packet didnt originate from a member interface. This should only * ever happen if a member interface is removed while packets are * queued for it. */ if (sc == NULL) { m_freem(m); return; } if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, ifp, m); } /* * bridge_output: * * Send output from a bridge member interface. This * performs the bridging function for locally originated * packets. * * The mbuf has the Ethernet header already attached. We must * enqueue or free the mbuf before returning. 
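 */

/*
 * Illustrative sketch (added for exposition; hypothetical, not part of
 * this file): the tag-insertion decision bridge_enqueue() makes above.
 * A tagged mbuf keeps M_VLANTAG when the outgoing NIC offloads tag
 * insertion; otherwise the tag is folded into the frame in software.
 */
#if 0
static struct mbuf *
example_vlan_out(struct ifnet *dst_ifp, struct mbuf *m)
{
	if ((m->m_flags & M_VLANTAG) == 0 ||
	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
		return (m);		/* nothing to do in software */
	m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
	if (m != NULL)
		m->m_flags &= ~M_VLANTAG;
	return (m);			/* NULL if no mbuf was available */
}
#endif

/*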
*/ static int bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { struct ether_header *eh; struct ifnet *dst_if; struct bridge_softc *sc; uint16_t vlan; if (m->m_len < ETHER_HDR_LEN) { m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) return (0); } eh = mtod(m, struct ether_header *); sc = ifp->if_bridge; vlan = VLANTAGOF(m); BRIDGE_LOCK(sc); /* * If bridge is down, but the original output interface is up, * go ahead and send out that interface. Otherwise, the packet * is dropped below. */ if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { dst_if = ifp; goto sendunicast; } /* * If the packet is a multicast, or we don't know a better way to * get there, send to all interfaces. */ if (ETHER_IS_MULTICAST(eh->ether_dhost)) dst_if = NULL; else dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan); if (dst_if == NULL) { struct bridge_iflist *bif; struct mbuf *mc; int error = 0, used = 0; bridge_span(sc, m); BRIDGE_LOCK2REF(sc, error); if (error) { m_freem(m); return (0); } LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { dst_if = bif->bif_ifp; if (dst_if->if_type == IFT_GIF) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; /* * If this is not the original output interface, * and the interface is participating in spanning * tree, make sure the port is in a state that * allows forwarding. */ if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if (LIST_NEXT(bif, bif_next) == NULL) { used = 1; mc = m; } else { mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } bridge_enqueue(sc, dst_if, mc); } if (used == 0) m_freem(m); BRIDGE_UNREF(sc); return (0); } sendunicast: /* * XXX Spanning tree consideration here? */ bridge_span(sc, m); if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); BRIDGE_UNLOCK(sc); return (0); } BRIDGE_UNLOCK(sc); bridge_enqueue(sc, dst_if, m); return (0); } /* * bridge_transmit: * * Do output on a bridge. * */ static int bridge_transmit(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; struct ether_header *eh; struct ifnet *dst_if; int error = 0; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); BRIDGE_LOCK(sc); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { BRIDGE_UNLOCK(sc); error = bridge_enqueue(sc, dst_if, m); } else bridge_broadcast(sc, ifp, m, 0); return (error); } /* * The ifp->if_qflush entry point for if_bridge(4) is no-op. */ static void bridge_qflush(struct ifnet *ifp __unused) { } /* * bridge_forward: * * The forwarding function of the bridge. * * NOTE: Releases the lock on return. */ static void bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct mbuf *m) { struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; uint16_t vlan; uint8_t *dst; int error; src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; eh = mtod(m, struct ether_header *); dst = eh->ether_dhost; /* If the interface is learning, record the address. 
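 * Learning interacts with the per-member limit configured through
 * bridge_ioctl_sifmaxaddr() above: with bif_addrmax set, a source
 * that cannot be added to the cache makes bridge_rtupdate() fail and
 * the frame is dropped below, so a flood of random source addresses
 * cannot displace legitimate entries.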
*/ if (sbif->bif_flags & IFBIF_LEARNING) { error = bridge_rtupdate(sc, eh->ether_shost, vlan, sbif, 0, IFBAF_DYNAMIC); /* * If the interface has address limits then deny any source * that is not in the cache. */ if (error && sbif->bif_addrmax) goto drop; } if ((sbif->bif_flags & IFBIF_STP) != 0 && sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) goto drop; /* * At this point, the port either doesn't participate * in spanning tree or it is in the forwarding state. */ /* * If the packet is unicast, destined for someone on * "this" side of the bridge, drop it. */ if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) { dst_if = bridge_rtlookup(sc, dst, vlan); if (src_if == dst_if) goto drop; } else { /* * Check if it's a reserved multicast address; any address * listed in 802.1D section 7.12.6 may not be forwarded by the * bridge. * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F */ if (dst[0] == 0x01 && dst[1] == 0x80 && dst[2] == 0xc2 && dst[3] == 0x00 && dst[4] == 0x00 && dst[5] <= 0x0f) goto drop; /* ...forward it to all interfaces. */ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); dst_if = NULL; } /* * If we have a destination interface which is a member of our bridge, * OR this is a unicast packet, push it through the bpf(4) machinery. * For broadcast or multicast packets, don't bother because it will * be reinjected into ether_input. We do this before we pass the packets * through the pfil(9) framework, as it is possible that pfil(9) will * drop the packet, or possibly modify it, making it difficult to debug * firewall issues on the bridge. */ if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) ETHER_BPF_MTAP(ifp, m); /* run the packet filter */ if (PFIL_HOOKED_IN(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_IN(V_inet6_pfil_head) #endif ) { BRIDGE_UNLOCK(sc); if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) return; if (m == NULL) return; BRIDGE_LOCK(sc); } if (dst_if == NULL) { bridge_broadcast(sc, src_if, m, 1); return; } /* * At this point, we're dealing with a unicast frame * going to a different interface. */ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) goto drop; dbif = bridge_lookup_member_if(sc, dst_if); if (dbif == NULL) /* Not a member of the bridge (anymore?) */ goto drop; /* Private segments can not talk to each other */ if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; BRIDGE_UNLOCK(sc); if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, dst_if, m); return; drop: BRIDGE_UNLOCK(sc); m_freem(m); } /* * bridge_input: * * Receive input from a member interface. Queue the packet for * bridging if it is not for us. */ static struct mbuf * bridge_input(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif, *bif2; struct ifnet *bifp; struct ether_header *eh; struct mbuf *mc, *mc2; uint16_t vlan; int error; if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (m); bifp = sc->sc_ifp; vlan = VLANTAGOF(m); /* * Implement support for bridge monitoring. If this flag has been * set on this interface, discard the packet once we push it through * the bpf(4) machinery, but before we do, increment the byte and * packet counters associated with this interface. 
*/ if ((bifp->if_flags & IFF_MONITOR) != 0) { m->m_pkthdr.rcvif = bifp; ETHER_BPF_MTAP(bifp, m); if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return (NULL); } BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { BRIDGE_UNLOCK(sc); return (m); } eh = mtod(m, struct ether_header *); bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { /* Tap off 802.1D packets; they do not get forwarded. */ if (memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */ BRIDGE_UNLOCK(sc); return (NULL); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { BRIDGE_UNLOCK(sc); return (m); } /* * Make a deep copy of the packet and enqueue the copy * for bridge processing; return the original packet for * local processing. */ mc = m_dup(m, M_NOWAIT); if (mc == NULL) { BRIDGE_UNLOCK(sc); return (m); } /* Perform the bridge forwarding function with the copy. */ bridge_forward(sc, bif, mc); /* * Reinject the mbuf as arriving on the bridge so we have a * chance at claiming multicast packets. We can not loop back * here from ether_input as a bridge is never a member of a * bridge. */ KASSERT(bifp->if_bridge == NULL, ("loop created in bridge_input")); mc2 = m_dup(m, M_NOWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); mc2 = m_copyup(mc2, i, ETHER_ALIGN); } if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; (*bifp->if_input)(bifp, mc2); } /* Return the original packet for local processing. */ return (m); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { BRIDGE_UNLOCK(sc); return (m); } #if (defined(INET) || defined(INET6)) # define OR_CARP_CHECK_WE_ARE_DST(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_dhost)) # define OR_CARP_CHECK_WE_ARE_SRC(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_shost)) #else # define OR_CARP_CHECK_WE_ARE_DST(iface) # define OR_CARP_CHECK_WE_ARE_SRC(iface) #endif #ifdef INET6 # define OR_PFIL_HOOKED_INET6 \ || PFIL_HOOKED_IN(V_inet6_pfil_head) #else # define OR_PFIL_HOOKED_INET6 #endif #define GRAB_OUR_PACKETS(iface) \ if ((iface)->if_type == IFT_GIF) \ continue; \ /* It is destined for us. */ \ if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_DST((iface)) \ ) { \ if ((iface)->if_type == IFT_BRIDGE) { \ ETHER_BPF_MTAP(iface, m); \ if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \ if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \ /* Filter on the physical interface. */ \ if (V_pfil_local_phys && \ (PFIL_HOOKED_IN(V_inet_pfil_head) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ BRIDGE_UNLOCK(sc); \ return (NULL); \ } \ eh = mtod(m, struct ether_header *); \ } \ } \ if (bif->bif_flags & IFBIF_LEARNING) { \ error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ BRIDGE_UNLOCK(sc); \ m_freem(m); \ return (NULL); \ } \ } \ m->m_pkthdr.rcvif = iface; \ BRIDGE_UNLOCK(sc); \ return (m); \ } \ \ /* We just received a packet that we sent out. */ \ if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_SRC((iface)) \ ) { \ BRIDGE_UNLOCK(sc); \ m_freem(m); \ return (NULL); \ } /* * Unicast. Make sure it's not for the bridge. 
*/ do { GRAB_OUR_PACKETS(bifp) } while (0); /* * Give a chance for ifp at first priority. This will help when the * packet comes through the interface like VLAN's with the same MACs * on several interfaces from the same bridge. This also will save * some CPU cycles in case the destination interface and the input * interface (eq ifp) are the same. */ do { GRAB_OUR_PACKETS(ifp) } while (0); /* Now check the all bridge members. */ LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) { GRAB_OUR_PACKETS(bif2->bif_ifp) } #undef OR_CARP_CHECK_WE_ARE_DST #undef OR_CARP_CHECK_WE_ARE_SRC #undef OR_PFIL_HOOKED_INET6 #undef GRAB_OUR_PACKETS /* Perform the bridge forwarding function. */ bridge_forward(sc, bif, m); return (NULL); } /* * bridge_broadcast: * * Send a frame to all interfaces that are members of * the bridge, except for the one on which the packet * arrived. * * NOTE: Releases the lock on return. */ static void bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, struct mbuf *m, int runfilt) { struct bridge_iflist *dbif, *sbif; struct mbuf *mc; struct ifnet *dst_if; int error = 0, used = 0, i; sbif = bridge_lookup_member_if(sc, src_if); BRIDGE_LOCK2REF(sc, error); if (error) { m_freem(m); return; } /* Filter on the bridge interface before broadcasting */ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) goto out; if (m == NULL) goto out; } LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) { dst_if = dbif->bif_ifp; if (dst_if == src_if) continue; /* Private segments can not talk to each other */ if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 && (m->m_flags & (M_BCAST|M_MCAST)) == 0) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; if (LIST_NEXT(dbif, bif_next) == NULL) { mc = m; used = 1; } else { mc = m_dup(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } /* * Filter on the output interface. Pass a NULL bridge interface * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. */ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (used == 0) { /* Keep the layer3 header aligned */ i = min(mc->m_pkthdr.len, max_protohdr); mc = m_copyup(mc, i, ETHER_ALIGN); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0) continue; if (mc == NULL) continue; } bridge_enqueue(sc, dst_if, mc); } if (used == 0) m_freem(m); out: BRIDGE_UNREF(sc); } /* * bridge_span: * * Duplicate a packet out one or more interfaces that are in span mode, * the original mbuf is unmodified. */ static void bridge_span(struct bridge_softc *sc, struct mbuf *m) { struct bridge_iflist *bif; struct ifnet *dst_if; struct mbuf *mc; if (LIST_EMPTY(&sc->sc_spanlist)) return; LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { dst_if = bif->bif_ifp; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } bridge_enqueue(sc, dst_if, mc); } } /* * bridge_rtupdate: * * Add a bridge routing entry. 
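 */

/*
 * Simplified model (hypothetical, for exposition only; no limits, no
 * locking) of the refresh half of the learning logic implemented by
 * bridge_rtupdate() below.
 */
#if 0
static void
example_refresh(struct bridge_softc *sc, const uint8_t *src,
    uint16_t vlan, struct bridge_iflist *bif)
{
	struct bridge_rtnode *brt;

	if (vlan == 0)
		vlan = 1;	/* 802.1p-tagged frames map to vlan 1 */
	brt = bridge_rtnode_lookup(sc, src, vlan);
	if (brt == NULL)
		return;		/* the real code allocates a node here */
	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
		brt->brt_dst = bif;	/* station may have moved ports */
		brt->brt_expire = time_uptime + sc->sc_brttimeout;
	}
}
#endif

/*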
*/ static int bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, struct bridge_iflist *bif, int setflags, uint8_t flags) { struct bridge_rtnode *brt; int error; BRIDGE_LOCK_ASSERT(sc); /* Check that the source address is valid and not multicast. */ if (ETHER_IS_MULTICAST(dst) || (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) return (EINVAL); /* 802.1p frames map to vlan 1 */ if (vlan == 0) vlan = 1; /* * A route for this destination might already exist. If so, * update it, otherwise create a new one. */ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) { if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; return (ENOSPC); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; return (ENOSPC); } /* * Allocate a new bridge forwarding node, and * initialize the expiration time and Ethernet * address. */ brt = uma_zalloc(bridge_rtnode_zone, M_NOWAIT | M_ZERO); if (brt == NULL) return (ENOMEM); if (bif->bif_flags & IFBIF_STICKY) brt->brt_flags = IFBAF_STICKY; else brt->brt_flags = IFBAF_DYNAMIC; memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN); brt->brt_vlan = vlan; if ((error = bridge_rtnode_insert(sc, brt)) != 0) { uma_zfree(bridge_rtnode_zone, brt); return (error); } brt->brt_dst = bif; bif->bif_addrcnt++; } if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && brt->brt_dst != bif) { brt->brt_dst->bif_addrcnt--; brt->brt_dst = bif; brt->brt_dst->bif_addrcnt++; } if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + sc->sc_brttimeout; if (setflags) brt->brt_flags = flags; return (0); } /* * bridge_rtlookup: * * Look up the destination interface for an address. */ static struct ifnet * bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; BRIDGE_LOCK_ASSERT(sc); if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) return (NULL); return (brt->brt_ifp); } /* * bridge_rttrim: * * Trim the routing table so that we have a number * of routing entries less than or equal to the * maximum number. */ static void bridge_rttrim(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); /* Make sure we actually need to do this. */ if (sc->sc_brtcnt <= sc->sc_brtmax) return; /* Force an aging cycle; this might trim enough addresses. */ bridge_rtage(sc); if (sc->sc_brtcnt <= sc->sc_brtmax) return; LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { bridge_rtnode_destroy(sc, brt); if (sc->sc_brtcnt <= sc->sc_brtmax) return; } } } /* * bridge_timer: * * Aging timer for the bridge. */ static void bridge_timer(void *arg) { struct bridge_softc *sc = arg; BRIDGE_LOCK_ASSERT(sc); bridge_rtage(sc); if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); } /* * bridge_rtage: * * Perform an aging cycle. */ static void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { if (time_uptime >= brt->brt_expire) bridge_rtnode_destroy(sc, brt); } } } /* * bridge_rtflush: * * Remove all dynamic addresses from the bridge. 
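 */

/*
 * Illustration (hypothetical): the aging and flush loops here use
 * LIST_FOREACH_SAFE because they destroy the node they are standing
 * on; the _SAFE variant samples the next pointer first.
 */
#if 0
static void
example_drop_expired(struct bridge_softc *sc)
{
	struct bridge_rtnode *brt, *nbrt;

	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
		/* 'nbrt' was read before 'brt' is freed. */
		if (time_uptime >= brt->brt_expire)
			bridge_rtnode_destroy(sc, brt);
	}
}
#endif

/*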
*/ static void bridge_rtflush(struct bridge_softc *sc, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtdaddr: * * Remove an address from the table. */ static int bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; int found = 0; BRIDGE_LOCK_ASSERT(sc); /* * If vlan is zero then we want to delete for all vlans so the lookup * may return more than one. */ while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) { bridge_rtnode_destroy(sc, brt); found = 1; } return (found ? 0 : ENOENT); } /* * bridge_rtdelete: * * Delete routes to a specific member interface. */ static void bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (brt->brt_ifp == ifp && (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtable_init: * * Initialize the route table for this bridge. */ static void bridge_rtable_init(struct bridge_softc *sc) { int i; sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, M_DEVBUF, M_WAITOK); for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) LIST_INIT(&sc->sc_rthash[i]); sc->sc_rthash_key = arc4random(); LIST_INIT(&sc->sc_rtlist); } /* * bridge_rtable_fini: * * Deconstruct the route table for this bridge. */ static void bridge_rtable_fini(struct bridge_softc *sc) { KASSERT(sc->sc_brtcnt == 0, ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt)); free(sc->sc_rthash, M_DEVBUF); } /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). */ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (/*CONSTCOND*/0) static __inline uint32_t bridge_rthash(struct bridge_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; mix(a, b, c); return (c & BRIDGE_RTHASH_MASK); } #undef mix static int bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) { d = ((int)a[i]) - ((int)b[i]); } return (d); } /* * bridge_rtnode_lookup: * * Look up a bridge route node for the specified destination. Compare the * vlan id, or if it is zero then just return the first match. */ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; uint32_t hash; int dir; BRIDGE_LOCK_ASSERT(sc); hash = bridge_rthash(sc, addr); LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr); if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) return (brt); if (dir > 0) return (NULL); } return (NULL); } /* * bridge_rtnode_insert: * * Insert the specified bridge node into the route table. We * assume the entry is not already in the table. 
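 */

/*
 * Sketch (hypothetical) of the invariant shared by
 * bridge_rtnode_lookup() above and bridge_rtnode_insert() below: each
 * hash bucket stays sorted by Ethernet address, which is what lets the
 * lookup return NULL as soon as it passes the place the key would
 * occupy.
 */
#if 0
static int
example_bucket_is_sorted(struct bridge_softc *sc, uint32_t hash)
{
	struct bridge_rtnode *brt, *next;

	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
		next = LIST_NEXT(brt, brt_hash);
		if (next != NULL && bridge_rtnode_addr_cmp(brt->brt_addr,
		    next->brt_addr) > 0)
			return (0);	/* out of order: invariant broken */
	}
	return (1);
}
#endif

/*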
*/ static int bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) { struct bridge_rtnode *lbrt; uint32_t hash; int dir; BRIDGE_LOCK_ASSERT(sc); hash = bridge_rthash(sc, brt->brt_addr); lbrt = LIST_FIRST(&sc->sc_rthash[hash]); if (lbrt == NULL) { LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash); goto out; } do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) return (EEXIST); if (dir > 0) { LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; } if (LIST_NEXT(lbrt, brt_hash) == NULL) { LIST_INSERT_AFTER(lbrt, brt, brt_hash); goto out; } lbrt = LIST_NEXT(lbrt, brt_hash); } while (lbrt != NULL); #ifdef DIAGNOSTIC panic("bridge_rtnode_insert: impossible"); #endif out: LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list); sc->sc_brtcnt++; return (0); } /* * bridge_rtnode_destroy: * * Destroy a bridge rtnode. */ static void bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) { BRIDGE_LOCK_ASSERT(sc); LIST_REMOVE(brt, brt_hash); LIST_REMOVE(brt, brt_list); sc->sc_brtcnt--; brt->brt_dst->bif_addrcnt--; uma_zfree(bridge_rtnode_zone, brt); } /* * bridge_rtable_expire: * * Set the expiry time for all routes on an interface. */ static void bridge_rtable_expire(struct ifnet *ifp, int age) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_rtnode *brt; BRIDGE_LOCK(sc); /* * If the age is zero then flush, otherwise set all the expiry times to * age for the interface */ if (age == 0) bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN); else { LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { /* Cap the expiry time to 'age' */ if (brt->brt_ifp == ifp && brt->brt_expire > time_uptime + age && (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + age; } } BRIDGE_UNLOCK(sc); } /* * bridge_state_change: * * Callback from the bridgestp code when a port changes states. */ static void bridge_state_change(struct ifnet *ifp, int state) { struct bridge_softc *sc = ifp->if_bridge; static const char *stpstates[] = { "disabled", "listening", "learning", "forwarding", "blocking", "discarding" }; CURVNET_SET(ifp->if_vnet); if (V_log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); CURVNET_RESTORE(); } /* * Send bridge packets through pfil if they are one of the types pfil can deal * with, or if they are ARP or REVARP. (pfil will pass ARP and REVARP without * question.) If *bifp or *ifp are NULL then packet filtering is skipped for * that interface. */ static int bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) { int snap, error, i, hlen; struct ether_header *eh1, eh2; struct ip *ip; struct llc llc1; u_int16_t ether_type; pfil_return_t rv; snap = 0; error = -1; /* Default error if not error == 0 */ #if 0 /* we may return with the IP fields swapped, ensure its not shared */ KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__)); #endif if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0) return (0); /* filtering is disabled */ i = min((*mp)->m_pkthdr.len, max_protohdr); if ((*mp)->m_len < i) { *mp = m_pullup(*mp, i); if (*mp == NULL) { printf("%s: m_pullup failed\n", __func__); return (-1); } } eh1 = mtod(*mp, struct ether_header *); ether_type = ntohs(eh1->ether_type); /* * Check for SNAP/LLC. 
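 * An 802.3 frame carries a length (less than ETHERMTU) in place of an
 * ethertype; the payload may then begin with an 8-byte LLC/SNAP
 * header: DSAP 0xaa, SSAP 0xaa, control 0x03 (UI), a 3-byte OUI and
 * the encapsulated 2-byte ethertype.  Hence the m_len >=
 * ETHER_HDR_LEN + 8 requirement below before llc_un.type_snap is
 * trusted.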
*/ if (ether_type < ETHERMTU) { struct llc *llc2 = (struct llc *)(eh1 + 1); if ((*mp)->m_len >= ETHER_HDR_LEN + 8 && llc2->llc_dsap == LLC_SNAP_LSAP && llc2->llc_ssap == LLC_SNAP_LSAP && llc2->llc_control == LLC_UI) { ether_type = htons(llc2->llc_un.type_snap.ether_type); snap = 1; } } /* * If we're trying to filter bridge traffic, don't look at anything * other than IP and ARP traffic. If the filter doesn't understand * IPv6, don't allow IPv6 through the bridge either. This is lame * since if we really wanted, say, an AppleTalk filter, we are hosed, * but of course we don't have an AppleTalk filter to begin with. * (Note that since pfil doesn't understand ARP it will pass *ALL* * ARP traffic.) */ switch (ether_type) { case ETHERTYPE_ARP: case ETHERTYPE_REVARP: if (V_pfil_ipfw_arp == 0) return (0); /* Automatically pass */ break; case ETHERTYPE_IP: #ifdef INET6 case ETHERTYPE_IPV6: #endif /* INET6 */ break; default: /* * Check to see if the user wants to pass non-ip * packets, these will not be checked by pfil(9) and * passed unconditionally so the default is to drop. */ if (V_pfil_onlyip) goto bad; } /* Run the packet through pfil before stripping link headers */ if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) { case PFIL_DROPPED: return (EPERM); case PFIL_CONSUMED: return (0); } } /* Strip off the Ethernet header and keep a copy. */ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2); m_adj(*mp, ETHER_HDR_LEN); /* Strip off snap header, if present */ if (snap) { m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1); m_adj(*mp, sizeof(struct llc)); } /* * Check the IP header for alignment and errors */ if (dir == PFIL_IN) { switch (ether_type) { case ETHERTYPE_IP: error = bridge_ip_checkbasic(mp); break; #ifdef INET6 case ETHERTYPE_IPV6: error = bridge_ip6_checkbasic(mp); break; #endif /* INET6 */ default: error = 0; } if (error) goto bad; } error = 0; /* * Run the packet through pfil */ rv = PFIL_PASS; switch (ether_type) { case ETHERTYPE_IP: /* * Run pfil on the member interface and the bridge, both can * be skipped by clearing pfil_member or pfil_bridge. * * Keep the order: * in_if -> bridge_if -> out_if */ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != PFIL_PASS) break; if (V_pfil_member && ifp != NULL && (rv = pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) != PFIL_PASS) break; if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != PFIL_PASS) break; /* check if we need to fragment the packet */ /* bridge_fragment generates a mbuf chain of packets */ /* that already include eth headers */ if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) { i = (*mp)->m_pkthdr.len; if (i > ifp->if_mtu) { error = bridge_fragment(ifp, mp, &eh2, snap, &llc1); return (error); } } /* Recalculate the ip checksum. 
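 * The pfil(9) hooks may have rewritten header fields (NAT rewrites of
 * the addresses, TOS changes and the like), so the header checksum is
 * redone from scratch: in_cksum_hdr() handles the common 20-byte
 * option-less header, while in_cksum() walks the mbuf when IP options
 * are present.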
*/ ip = mtod(*mp, struct ip *); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) goto bad; if (hlen > (*mp)->m_len) { if ((*mp = m_pullup(*mp, hlen)) == NULL) goto bad; ip = mtod(*mp, struct ip *); if (ip == NULL) goto bad; } ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(*mp, hlen); break; #ifdef INET6 case ETHERTYPE_IPV6: if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != PFIL_PASS) break; if (V_pfil_member && ifp != NULL && (rv = pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) != PFIL_PASS) break; if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != PFIL_PASS) break; break; #endif } switch (rv) { case PFIL_CONSUMED: return (0); case PFIL_DROPPED: return (EPERM); default: break; } error = -1; /* * Finally, put everything back the way it was and return */ if (snap) { M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT); if (*mp == NULL) return (error); bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc)); } M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); return (0); bad: m_freem(*mp); *mp = NULL; return (error); } /* * Perform basic checks on header size since * pfil assumes ip_input has already processed * it for it. Cut-and-pasted from ip_input.c. * Given how simple the IPv6 version is, * does the IPv4 version really need to be * this complicated? * * XXX Should we update ipstat here, or not? * XXX Right now we update ipstat but not * XXX csum_counter. */ static int bridge_ip_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip *ip; int len, hlen; u_short sum; if (*mp == NULL) return (-1); if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } else if (__predict_false(m->m_len < sizeof (struct ip))) { if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; if (ip->ip_v != IPVERSION) { KMOD_IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ KMOD_IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == NULL) { KMOD_IPSTAT_INC(ips_badhlen); goto bad; } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { KMOD_IPSTAT_INC(ips_badsum); goto bad; } /* Retrieve the packet length. */ len = ntohs(ip->ip_len); /* * Check for additional length bogosity */ if (len < hlen) { KMOD_IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is at least as much as the IP header would have us expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { KMOD_IPSTAT_INC(ips_tooshort); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #ifdef INET6 /* * Same as above, but for IPv6. * Cut-and-pasted from ip6_input.c. * XXX Should we update ip6stat, or not? 
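 */

/*
 * Reference model (hypothetical, simplified) of the ones' complement
 * checksum that bridge_ip_checkbasic() above verifies through
 * in_cksum_hdr()/in_cksum(): RFC 1071 arithmetic over the header seen
 * as 16-bit words; a valid header sums to 0.  hlen is a multiple of 4
 * for IPv4, so no odd-byte handling is needed.
 */
#if 0
static uint16_t
example_ip_cksum(const uint16_t *w, int hlen)
{
	uint32_t sum = 0;

	for (; hlen > 1; hlen -= 2)
		sum += *w++;
	while (sum >> 16)		/* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (~sum & 0xffff);
}
#endif

/*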
*/ static int bridge_ip6_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip6_hdr *ip6; /* * If the IPv6 header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward * it. Otherwise, if it is aligned, make sure the entire base * IPv6 header is in the first mbuf of the chain. */ if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #endif /* INET6 */ /* * bridge_fragment: * * Fragment mbuf chain in multiple packets and prepend ethernet header. */ static int bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh, int snap, struct llc *llc) { struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL; struct ip *ip; int error = -1; if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) goto dropit; ip = mtod(m, struct ip *); m->m_pkthdr.csum_flags |= CSUM_IP; error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist); if (error) goto dropit; /* * Walk the chain and re-add the Ethernet header for * each mbuf packet. */ for (mcur = m; mcur; mcur = mcur->m_nextpkt) { nextpkt = mcur->m_nextpkt; mcur->m_nextpkt = NULL; if (snap) { M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc)); } M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN); /* * The previous two M_PREPEND could have inserted one or two * mbufs in front so we have to update the previous packet's * m_nextpkt. */ mcur->m_nextpkt = nextpkt; if (mprev != NULL) mprev->m_nextpkt = mcur; else { /* The first mbuf in the original chain needs to be * updated. 
*/ *mp = mcur; } mprev = mcur; } KMOD_IPSTAT_INC(ips_fragmented); return (error); dropit: for (mcur = *mp; mcur; mcur = m) { /* dropping the full packet chain */ m = mcur->m_nextpkt; m_freem(mcur); } return (error); } static void bridge_linkstate(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { BRIDGE_UNLOCK(sc); return; } bridge_linkcheck(sc); BRIDGE_UNLOCK(sc); bstp_linkstate(&bif->bif_stp); } static void bridge_linkcheck(struct bridge_softc *sc) { struct bridge_iflist *bif; int new_link, hasls; BRIDGE_LOCK_ASSERT(sc); new_link = LINK_STATE_DOWN; hasls = 0; /* Our link is considered up if at least one of our ports is active */ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE) hasls++; if (bif->bif_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } if (!LIST_EMPTY(&sc->sc_iflist) && !hasls) { /* If no interfaces support link-state then we default to up */ new_link = LINK_STATE_UP; } if_link_state_change(sc->sc_ifp, new_link); } Index: head/sys/net/if_ethersubr.c =================================================================== --- head/sys/net/if_ethersubr.c (revision 345138) +++ head/sys/net/if_ethersubr.c (revision 345139) @@ -1,1405 +1,1419 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET6 #include #endif #include #ifdef CTASSERT CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_attach_p)(struct ifnet *ifp); void (*ng_ether_detach_p)(struct ifnet *ifp); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* if_bridge(4) support */ void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ether_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #ifdef VIMAGE static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif static int ether_requestencap(struct ifnet *, struct if_encap_req *); #define senderr(e) do { error = (e); goto bad;} while (0) static void update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) { int csum_flags = 0; if (src->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); if (src->m_pkthdr.csum_flags & CSUM_SCTP) csum_flags |= CSUM_SCTP_VALID; dst->m_pkthdr.csum_flags |= csum_flags; if (csum_flags & CSUM_DATA_VALID) dst->m_pkthdr.csum_data = 0xffff; } /* * Handle link-layer encapsulation requests. 
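 */

/*
 * Sketch of a consumer (hypothetical, for exposition): pre-computing
 * an IPv4 Ethernet header through the request interface implemented
 * by ether_requestencap() below.  Field names follow the code below;
 * whether the driver routes this through ifp->if_requestencap is an
 * assumption of the sketch.
 */
#if 0
static int
example_precompute_hdr(struct ifnet *ifp, u_char *dst_lladdr,
    char *buf, int buflen)
{
	struct if_encap_req req;

	bzero(&req, sizeof(req));
	req.rtype = IFENCAP_LL;
	req.family = AF_INET;
	req.buf = buf;
	req.bufsize = buflen;
	req.lladdr = dst_lladdr;	/* resolved destination MAC */
	return (ifp->if_requestencap(ifp, &req));
}
#endif

/*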
*/ static int ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) { struct ether_header *eh; struct arphdr *ah; uint16_t etype; const u_char *lladdr; if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < ETHER_HDR_LEN) return (ENOMEM); eh = (struct ether_header *)req->buf; lladdr = req->lladdr; req->lladdr_off = 0; switch (req->family) { case AF_INET: etype = htons(ETHERTYPE_IP); break; case AF_INET6: etype = htons(ETHERTYPE_IPV6); break; case AF_ARP: ah = (struct arphdr *)req->hdata; ah->ar_hrd = htons(ARPHRD_ETHER); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: etype = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: etype = htons(ETHERTYPE_ARP); break; } if (req->flags & IFENCAP_FLAG_BROADCAST) lladdr = ifp->if_broadcastaddr; break; default: return (EAFNOSUPPORT); } memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); req->bufsize = sizeof(struct ether_header); return (0); } static int ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro, u_char *phdr, uint32_t *pflags, struct llentry **plle) { struct ether_header *eh; uint32_t lleflags = 0; int error = 0; #if defined(INET) || defined(INET6) uint16_t etype; #endif if (plle) *plle = NULL; eh = (struct ether_header *)phdr; switch (dst->sa_family) { #ifdef INET case AF_INET: if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle); else { if (m->m_flags & M_BCAST) memcpy(eh->ether_dhost, ifp->if_broadcastaddr, ETHER_ADDR_LEN); else { const struct in_addr *a; a = &(((const struct sockaddr_in *)dst)->sin_addr); ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); } etype = htons(ETHERTYPE_IP); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle); else { const struct in6_addr *a6; a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); etype = htons(ETHERTYPE_IPV6); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); if (m != NULL) m_freem(m); return (EAFNOSUPPORT); } if (error == EHOSTDOWN) { if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) error = EHOSTUNREACH; } if (error != 0) return (error); *pflags = RT_MAY_LOOP; if (lleflags & LLE_IFADDR) *pflags |= RT_L2_ME; return (0); } /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. 
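 */

/*
 * Simplified model (hypothetical) of how ether_output() below obtains
 * its 14-byte header: either copied from an llentry cached on the
 * route, or resolved on demand via ether_resolve_addr().
 */
#if 0
static int
example_get_linkhdr(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *dst, struct route *ro, u_char *hdr,
    uint32_t *pflags)
{
	struct llentry *lle;

	if (ro != NULL && (ro->ro_flags & RT_LLE_CACHE) != 0 &&
	    (lle = ro->ro_lle) != NULL &&
	    (lle->la_flags & LLE_VALID) != 0) {
		memcpy(hdr, lle->r_linkdata, lle->r_hdrlen);
		*pflags = lle->r_flags;
		return (0);		/* fast path, no table lookup */
	}
	return (ether_resolve_addr(ifp, m, dst, ro, hdr, pflags, NULL));
}
#endif

/*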
*/ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { int error = 0; char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ uint32_t pflags; struct llentry *lle = NULL; int addref = 0; phdr = NULL; pflags = 0; if (ro != NULL) { /* XXX BPF uses ro_prepend */ if (ro->ro_prepend != NULL) { phdr = ro->ro_prepend; hlen = ro->ro_plen; } else if (!(m->m_flags & (M_BCAST | M_MCAST))) { if ((ro->ro_flags & RT_LLE_CACHE) != 0) { lle = ro->ro_lle; if (lle != NULL && (lle->la_flags & LLE_VALID) == 0) { LLE_FREE(lle); lle = NULL; /* redundant */ ro->ro_lle = NULL; } if (lle == NULL) { /* if we lookup, keep cache */ addref = 1; } else /* * Notify LLE code that * the entry was used * by datapath. */ llentry_mark_used(lle); } if (lle != NULL) { phdr = lle->r_linkdata; hlen = lle->r_hdrlen; pflags = lle->r_flags; } } } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); if (phdr == NULL) { /* No prepend data supplied. Try to calculate ourselves. */ phdr = linkhdr; hlen = ETHER_HDR_LEN; error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags, addref ? &lle : NULL); if (addref && lle != NULL) ro->ro_lle = lle; if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } loop_copy = pflags & RT_MAY_LOOP; /* * Add local net header. If no space in first mbuf, * allocate another. * * Note that we do prepend regardless of RT_HAS_HEADER flag. * This is done because BPF code shifts m_data pointer * to the end of ethernet header prior to calling if_output(). */ M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, dst->sa_family, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* * Bridges require special output handling. 
*/ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); } static bool ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp) { struct ether_header *eh; eh = mtod(*mp, struct ether_header *); if (ntohs(eh->ether_type) == ETHERTYPE_VLAN || ether_8021q_frame(mp, ifp, ifp, 0, pcp)) return (true); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (false); } /* * Ethernet link layer output routine to send a raw frame to the device. * * This assumes that the 14 byte Ethernet header is present and contiguous * in the first mbuf (if BRIDGE'ing). */ int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { uint8_t pcp; pcp = ifp->if_pcp; if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN && !ether_set_pcp(&m, ifp, pcp)) return (0); if (PFIL_HOOKED_OUT(V_link_pfil_head)) switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT, NULL)) { case PFIL_DROPPED: return (EACCES); case PFIL_CONSUMED: return (0); } #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (ntohs(eh->ether_type)) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return (EAFNOSUPPORT); /* NOTREACHED */ break; }; } #endif #endif /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); } /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. */ static void ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } #ifdef DIAGNOSTIC if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n"); m_freem(m); return; } #endif if (m->m_len < ETHER_HDR_LEN) { /* XXX maybe should pullup? */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); random_harvest_queue_ether(m, sizeof(*m)); #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { switch (etype) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return; /* NOTREACHED */ break; }; } #endif #endif CURVNET_SET_QUIET(ifp->if_vnet); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. 
*/ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. */ ETHER_BPF_MTAP(ifp, m); /* * If the CRC is still on the packet, trim it off. We do this once * and once only in case we are re-entered. Nothing else on the * Ethernet receive path expects to see the FCS. */ if (m->m_flags & M_HASFCS) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } if (!(ifp->if_capenable & IFCAP_HWSTATS)) if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); CURVNET_RESTORE(); return; } /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { KASSERT(lagg_input_p != NULL, ("%s: if_lagg not loaded!", __func__)); m = (*lagg_input_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { CURVNET_RESTORE(); return; } } /* * If the hardware did not process an 802.1Q tag, do this now, * to allow 802.1P priority frames to be passed to the main input * path correctly. * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels. */ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } evl = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); m->m_flags |= M_VLANTAG; bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } /* * Allow if_bridge(4) to claim this frame. * The BRIDGE_INPUT() macro will update ifp if the bridge changed it * and the frame should be delivered locally. */ if (ifp->if_bridge != NULL) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) /* * Clear M_PROMISC on frame so that carp(4) will see it when the * mbuf flows up to Layer 3. * FreeBSD's implementation of carp(4) uses the inprotosw * to dispatch IPPROTO_CARP. carp(4) also allocates its own * Ethernet addresses of the form 00:00:5e:00:01:xx, which * is outside the scope of the M_PROMISC test below. * TODO: Maintain a hash table of ethernet addresses other than * ether_dhost which may be active on this ifp. */ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) { m->m_flags &= ~M_PROMISC; } else #endif { /* * If the frame received was not for our MAC address, set the * M_PROMISC flag on the mbuf chain. The frame may need to * be seen by the rest of the Ethernet input path in case of * re-entry (e.g. bridge, vlan, netgraph) but should not be * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of * global configuration. 
However, if RSS is enabled, hook up RSS affinity * so that when deferred or hybrid dispatch is enabled, we can redistribute * load based on RSS. * * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or * not it had already done work distribution via multi-queue. Then we could * direct dispatch in the event load balancing was already complete and * handle the case of interfaces with different capabilities better. * * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions * at multiple layers? * * XXXRW: For now, enable all this only if RSS is compiled in, although it * works fine without RSS. Need to characterise the performance overhead * of the detour through the netisr code in the event the result is always * direct dispatch. */ static void ether_nh_input(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, #ifdef RSS .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_DIRECT, .nh_m2cpuid = rss_m2cpuid, #else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; static void ether_init(__unused void *arg) { netisr_register(&ether_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { struct pfil_head_args args; args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_ETHERNET; args.pa_headname = PFIL_ETHER_NAME; V_link_pfil_head = pfil_head_register(&args); #ifdef VIMAGE netisr_register_vnet(&ether_nh); #endif } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); #ifdef VIMAGE static void vnet_ether_pfil_destroy(__unused void *arg) { pfil_head_unregister(V_link_pfil_head); } VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, vnet_ether_pfil_destroy, NULL); static void vnet_ether_destroy(__unused void *arg) { netisr_unregister_vnet(&ether_nh); } VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_destroy, NULL); #endif static void ether_input(struct ifnet *ifp, struct mbuf *m) { struct mbuf *mn; /* * The drivers are allowed to pass in a chain of packets linked with * m_nextpkt. We split them up into separate packets here and pass * them up. This allows the drivers to amortize the receive lock. */ while (m) { mn = m->m_nextpkt; m->m_nextpkt = NULL; /* * We will rely on rcvif being set properly in the deferred context, * so assert it is correct here. */ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p " "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp)); CURVNET_SET_QUIET(ifp->if_vnet); netisr_dispatch(NETISR_ETHER, m); CURVNET_RESTORE(); m = mn; } } /* * Upper layer processing for a received Ethernet packet. */ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; u_short ether_type; KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. */ if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) { i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL); if (i != 0 || m == NULL) return; } eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); /* * If this frame has a VLAN tag other than 0, call vlan_input() * if its module is loaded. Otherwise, drop.
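 *
 * The 16-bit tag held in m_pkthdr.ether_vtag decomposes as
 * PCP (3 bits) | DEI (1 bit) | VID (12 bits); e.g. a tag of 0xa00a
 * carries priority 5 and VID 10, while a tag of 0x6000 has VID 0
 * (a priority-tagged frame) and is handled in place below rather
 * than being passed to vlan_input().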
*/ if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!", __func__)); /* Clear before possibly re-entering ether_input(). */ m->m_flags &= ~M_PROMISC; (*vlan_input_p)(ifp, m); return; } /* * Pass promiscuously received frames to the upper layer if the user * requested this by setting IFF_PPROMISC. Otherwise, drop them. */ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) { m_freem(m); return; } /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Ethernet header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, ETHER_HDR_LEN); /* * Dispatch frame to upper layer. */ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: /* * Packet is to be discarded. If netgraph is present, * hand the packet to it for last chance processing; * otherwise dispose of it. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); /* * Put back the ethernet header so netgraph has a * consistent view of inbound packets. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); (*ng_ether_input_orphan_p)(ifp, m); return; } m_freem(m); } /* * Convert Ethernet address to printable (loggable) representation. * This routine is for compatibility; it's better to just use * * printf("%6D", <pointer to address>, ":"); * * since there's no static buffer involved. */ char * ether_sprintf(const u_char *ap) { static char etherbuf[18]; snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":"); return (etherbuf); } /* * Perform common duties while attaching to interface list */ void ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) { int i; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; if_attach(ifp); ifp->if_mtu = ETHERMTU; ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif if (ifp->if_baudrate == 0) ifp->if_baudrate = IF_Mbps(10); /* just a default */ ifp->if_broadcastaddr = etherbroadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ETHER; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (ifp->if_hw_addr != NULL) bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen); bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p)(ifp); /* Announce Ethernet MAC address if non-zero. */ for (i = 0; i < ifp->if_addrlen; i++) if (lla[i] != 0) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); uuid_ether_add(LLADDR(sdl)); /* All necessary bits are set up; announce it now.
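 *
 * The devctl notification below is visible in userland via devd(8);
 * a hypothetical devd.conf(5) rule reacting to it could look like:
 *
 *	notify 0 {
 *		match "system"	"ETHERNET";
 *		match "type"	"IFATTACH";
 *		action "logger Ethernet interface $subsystem attached";
 *	};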
*/ EVENTHANDLER_INVOKE(ether_ifattach_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL); } /* * Perform common duties while detaching an Ethernet interface */ void ether_ifdetach(struct ifnet *ifp) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); uuid_ether_del(LLADDR(sdl)); if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } bpfdetach(ifp); if_detach(ifp); } #ifdef VIMAGE void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } if (ng_ether_attach_p != NULL) { CURVNET_SET_QUIET(new_vnet); (*ng_ether_attach_p)(ifp); CURVNET_RESTORE(); } } #endif SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if 0 /* * This is for reference. We have a table-driven version * of the little-endian crc32 generator, which is faster * than the double-loop. */ uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = (crc ^ data) & 1; crc >>= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_LE); } } return (crc); } #else uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { static const uint32_t crctab[] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; size_t i; uint32_t crc; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { crc ^= buf[i]; crc = (crc >> 4) ^ crctab[crc & 0xf]; crc = (crc >> 4) ^ crctab[crc & 0xf]; } return (crc); } #endif uint32_t ether_crc32_be(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01); crc <<= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_BE) | carry; } } return (crc); } int ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ETHER_ADDR_LEN); break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ETHERMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; case SIOCSLANPCP: error = priv_check(curthread, PRIV_NET_SETLANPCP); if (error != 0) break; if (ifr->ifr_lan_pcp > 7 && ifr->ifr_lan_pcp != IFNET_PCP_NONE) { error = EINVAL; } else { ifp->if_pcp = ifr->ifr_lan_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); } break; case SIOCGLANPCP: ifr->ifr_lan_pcp = ifp->if_pcp; break; default: error = EINVAL; /* XXX netbsd has ENOTTY???
*/ break; } return (error); } static int ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static moduledata_t ether_mod = { .name = "ether", }; void ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen) { struct ether_vlan_header vlan; struct mbuf mv, mb; KASSERT((m->m_flags & M_VLANTAG) != 0, ("%s: vlan information not present", __func__)); KASSERT(m->m_len >= sizeof(struct ether_header), ("%s: mbuf not large enough for header", __func__)); bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header)); vlan.evl_proto = vlan.evl_encap_proto; vlan.evl_encap_proto = htons(ETHERTYPE_VLAN); vlan.evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * If a data link has been supplied by the caller, then we will need to * re-create a stack allocated mbuf chain with the following structure: * * (1) mbuf #1 will contain the supplied data link * (2) mbuf #2 will contain the vlan header * (3) mbuf #3 will contain the original mbuf's packet data * * Otherwise, submit the packet and vlan header via bpf_mtap2(). */ if (data != NULL) { mv.m_next = m; mv.m_data = (caddr_t)&vlan; mv.m_len = sizeof(vlan); mb.m_next = &mv; mb.m_data = data; mb.m_len = dlen; bpf_mtap(bp, &mb); } else bpf_mtap2(bp, &vlan, sizeof(vlan), m); m->m_len += sizeof(struct ether_header); m->m_data -= sizeof(struct ether_header); } struct mbuf * ether_vlanencap(struct mbuf *m, uint16_t tag) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) return (NULL); } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. 
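 *
 * That is, the 12 address bytes slide forward into the room just made
 * by M_PREPEND(), leaving a 4-byte gap for the 802.1Q tag in front of
 * the original type field:
 *
 *	before:	| dhost 6 | shost 6 | type 2 | payload ...
 *	after:	| dhost 6 | shost 6 | 0x8100 2 | tag 2 | type 2 | payload ...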
*/ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(ETHERTYPE_VLAN); evl->evl_tag = htons(tag); return (m); } static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN"); static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); VNET_DEFINE_STATIC(int, soft_pad); #define V_soft_pad VNET(soft_pad) SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); /* * For now, make preserving PCP via an mbuf tag optional, as it increases * per-packet memory allocations and frees. In the future, it would be * preferable to reuse ether_vtag for this, or similar. */ int vlan_mtag_pcp = 0; SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0, "Retain VLAN PCP information as packets are passed up the stack"); bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p, uint16_t vid, uint8_t pcp) { struct m_tag *mtag; int n; uint16_t tag; static const char pad[8]; /* just zeros */ /* * Pad the frame to the minimum size allowed if told to. * This option is in accord with IEEE Std 802.1Q, 2003 Ed., * paragraph C.4.4.3.b. It can help to work around buggy * bridges that violate paragraph C.4.4.3.a from the same * document, i.e., fail to pad short frames after untagging. * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but * untagging it will produce a 62-byte frame, which is a runt * and requires padding. There are VLAN-enabled network * devices that just discard such runts instead or mishandle * them somehow. */ if (V_soft_pad && p->if_type == IFT_ETHER) { for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len; n > 0; n -= sizeof(pad)) { if (!m_append(*mp, min(n, sizeof(pad)), pad)) break; } if (n > 0) { m_freem(*mp); *mp = NULL; if_printf(ife, "cannot pad short frame"); return (false); } } /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL)) != NULL) tag = EVL_MAKETAG(vid, *(uint8_t *)(mtag + 1), 0); else tag = EVL_MAKETAG(vid, pcp, 0); if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { (*mp)->m_pkthdr.ether_vtag = tag; (*mp)->m_flags |= M_VLANTAG; } else { *mp = ether_vlanencap(*mp, tag); if (*mp == NULL) { if_printf(ife, "unable to prepend 802.1Q header"); return (false); } } return (true); } +void +ether_fakeaddr(struct ether_addr *hwaddr) +{ + + /* + * Generate a non-multicast, locally administered address. + * + * BMV: Should we use the FreeBSD OUI range instead? + */ + arc4rand(hwaddr->octet, ETHER_ADDR_LEN, 1); + hwaddr->octet[0] &= ~1; + hwaddr->octet[0] |= 2; +} + DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(ether, 1); Index: head/sys/net/if_vxlan.c =================================================================== --- head/sys/net/if_vxlan.c (revision 345138) +++ head/sys/net/if_vxlan.c (revision 345139) @@ -1,3191 +1,3176 @@ /*- * Copyright (c) 2014, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vxlan_softc; LIST_HEAD(vxlan_softc_head, vxlan_softc); struct vxlan_socket_mc_info { union vxlan_sockaddr vxlsomc_saddr; union vxlan_sockaddr vxlsomc_gaddr; int vxlsomc_ifidx; int vxlsomc_users; }; #define VXLAN_SO_MC_MAX_GROUPS 32 #define VXLAN_SO_VNI_HASH_SHIFT 6 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT) #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE) struct vxlan_socket { struct socket *vxlso_sock; struct rmlock vxlso_lock; u_int vxlso_refcnt; union vxlan_sockaddr vxlso_laddr; LIST_ENTRY(vxlan_socket) vxlso_entry; struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE]; struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS]; }; #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p)) #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p)) #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock) #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock) #define VXLAN_SO_LOCK_ASSERT(_vso) \ rm_assert(&(_vso)->vxlso_lock, RA_LOCKED) #define VXLAN_SO_LOCK_WASSERT(_vso) \ rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED) #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt) #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt) struct vxlan_ftable_entry { LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash; uint16_t vxlfe_flags; uint8_t vxlfe_mac[ETHER_ADDR_LEN]; union vxlan_sockaddr vxlfe_raddr; time_t vxlfe_expire; }; #define VXLAN_FE_FLAG_DYNAMIC 0x01 #define VXLAN_FE_FLAG_STATIC 0x02 #define VXLAN_FE_IS_DYNAMIC(_fe) \ ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC) #define VXLAN_SC_FTABLE_SHIFT 9 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT) #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1) #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \ (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE) LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry); struct vxlan_statistics { uint32_t ftable_nospace; uint32_t ftable_lock_upgrade_failed; }; struct vxlan_softc { struct ifnet *vxl_ifp; struct 
vxlan_socket *vxl_sock; uint32_t vxl_vni; union vxlan_sockaddr vxl_src_addr; union vxlan_sockaddr vxl_dst_addr; uint32_t vxl_flags; #define VXLAN_FLAG_INIT 0x0001 #define VXLAN_FLAG_TEARDOWN 0x0002 #define VXLAN_FLAG_LEARN 0x0004 uint32_t vxl_port_hash_key; uint16_t vxl_min_port; uint16_t vxl_max_port; uint8_t vxl_ttl; /* Lookup table from MAC address to forwarding entry. */ uint32_t vxl_ftable_cnt; uint32_t vxl_ftable_max; uint32_t vxl_ftable_timeout; uint32_t vxl_ftable_hash_key; struct vxlan_ftable_head *vxl_ftable; /* Derived from vxl_dst_addr. */ struct vxlan_ftable_entry vxl_default_fe; struct ip_moptions *vxl_im4o; struct ip6_moptions *vxl_im6o; struct rmlock vxl_lock; volatile u_int vxl_refcnt; int vxl_unit; int vxl_vso_mc_index; struct vxlan_statistics vxl_stats; struct sysctl_oid *vxl_sysctl_node; struct sysctl_ctx_list vxl_sysctl_ctx; struct callout vxl_callout; - uint8_t vxl_hwaddr[ETHER_ADDR_LEN]; + struct ether_addr vxl_hwaddr; int vxl_mc_ifindex; struct ifnet *vxl_mc_ifp; struct ifmedia vxl_media; char vxl_mc_ifname[IFNAMSIZ]; LIST_ENTRY(vxlan_softc) vxl_entry; LIST_ENTRY(vxlan_softc) vxl_ifdetach_list; }; #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p)) #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p)) #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock) #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock) #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock) #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED) #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED) #define VXLAN_UNLOCK(_sc, _p) do { \ if (VXLAN_LOCK_WOWNED(_sc)) \ VXLAN_WUNLOCK(_sc); \ else \ VXLAN_RUNLOCK(_sc, _p); \ } while (0) #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt) #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt) #define satoconstsin(sa) ((const struct sockaddr_in *)(sa)) #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa)) struct vxlanudphdr { struct udphdr vxlh_udp; struct vxlan_header vxlh_hdr; } __packed; static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *); static void vxlan_ftable_init(struct vxlan_softc *); static void vxlan_ftable_fini(struct vxlan_softc *); static void vxlan_ftable_flush(struct vxlan_softc *, int); static void vxlan_ftable_expire(struct vxlan_softc *); static int vxlan_ftable_update_locked(struct vxlan_softc *, const union vxlan_sockaddr *, const uint8_t *, struct rm_priotracker *); static int vxlan_ftable_learn(struct vxlan_softc *, const struct sockaddr *, const uint8_t *); static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS); static struct vxlan_ftable_entry * vxlan_ftable_entry_alloc(void); static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *); static void vxlan_ftable_entry_init(struct vxlan_softc *, struct vxlan_ftable_entry *, const uint8_t *, const struct sockaddr *, uint32_t); static void vxlan_ftable_entry_destroy(struct vxlan_softc *, struct vxlan_ftable_entry *); static int vxlan_ftable_entry_insert(struct vxlan_softc *, struct vxlan_ftable_entry *); static struct vxlan_ftable_entry * vxlan_ftable_entry_lookup(struct vxlan_softc *, const uint8_t *); static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *, struct sbuf *); static struct vxlan_socket * vxlan_socket_alloc(const union vxlan_sockaddr *); static void vxlan_socket_destroy(struct vxlan_socket *); static void vxlan_socket_release(struct vxlan_socket *); static struct vxlan_socket * vxlan_socket_lookup(union vxlan_sockaddr *vxlsa); static void 
vxlan_socket_insert(struct vxlan_socket *); static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *); static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *); static int vxlan_socket_create(struct ifnet *, int, const union vxlan_sockaddr *, struct vxlan_socket **); static void vxlan_socket_ifdetach(struct vxlan_socket *, struct ifnet *, struct vxlan_softc_head *); static struct vxlan_socket * vxlan_socket_mc_lookup(const union vxlan_sockaddr *); static int vxlan_sockaddr_mc_info_match( const struct vxlan_socket_mc_info *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int); static int vxlan_socket_mc_join_group(struct vxlan_socket *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int *, union vxlan_sockaddr *); static int vxlan_socket_mc_leave_group(struct vxlan_socket *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int); static int vxlan_socket_mc_add_group(struct vxlan_socket *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int, int *); static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *, int); static struct vxlan_softc * vxlan_socket_lookup_softc_locked(struct vxlan_socket *, uint32_t); static struct vxlan_softc * vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t); static int vxlan_socket_insert_softc(struct vxlan_socket *, struct vxlan_softc *); static void vxlan_socket_remove_softc(struct vxlan_socket *, struct vxlan_softc *); static struct ifnet * vxlan_multicast_if_ref(struct vxlan_softc *, int); static void vxlan_free_multicast(struct vxlan_softc *); static int vxlan_setup_multicast_interface(struct vxlan_softc *); static int vxlan_setup_multicast(struct vxlan_softc *); static int vxlan_setup_socket(struct vxlan_softc *); static void vxlan_setup_interface(struct vxlan_softc *); static int vxlan_valid_init_config(struct vxlan_softc *); static void vxlan_init_wait(struct vxlan_softc *); static void vxlan_init_complete(struct vxlan_softc *); static void vxlan_init(void *); static void vxlan_release(struct vxlan_softc *); static void vxlan_teardown_wait(struct vxlan_softc *); static void vxlan_teardown_complete(struct vxlan_softc *); static void vxlan_teardown_locked(struct vxlan_softc *); static void vxlan_teardown(struct vxlan_softc *); static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *, struct vxlan_softc_head *); static void vxlan_timer(void *); static int vxlan_ctrl_get_config(struct vxlan_softc *, void *); static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *); static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *); static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *); static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *); static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *); static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *); static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *); static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *); static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *); static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *); static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *); static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *); static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *); static int vxlan_ctrl_flush(struct vxlan_softc *, void *); static int vxlan_ioctl_drvspec(struct vxlan_softc *, struct ifdrv *, int); static int vxlan_ioctl_ifflags(struct vxlan_softc *); static int 
vxlan_ioctl(struct ifnet *, u_long, caddr_t); #if defined(INET) || defined(INET6) static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *); static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *, int, uint16_t, uint16_t); #endif static int vxlan_encap4(struct vxlan_softc *, const union vxlan_sockaddr *, struct mbuf *); static int vxlan_encap6(struct vxlan_softc *, const union vxlan_sockaddr *, struct mbuf *); static int vxlan_transmit(struct ifnet *, struct mbuf *); static void vxlan_qflush(struct ifnet *); static void vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *); static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **, const struct sockaddr *); static void vxlan_set_default_config(struct vxlan_softc *); static int vxlan_set_user_config(struct vxlan_softc *, struct ifvxlanparam *); static int vxlan_clone_create(struct if_clone *, int, caddr_t); static void vxlan_clone_destroy(struct ifnet *); static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *); -static void vxlan_fakeaddr(struct vxlan_softc *); static int vxlan_media_change(struct ifnet *); static void vxlan_media_status(struct ifnet *, struct ifmediareq *); static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *, const struct sockaddr *); static void vxlan_sockaddr_copy(union vxlan_sockaddr *, const struct sockaddr *); static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *, const struct sockaddr *); static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *, const struct sockaddr *); static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int); static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *); static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *); static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *); static int vxlan_can_change_config(struct vxlan_softc *); static int vxlan_check_vni(uint32_t); static int vxlan_check_ttl(int); static int vxlan_check_ftable_timeout(uint32_t); static int vxlan_check_ftable_max(uint32_t); static void vxlan_sysctl_setup(struct vxlan_softc *); static void vxlan_sysctl_destroy(struct vxlan_softc *); static int vxlan_tunable_int(struct vxlan_softc *, const char *, int); static void vxlan_ifdetach_event(void *, struct ifnet *); static void vxlan_load(void); static void vxlan_unload(void); static int vxlan_modevent(module_t, int, void *); static const char vxlan_name[] = "vxlan"; static MALLOC_DEFINE(M_VXLAN, vxlan_name, "Virtual eXtensible LAN Interface"); static struct if_clone *vxlan_cloner; static struct mtx vxlan_list_mtx; #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx) #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx) static LIST_HEAD(, vxlan_socket) vxlan_socket_list; static eventhandler_tag vxlan_ifdetach_event_tag; SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0, "Virtual eXtensible Local Area Network"); static int vxlan_legacy_port = 0; TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port); static int vxlan_reuse_port = 0; TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port); /* Default maximum number of addresses in the forwarding table. */ #ifndef VXLAN_FTABLE_MAX #define VXLAN_FTABLE_MAX 2000 #endif /* Timeout (in seconds) of addresses learned in the forwarding table. */ #ifndef VXLAN_FTABLE_TIMEOUT #define VXLAN_FTABLE_TIMEOUT (20 * 60) #endif /* * Maximum timeout (in seconds) of addresses learned in the forwarding * table. 
*/ #ifndef VXLAN_FTABLE_MAX_TIMEOUT #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24) #endif /* Number of seconds between pruning attempts of the forwarding table. */ #ifndef VXLAN_FTABLE_PRUNE #define VXLAN_FTABLE_PRUNE (5 * 60) #endif static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE; struct vxlan_control { int (*vxlc_func)(struct vxlan_softc *, void *); int vxlc_argsize; int vxlc_flags; #define VXLAN_CTRL_FLAG_COPYIN 0x01 #define VXLAN_CTRL_FLAG_COPYOUT 0x02 #define VXLAN_CTRL_FLAG_SUSER 0x04 }; static const struct vxlan_control vxlan_control_table[] = { [VXLAN_CMD_GET_CONFIG] = { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg), VXLAN_CTRL_FLAG_COPYOUT }, [VXLAN_CMD_SET_VNI] = { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_LOCAL_ADDR] = { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_REMOTE_ADDR] = { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_LOCAL_PORT] = { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_REMOTE_PORT] = { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_PORT_RANGE] = { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_FTABLE_TIMEOUT] = { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_FTABLE_MAX] = { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_MULTICAST_IF] = { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_TTL] = { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_LEARN] = { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_FTABLE_ENTRY_ADD] = { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_FTABLE_ENTRY_REM] = { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_FLUSH] = { vxlan_ctrl_flush, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, }; static const int vxlan_control_table_size = nitems(vxlan_control_table); static int vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) d = ((int)a[i]) - ((int)b[i]); return (d); } static void vxlan_ftable_init(struct vxlan_softc *sc) { int i; sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) * VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK); for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) LIST_INIT(&sc->vxl_ftable[i]); sc->vxl_ftable_hash_key = arc4random(); } static void vxlan_ftable_fini(struct vxlan_softc *sc) { int i; for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]), ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i)); } MPASS(sc->vxl_ftable_cnt == 0); free(sc->vxl_ftable, M_VXLAN); sc->vxl_ftable = NULL; } static void vxlan_ftable_flush(struct vxlan_softc *sc, int all) { struct vxlan_ftable_entry *fe, *tfe; int i; for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 
LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { if (all || VXLAN_FE_IS_DYNAMIC(fe)) vxlan_ftable_entry_destroy(sc, fe); } } } static void vxlan_ftable_expire(struct vxlan_softc *sc) { struct vxlan_ftable_entry *fe, *tfe; int i; VXLAN_LOCK_WASSERT(sc); for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { if (VXLAN_FE_IS_DYNAMIC(fe) && time_uptime >= fe->vxlfe_expire) vxlan_ftable_entry_destroy(sc, fe); } } } static int vxlan_ftable_update_locked(struct vxlan_softc *sc, const union vxlan_sockaddr *vxlsa, const uint8_t *mac, struct rm_priotracker *tracker) { struct vxlan_ftable_entry *fe; int error __unused; VXLAN_LOCK_ASSERT(sc); again: /* * A forwarding entry for this MAC address might already exist. If * so, update it, otherwise create a new one. We may have to upgrade * the lock if we have to change or create an entry. */ fe = vxlan_ftable_entry_lookup(sc, mac); if (fe != NULL) { fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; if (!VXLAN_FE_IS_DYNAMIC(fe) || vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa)) return (0); if (!VXLAN_LOCK_WOWNED(sc)) { VXLAN_RUNLOCK(sc, tracker); VXLAN_WLOCK(sc); sc->vxl_stats.ftable_lock_upgrade_failed++; goto again; } vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa); return (0); } if (!VXLAN_LOCK_WOWNED(sc)) { VXLAN_RUNLOCK(sc, tracker); VXLAN_WLOCK(sc); sc->vxl_stats.ftable_lock_upgrade_failed++; goto again; } if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) { sc->vxl_stats.ftable_nospace++; return (ENOSPC); } fe = vxlan_ftable_entry_alloc(); if (fe == NULL) return (ENOMEM); vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC); /* The prior lookup failed, so the insert should not. */ error = vxlan_ftable_entry_insert(sc, fe); MPASS(error == 0); return (0); } static int vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa, const uint8_t *mac) { struct rm_priotracker tracker; union vxlan_sockaddr vxlsa; int error; /* * The source port may be randomly selected by the remote host, so * use the port of the default destination address. */ vxlan_sockaddr_copy(&vxlsa, sa); vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { error = vxlan_sockaddr_in6_embedscope(&vxlsa); if (error) return (error); } VXLAN_RLOCK(sc, &tracker); error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker); VXLAN_UNLOCK(sc, &tracker); return (error); } static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; struct sbuf sb; struct vxlan_softc *sc; struct vxlan_ftable_entry *fe; size_t size; int i, error; /* * This is mostly intended for debugging during development. It is * not practical to dump an entire large table this way. */ sc = arg1; size = PAGE_SIZE; /* Calculate later. 
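 * For now the sbuf is fixed at a single page; an entry that does not
 * fit sets the sbuf error flag and is dropped below.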
*/ sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN); sbuf_putc(&sb, '\n'); VXLAN_RLOCK(sc, &tracker); for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) { if (sbuf_error(&sb) != 0) break; vxlan_ftable_entry_dump(fe, &sb); } } VXLAN_RUNLOCK(sc, &tracker); if (sbuf_len(&sb) == 1) sbuf_setpos(&sb, 0); sbuf_finish(&sb); error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (error); } static struct vxlan_ftable_entry * vxlan_ftable_entry_alloc(void) { struct vxlan_ftable_entry *fe; fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT); return (fe); } static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe) { free(fe, M_VXLAN); } static void vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe, const uint8_t *mac, const struct sockaddr *sa, uint32_t flags) { fe->vxlfe_flags = flags; fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN); vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa); } static void vxlan_ftable_entry_destroy(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe) { sc->vxl_ftable_cnt--; LIST_REMOVE(fe, vxlfe_hash); vxlan_ftable_entry_free(fe); } static int vxlan_ftable_entry_insert(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe) { struct vxlan_ftable_entry *lfe; uint32_t hash; int dir; VXLAN_LOCK_WASSERT(sc); hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac); lfe = LIST_FIRST(&sc->vxl_ftable[hash]); if (lfe == NULL) { LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash); goto out; } do { dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac); if (dir == 0) return (EEXIST); if (dir > 0) { LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash); goto out; } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) { LIST_INSERT_AFTER(lfe, fe, vxlfe_hash); goto out; } else lfe = LIST_NEXT(lfe, vxlfe_hash); } while (lfe != NULL); out: sc->vxl_ftable_cnt++; return (0); } static struct vxlan_ftable_entry * vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac) { struct vxlan_ftable_entry *fe; uint32_t hash; int dir; VXLAN_LOCK_ASSERT(sc); hash = VXLAN_SC_FTABLE_HASH(sc, mac); LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) { dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac); if (dir == 0) return (fe); if (dir > 0) break; } return (NULL); } static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb) { char buf[64]; const union vxlan_sockaddr *sa; const void *addr; int i, len, af, width; sa = &fe->vxlfe_raddr; af = sa->sa.sa_family; len = sbuf_len(sb); sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S', fe->vxlfe_flags); for (i = 0; i < ETHER_ADDR_LEN - 1; i++) sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]); sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]); if (af == AF_INET) { addr = &sa->in4.sin_addr; width = INET_ADDRSTRLEN - 1; } else { addr = &sa->in6.sin6_addr; width = INET6_ADDRSTRLEN - 1; } inet_ntop(af, addr, buf, sizeof(buf)); sbuf_printf(sb, "%*s ", width, buf); sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire); sbuf_putc(sb, '\n'); /* Truncate a partial line. 
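 * (len was sampled at the top of this function, so a failed entry is
 * rolled back as a whole.)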
*/ if (sbuf_error(sb) != 0) sbuf_setpos(sb, len); } static struct vxlan_socket * vxlan_socket_alloc(const union vxlan_sockaddr *sa) { struct vxlan_socket *vso; int i; vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO); rm_init(&vso->vxlso_lock, "vxlansorm"); refcount_init(&vso->vxlso_refcnt, 0); for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) LIST_INIT(&vso->vxlso_vni_hash[i]); vso->vxlso_laddr = *sa; return (vso); } static void vxlan_socket_destroy(struct vxlan_socket *vso) { struct socket *so; #ifdef INVARIANTS int i; struct vxlan_socket_mc_info *mc; for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { mc = &vso->vxlso_mc[i]; KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC, ("%s: socket %p mc[%d] still has address", __func__, vso, i)); } for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]), ("%s: socket %p vni_hash[%d] not empty", __func__, vso, i)); } #endif so = vso->vxlso_sock; if (so != NULL) { vso->vxlso_sock = NULL; soclose(so); } rm_destroy(&vso->vxlso_lock); free(vso, M_VXLAN); } static void vxlan_socket_release(struct vxlan_socket *vso) { int destroy; VXLAN_LIST_LOCK(); destroy = VXLAN_SO_RELEASE(vso); if (destroy != 0) LIST_REMOVE(vso, vxlso_entry); VXLAN_LIST_UNLOCK(); if (destroy != 0) vxlan_socket_destroy(vso); } static struct vxlan_socket * vxlan_socket_lookup(union vxlan_sockaddr *vxlsa) { struct vxlan_socket *vso; VXLAN_LIST_LOCK(); LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) { if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) { VXLAN_SO_ACQUIRE(vso); break; } } VXLAN_LIST_UNLOCK(); return (vso); } static void vxlan_socket_insert(struct vxlan_socket *vso) { VXLAN_LIST_LOCK(); VXLAN_SO_ACQUIRE(vso); LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry); VXLAN_LIST_UNLOCK(); } static int vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp) { struct thread *td; int error; td = curthread; error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock, SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td); if (error) { if_printf(ifp, "cannot create socket: %d\n", error); return (error); } error = udp_set_kernel_tunneling(vso->vxlso_sock, vxlan_rcv_udp_packet, NULL, vso); if (error) { if_printf(ifp, "cannot set tunneling function: %d\n", error); return (error); } if (vxlan_reuse_port != 0) { struct sockopt sopt; int val = 1; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = SO_REUSEPORT; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof(val); error = sosetopt(vso->vxlso_sock, &sopt); if (error) { if_printf(ifp, "cannot set REUSEADDR socket opt: %d\n", error); return (error); } } return (0); } static int vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp) { union vxlan_sockaddr laddr; struct thread *td; int error; td = curthread; laddr = vso->vxlso_laddr; error = sobind(vso->vxlso_sock, &laddr.sa, td); if (error) { if (error != EADDRINUSE) if_printf(ifp, "cannot bind socket: %d\n", error); return (error); } return (0); } static int vxlan_socket_create(struct ifnet *ifp, int multicast, const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop) { union vxlan_sockaddr laddr; struct vxlan_socket *vso; int error; laddr = *saddr; /* * If this socket will be multicast, then only the local port * must be specified when binding. 
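 *
 * E.g. with a group address of 239.1.1.1 and the IANA-assigned VXLAN
 * port 4789, the socket is bound to 0.0.0.0:4789; leaving the address
 * wild is what allows one socket to carry the traffic of every group
 * joined through it.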
*/ if (multicast != 0) { if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) laddr.in4.sin_addr.s_addr = INADDR_ANY; #ifdef INET6 else laddr.in6.sin6_addr = in6addr_any; #endif } vso = vxlan_socket_alloc(&laddr); if (vso == NULL) return (ENOMEM); error = vxlan_socket_init(vso, ifp); if (error) goto fail; error = vxlan_socket_bind(vso, ifp); if (error) goto fail; /* * There is a small window between the bind completing and * inserting the socket, so that a concurrent create may fail. * Let's not worry about that for now. */ vxlan_socket_insert(vso); *vsop = vso; return (0); fail: vxlan_socket_destroy(vso); return (error); } static void vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp, struct vxlan_softc_head *list) { struct rm_priotracker tracker; struct vxlan_softc *sc; int i; VXLAN_SO_RLOCK(vso, &tracker); for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry) vxlan_ifdetach(sc, ifp, list); } VXLAN_SO_RUNLOCK(vso, &tracker); } static struct vxlan_socket * vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa) { union vxlan_sockaddr laddr; struct vxlan_socket *vso; laddr = *vxlsa; if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) laddr.in4.sin_addr.s_addr = INADDR_ANY; #ifdef INET6 else laddr.in6.sin6_addr = in6addr_any; #endif vso = vxlan_socket_lookup(&laddr); return (vso); } static int vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc, const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, int ifidx) { if (!vxlan_sockaddr_in_any(local) && !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa)) return (0); if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa)) return (0); if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx) return (0); return (1); } static int vxlan_socket_mc_join_group(struct vxlan_socket *vso, const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, int *ifidx, union vxlan_sockaddr *source) { struct sockopt sopt; int error; *source = *local; if (VXLAN_SOCKADDR_IS_IPV4(group)) { struct ip_mreq mreq; mreq.imr_multiaddr = group->in4.sin_addr; mreq.imr_interface = local->in4.sin_addr; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_ADD_MEMBERSHIP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); if (error) return (error); /* * BMV: Ideally, there would be a formal way for us to get * the local interface that was selected based on the * imr_interface address. We could then update *ifidx so * vxlan_sockaddr_mc_info_match() would return a match for * later creates that explicitly set the multicast interface. * * If we really need to, we can of course look in the INP's * membership list: * sotoinpcb(vso->vxlso_sock)->inp_moptions-> * imo_membership[]->inm_ifp * similarly to imo_match_group(). */ source->in4.sin_addr = local->in4.sin_addr; } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { struct ipv6_mreq mreq; mreq.ipv6mr_multiaddr = group->in6.sin6_addr; mreq.ipv6mr_interface = *ifidx; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_JOIN_GROUP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); if (error) return (error); /* * BMV: As with IPv4, we would really like to know what * interface in6p_lookup_mcast_ifp() selected. 
*/ } else error = EAFNOSUPPORT; return (error); } static int vxlan_socket_mc_leave_group(struct vxlan_socket *vso, const union vxlan_sockaddr *group, const union vxlan_sockaddr *source, int ifidx) { struct sockopt sopt; int error; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; if (VXLAN_SOCKADDR_IS_IPV4(group)) { struct ip_mreq mreq; mreq.imr_multiaddr = group->in4.sin_addr; mreq.imr_interface = source->in4.sin_addr; sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_DROP_MEMBERSHIP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { struct ipv6_mreq mreq; mreq.ipv6mr_multiaddr = group->in6.sin6_addr; mreq.ipv6mr_interface = ifidx; sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_LEAVE_GROUP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); } else error = EAFNOSUPPORT; return (error); } static int vxlan_socket_mc_add_group(struct vxlan_socket *vso, const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, int ifidx, int *idx) { union vxlan_sockaddr source; struct vxlan_socket_mc_info *mc; int i, empty, error; /* * Within a socket, the same multicast group may be used by multiple * interfaces, each with a different network identifier. But a socket * may only join a multicast group once, so keep track of the users * here. */ VXLAN_SO_WLOCK(vso); for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { mc = &vso->vxlso_mc[i]; if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) { empty++; continue; } if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx)) goto out; } VXLAN_SO_WUNLOCK(vso); if (empty == 0) return (ENOSPC); error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source); if (error) return (error); VXLAN_SO_WLOCK(vso); for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { mc = &vso->vxlso_mc[i]; if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) { vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa); vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa); mc->vxlsomc_ifidx = ifidx; goto out; } } VXLAN_SO_WUNLOCK(vso); error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx); MPASS(error == 0); return (ENOSPC); out: mc->vxlsomc_users++; VXLAN_SO_WUNLOCK(vso); *idx = i; return (0); } static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx) { union vxlan_sockaddr group, source; struct vxlan_socket_mc_info *mc; int ifidx, leave; KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS, ("%s: vso %p idx %d out of bounds", __func__, vso, idx)); leave = 0; mc = &vso->vxlso_mc[idx]; VXLAN_SO_WLOCK(vso); mc->vxlsomc_users--; if (mc->vxlsomc_users == 0) { group = mc->vxlsomc_gaddr; source = mc->vxlsomc_saddr; ifidx = mc->vxlsomc_ifidx; bzero(mc, sizeof(*mc)); leave = 1; } VXLAN_SO_WUNLOCK(vso); if (leave != 0) { /* * Our socket's membership in this group may have already * been removed if we joined through an interface that's * been detached. 
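 * For that reason the return value of the leave below is
 * intentionally not checked.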
*/ vxlan_socket_mc_leave_group(vso, &group, &source, ifidx); } } static struct vxlan_softc * vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni) { struct vxlan_softc *sc; uint32_t hash; VXLAN_SO_LOCK_ASSERT(vso); hash = VXLAN_SO_VNI_HASH(vni); LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) { if (sc->vxl_vni == vni) { VXLAN_ACQUIRE(sc); break; } } return (sc); } static struct vxlan_softc * vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni) { struct rm_priotracker tracker; struct vxlan_softc *sc; VXLAN_SO_RLOCK(vso, &tracker); sc = vxlan_socket_lookup_softc_locked(vso, vni); VXLAN_SO_RUNLOCK(vso, &tracker); return (sc); } static int vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc) { struct vxlan_softc *tsc; uint32_t vni, hash; vni = sc->vxl_vni; hash = VXLAN_SO_VNI_HASH(vni); VXLAN_SO_WLOCK(vso); tsc = vxlan_socket_lookup_softc_locked(vso, vni); if (tsc != NULL) { VXLAN_SO_WUNLOCK(vso); vxlan_release(tsc); return (EEXIST); } VXLAN_ACQUIRE(sc); LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry); VXLAN_SO_WUNLOCK(vso); return (0); } static void vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc) { VXLAN_SO_WLOCK(vso); LIST_REMOVE(sc, vxl_entry); VXLAN_SO_WUNLOCK(vso); vxlan_release(sc); } static struct ifnet * vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4) { struct ifnet *ifp; VXLAN_LOCK_ASSERT(sc); if (ipv4 && sc->vxl_im4o != NULL) ifp = sc->vxl_im4o->imo_multicast_ifp; else if (!ipv4 && sc->vxl_im6o != NULL) ifp = sc->vxl_im6o->im6o_multicast_ifp; else ifp = NULL; if (ifp != NULL) if_ref(ifp); return (ifp); } static void vxlan_free_multicast(struct vxlan_softc *sc) { if (sc->vxl_mc_ifp != NULL) { if_rele(sc->vxl_mc_ifp); sc->vxl_mc_ifp = NULL; sc->vxl_mc_ifindex = 0; } if (sc->vxl_im4o != NULL) { free(sc->vxl_im4o, M_VXLAN); sc->vxl_im4o = NULL; } if (sc->vxl_im6o != NULL) { free(sc->vxl_im6o, M_VXLAN); sc->vxl_im6o = NULL; } } static int vxlan_setup_multicast_interface(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = ifunit_ref(sc->vxl_mc_ifname); if (ifp == NULL) { if_printf(sc->vxl_ifp, "multicast interface %s does " "not exist\n", sc->vxl_mc_ifname); return (ENOENT); } if ((ifp->if_flags & IFF_MULTICAST) == 0) { if_printf(sc->vxl_ifp, "interface %s does not support " "multicast\n", sc->vxl_mc_ifname); if_rele(ifp); return (ENOTSUP); } sc->vxl_mc_ifp = ifp; sc->vxl_mc_ifindex = ifp->if_index; return (0); } static int vxlan_setup_multicast(struct vxlan_softc *sc) { const union vxlan_sockaddr *group; int error; group = &sc->vxl_dst_addr; error = 0; if (sc->vxl_mc_ifname[0] != '\0') { error = vxlan_setup_multicast_interface(sc); if (error) return (error); } /* * Initialize a multicast options structure that is sufficiently * populated for use in the respective IP output routine. This * structure is typically stored in the socket, but our sockets * may be shared among multiple interfaces.
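 * Keeping a private ip_moptions/ip6_moptions per interface lets each
 * one use its own TTL and multicast interface without clobbering the
 * settings of other interfaces on the shared socket.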
*/ if (VXLAN_SOCKADDR_IS_IPV4(group)) { sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN, M_ZERO | M_WAITOK); sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp; sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; sc->vxl_im4o->imo_multicast_vif = -1; } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN, M_ZERO | M_WAITOK); sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp; sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; } return (error); } static int vxlan_setup_socket(struct vxlan_softc *sc) { struct vxlan_socket *vso; struct ifnet *ifp; union vxlan_sockaddr *saddr, *daddr; int multicast, error; vso = NULL; ifp = sc->vxl_ifp; saddr = &sc->vxl_src_addr; daddr = &sc->vxl_dst_addr; multicast = vxlan_sockaddr_in_multicast(daddr); MPASS(multicast != -1); sc->vxl_vso_mc_index = -1; /* * Try to create the socket. If that fails, attempt to use an * existing socket. */ error = vxlan_socket_create(ifp, multicast, saddr, &vso); if (error) { if (multicast != 0) vso = vxlan_socket_mc_lookup(saddr); else vso = vxlan_socket_lookup(saddr); if (vso == NULL) { if_printf(ifp, "cannot create socket (error: %d), " "and no existing socket found\n", error); goto out; } } if (multicast != 0) { error = vxlan_setup_multicast(sc); if (error) goto out; error = vxlan_socket_mc_add_group(vso, daddr, saddr, sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index); if (error) goto out; } sc->vxl_sock = vso; error = vxlan_socket_insert_softc(vso, sc); if (error) { sc->vxl_sock = NULL; if_printf(ifp, "network identifier %d already exists in " "this socket\n", sc->vxl_vni); goto out; } return (0); out: if (vso != NULL) { if (sc->vxl_vso_mc_index != -1) { vxlan_socket_mc_release_group_by_idx(vso, sc->vxl_vso_mc_index); sc->vxl_vso_mc_index = -1; } if (multicast != 0) vxlan_free_multicast(sc); vxlan_socket_release(vso); } return (error); } static void vxlan_setup_interface(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = sc->vxl_ifp; ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr); if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0) ifp->if_hdrlen += sizeof(struct ip); else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0) ifp->if_hdrlen += sizeof(struct ip6_hdr); } static int vxlan_valid_init_config(struct vxlan_softc *sc) { const char *reason; if (vxlan_check_vni(sc->vxl_vni) != 0) { reason = "invalid virtual network identifier specified"; goto fail; } if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) { reason = "source address type is not supported"; goto fail; } if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) { reason = "destination address type is not supported"; goto fail; } if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) { reason = "no valid destination address specified"; goto fail; } if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 && sc->vxl_mc_ifname[0] != '\0') { reason = "can only specify interface with a group address"; goto fail; } if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^ VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) { reason = "source and destination address must both " "be either IPv4 or IPv6"; goto fail; } } if (sc->vxl_src_addr.in4.sin_port == 0) { reason = "local port not specified"; goto fail; } if (sc->vxl_dst_addr.in4.sin_port == 0) { reason = "remote port not specified"; goto fail; } return (0); fail: if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason); return (EINVAL); } static void vxlan_init_wait(struct vxlan_softc *sc) { VXLAN_LOCK_WASSERT(sc); 
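	/*
	 * Sleep in one-second (hz tick) intervals; as explained at
	 * vxlan_release(), a missed wakeup is only a delay, not fatal.
	 */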
while (sc->vxl_flags & VXLAN_FLAG_INIT) rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz); } static void vxlan_init_complete(struct vxlan_softc *sc) { VXLAN_WLOCK(sc); sc->vxl_flags &= ~VXLAN_FLAG_INIT; wakeup(sc); VXLAN_WUNLOCK(sc); } static void vxlan_init(void *xsc) { static const uint8_t empty_mac[ETHER_ADDR_LEN]; struct vxlan_softc *sc; struct ifnet *ifp; sc = xsc; ifp = sc->vxl_ifp; VXLAN_WLOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { VXLAN_WUNLOCK(sc); return; } sc->vxl_flags |= VXLAN_FLAG_INIT; VXLAN_WUNLOCK(sc); if (vxlan_valid_init_config(sc) != 0) goto out; vxlan_setup_interface(sc); if (vxlan_setup_socket(sc) != 0) goto out; /* Initialize the default forwarding entry. */ vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac, &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC); VXLAN_WLOCK(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz, vxlan_timer, sc); VXLAN_WUNLOCK(sc); if_link_state_change(ifp, LINK_STATE_UP); out: vxlan_init_complete(sc); } static void vxlan_release(struct vxlan_softc *sc) { /* * The softc may be destroyed as soon as we release our reference, * so we cannot serialize the wakeup with the softc lock. We use a * timeout in our sleeps so a missed wakeup is unfortunate but not * fatal. */ if (VXLAN_RELEASE(sc) != 0) wakeup(sc); } static void vxlan_teardown_wait(struct vxlan_softc *sc) { VXLAN_LOCK_WASSERT(sc); while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz); } static void vxlan_teardown_complete(struct vxlan_softc *sc) { VXLAN_WLOCK(sc); sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN; wakeup(sc); VXLAN_WUNLOCK(sc); } static void vxlan_teardown_locked(struct vxlan_softc *sc) { struct ifnet *ifp; struct vxlan_socket *vso; ifp = sc->vxl_ifp; VXLAN_LOCK_WASSERT(sc); MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN); ifp->if_flags &= ~IFF_UP; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&sc->vxl_callout); vso = sc->vxl_sock; sc->vxl_sock = NULL; VXLAN_WUNLOCK(sc); if_link_state_change(ifp, LINK_STATE_DOWN); if (vso != NULL) { vxlan_socket_remove_softc(vso, sc); if (sc->vxl_vso_mc_index != -1) { vxlan_socket_mc_release_group_by_idx(vso, sc->vxl_vso_mc_index); sc->vxl_vso_mc_index = -1; } } VXLAN_WLOCK(sc); while (sc->vxl_refcnt != 0) rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz); VXLAN_WUNLOCK(sc); callout_drain(&sc->vxl_callout); vxlan_free_multicast(sc); if (vso != NULL) vxlan_socket_release(vso); vxlan_teardown_complete(sc); } static void vxlan_teardown(struct vxlan_softc *sc) { VXLAN_WLOCK(sc); if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) { vxlan_teardown_wait(sc); VXLAN_WUNLOCK(sc); return; } sc->vxl_flags |= VXLAN_FLAG_TEARDOWN; vxlan_teardown_locked(sc); } static void vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp, struct vxlan_softc_head *list) { VXLAN_WLOCK(sc); if (sc->vxl_mc_ifp != ifp) goto out; if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) goto out; sc->vxl_flags |= VXLAN_FLAG_TEARDOWN; LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list); out: VXLAN_WUNLOCK(sc); } static void vxlan_timer(void *xsc) { struct vxlan_softc *sc; sc = xsc; VXLAN_LOCK_WASSERT(sc); vxlan_ftable_expire(sc); callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz); } static int vxlan_ioctl_ifflags(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = sc->vxl_ifp; if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) vxlan_init(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vxlan_teardown(sc); } return (0); } static int vxlan_ctrl_get_config(struct vxlan_softc 
*sc, void *arg) { struct rm_priotracker tracker; struct ifvxlancfg *cfg; cfg = arg; bzero(cfg, sizeof(*cfg)); VXLAN_RLOCK(sc, &tracker); cfg->vxlc_vni = sc->vxl_vni; memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr, sizeof(union vxlan_sockaddr)); memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr, sizeof(union vxlan_sockaddr)); cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex; cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt; cfg->vxlc_ftable_max = sc->vxl_ftable_max; cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout; cfg->vxlc_port_min = sc->vxl_min_port; cfg->vxlc_port_max = sc->vxl_max_port; cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0; cfg->vxlc_ttl = sc->vxl_ttl; VXLAN_RUNLOCK(sc, &tracker); #ifdef INET6 if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa)) sa6_recoverscope(&cfg->vxlc_local_sa.in6); if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa)) sa6_recoverscope(&cfg->vxlc_remote_sa.in6); #endif return (0); } static int vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; if (vxlan_check_vni(cmd->vxlcmd_vni) != 0) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_vni = cmd->vxlcmd_vni; error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; union vxlan_sockaddr *vxlsa; int error; cmd = arg; vxlsa = &cmd->vxlcmd_sa; if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa)) return (EINVAL); if (vxlan_sockaddr_in_multicast(vxlsa) != 0) return (EINVAL); if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) { error = vxlan_sockaddr_in6_embedscope(vxlsa); if (error) return (error); } VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; union vxlan_sockaddr *vxlsa; int error; cmd = arg; vxlsa = &cmd->vxlcmd_sa; if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa)) return (EINVAL); if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) { error = vxlan_sockaddr_in6_embedscope(vxlsa); if (error) return (error); } VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; if (cmd->vxlcmd_port == 0) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; if (cmd->vxlcmd_port == 0) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; uint16_t min, max; int error; cmd = arg; min = cmd->vxlcmd_port_min; max = cmd->vxlcmd_port_max; if (max < min) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_min_port = min; sc->vxl_max_port = max; error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; 
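/* Check and apply the new timeout atomically under the write lock. */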
VXLAN_WLOCK(sc); if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) { sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout; error = 0; } else error = EINVAL; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) { sc->vxl_ftable_max = cmd->vxlcmd_ftable_max; error = 0; } else error = EINVAL; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) { sc->vxl_ttl = cmd->vxlcmd_ttl; if (sc->vxl_im4o != NULL) sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; if (sc->vxl_im6o != NULL) sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; error = 0; } else error = EINVAL; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; cmd = arg; VXLAN_WLOCK(sc); if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN) sc->vxl_flags |= VXLAN_FLAG_LEARN; else sc->vxl_flags &= ~VXLAN_FLAG_LEARN; VXLAN_WUNLOCK(sc); return (0); } static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg) { union vxlan_sockaddr vxlsa; struct ifvxlancmd *cmd; struct vxlan_ftable_entry *fe; int error; cmd = arg; vxlsa = cmd->vxlcmd_sa; if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa)) return (EINVAL); if (vxlan_sockaddr_in_any(&vxlsa) != 0) return (EINVAL); if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) return (EINVAL); /* BMV: We could support both IPv4 and IPv6 later. 
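For now a static entry must use the same address family as the configured destination, which the check below enforces.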
*/ if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family) return (EAFNOSUPPORT); if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { error = vxlan_sockaddr_in6_embedscope(&vxlsa); if (error) return (error); } fe = vxlan_ftable_entry_alloc(); if (fe == NULL) return (ENOMEM); if (vxlsa.in4.sin_port == 0) vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa, VXLAN_FE_FLAG_STATIC); VXLAN_WLOCK(sc); error = vxlan_ftable_entry_insert(sc, fe); VXLAN_WUNLOCK(sc); if (error) vxlan_ftable_entry_free(fe); return (error); } static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; struct vxlan_ftable_entry *fe; int error; cmd = arg; VXLAN_WLOCK(sc); fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac); if (fe != NULL) { vxlan_ftable_entry_destroy(sc, fe); error = 0; } else error = ENOENT; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int all; cmd = arg; all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL; VXLAN_WLOCK(sc); vxlan_ftable_flush(sc, all); VXLAN_WUNLOCK(sc); return (0); } static int vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get) { const struct vxlan_control *vc; union { struct ifvxlancfg cfg; struct ifvxlancmd cmd; } args; int out, error; if (ifd->ifd_cmd >= vxlan_control_table_size) return (EINVAL); bzero(&args, sizeof(args)); vc = &vxlan_control_table[ifd->ifd_cmd]; out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0; if ((get != 0 && out == 0) || (get == 0 && out != 0)) return (EINVAL); if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) { error = priv_check(curthread, PRIV_NET_VXLAN); if (error) return (error); } if (ifd->ifd_len != vc->vxlc_argsize || ifd->ifd_len > sizeof(args)) return (EINVAL); if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) { error = copyin(ifd->ifd_data, &args, ifd->ifd_len); if (error) return (error); } error = vc->vxlc_func(sc, &args); if (error) return (error); if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) { error = copyout(&args, ifd->ifd_data, ifd->ifd_len); if (error) return (error); } return (0); } static int vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vxlan_softc *sc; struct ifreq *ifr; struct ifdrv *ifd; int error; sc = ifp->if_softc; ifr = (struct ifreq *) data; ifd = (struct ifdrv *) data; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: error = 0; break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC); break; case SIOCSIFFLAGS: error = vxlan_ioctl_ifflags(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } #if defined(INET) || defined(INET6) static uint16_t vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) { int range; uint32_t hash; range = sc->vxl_max_port - sc->vxl_min_port + 1; if (M_HASHTYPE_ISHASH(m)) hash = m->m_pkthdr.flowid; else hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, sc->vxl_port_hash_key); return (sc->vxl_min_port + (hash % range)); } static void vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, uint16_t srcport, uint16_t dstport) { struct vxlanudphdr *hdr; struct udphdr *udph; struct vxlan_header *vxh; int len; len = m->m_pkthdr.len - ipoff; MPASS(len >= sizeof(struct vxlanudphdr)); hdr = mtodo(m, ipoff); udph = &hdr->vxlh_udp; udph->uh_sport = srcport; udph->uh_dport = dstport; udph->uh_ulen = htons(len); udph->uh_sum = 
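/*
 * A zero UDP checksum is permitted over IPv4; for IPv6, where zero is
 * not allowed without RFC 6935 support, vxlan_encap6() computes a real
 * checksum after building the header.
 */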
0; vxh = &hdr->vxlh_hdr; vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); } #endif static int vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, struct mbuf *m) { #ifdef INET struct ifnet *ifp; struct ip *ip; struct in_addr srcaddr, dstaddr; uint16_t srcport, dstport; int len, mcast, error; ifp = sc->vxl_ifp; srcaddr = sc->vxl_src_addr.in4.sin_addr; srcport = vxlan_pick_source_port(sc, m); dstaddr = fvxlsa->in4.sin_addr; dstport = fvxlsa->in4.sin_port; M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), M_NOWAIT); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } len = m->m_pkthdr.len; ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_len = htons(len); ip->ip_off = 0; ip->ip_ttl = sc->vxl_ttl; ip->ip_p = IPPROTO_UDP; ip->ip_sum = 0; ip->ip_src = srcaddr; ip->ip_dst = dstaddr; vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; m->m_flags &= ~(M_MCAST | M_BCAST); error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mcast != 0) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); #else m_freem(m); return (ENOTSUP); #endif } static int vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, struct mbuf *m) { #ifdef INET6 struct ifnet *ifp; struct ip6_hdr *ip6; const struct in6_addr *srcaddr, *dstaddr; uint16_t srcport, dstport; int len, mcast, error; ifp = sc->vxl_ifp; srcaddr = &sc->vxl_src_addr.in6.sin6_addr; srcport = vxlan_pick_source_port(sc, m); dstaddr = &fvxlsa->in6.sin6_addr; dstport = fvxlsa->in6.sin6_port; M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr), M_NOWAIT); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } len = m->m_pkthdr.len; ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */ ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = 0; ip6->ip6_nxt = IPPROTO_UDP; ip6->ip6_hlim = sc->vxl_ttl; ip6->ip6_src = *srcaddr; ip6->ip6_dst = *dstaddr; vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport); /* * XXX BMV We need support for RFC6935 before we can send and * receive IPv6 UDP packets with a zero checksum. */ { struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr)); hdr->uh_sum = in6_cksum_pseudo(ip6, m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 
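/* Note whether the inner frame was multicast, for the OMCASTS counter, before the flags are cleared. */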
1 : 0; m->m_flags &= ~(M_MCAST | M_BCAST); error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mcast != 0) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); #else m_freem(m); return (ENOTSUP); #endif } static int vxlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct rm_priotracker tracker; union vxlan_sockaddr vxlsa; struct vxlan_softc *sc; struct vxlan_ftable_entry *fe; struct ifnet *mcifp; struct ether_header *eh; int ipv4, error; sc = ifp->if_softc; eh = mtod(m, struct ether_header *); fe = NULL; mcifp = NULL; ETHER_BPF_MTAP(ifp, m); VXLAN_RLOCK(sc, &tracker); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VXLAN_RUNLOCK(sc, &tracker); m_freem(m); return (ENETDOWN); } if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost); if (fe == NULL) fe = &sc->vxl_default_fe; vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa); ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0; if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) mcifp = vxlan_multicast_if_ref(sc, ipv4); VXLAN_ACQUIRE(sc); VXLAN_RUNLOCK(sc, &tracker); if (ipv4 != 0) error = vxlan_encap4(sc, &vxlsa, m); else error = vxlan_encap6(sc, &vxlsa, m); vxlan_release(sc); if (mcifp != NULL) if_rele(mcifp); return (error); } static void vxlan_qflush(struct ifnet *ifp __unused) { } static void vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb, const struct sockaddr *srcsa, void *xvso) { struct vxlan_socket *vso; struct vxlan_header *vxh, vxlanhdr; uint32_t vni; int error __unused; M_ASSERTPKTHDR(m); vso = xvso; offset += sizeof(struct udphdr); if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header)) goto out; if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) { m_copydata(m, offset, sizeof(struct vxlan_header), (caddr_t) &vxlanhdr); vxh = &vxlanhdr; } else vxh = mtodo(m, offset); /* * Drop if there is a reserved bit set in either the flags or VNI * fields of the header. This goes against the specification, but * a bit set may indicate an unsupported new feature. This matches * the behavior of the Linux implementation. */ if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) || vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK)) goto out; vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT; /* Adjust to the start of the inner Ethernet frame. */ m_adj(m, offset + sizeof(struct vxlan_header)); error = vxlan_input(vso, vni, &m, srcsa); MPASS(error != 0 || m == NULL); out: if (m != NULL) m_freem(m); } static int vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0, const struct sockaddr *sa) { struct vxlan_softc *sc; struct ifnet *ifp; struct mbuf *m; struct ether_header *eh; int error; sc = vxlan_socket_lookup_softc(vso, vni); if (sc == NULL) return (ENOENT); ifp = sc->vxl_ifp; m = *m0; eh = mtod(m, struct ether_header *); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { error = ENETDOWN; goto out; } else if (ifp == m->m_pkthdr.rcvif) { /* XXX Does not catch more complex loops. 
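Only the direct case, where the encapsulated packet was received on the vxlan interface itself, is detected here.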
*/ error = EDEADLK; goto out; } if (sc->vxl_flags & VXLAN_FLAG_LEARN) vxlan_ftable_learn(sc, sa, eh->ether_shost); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); error = netisr_queue_src(NETISR_ETHER, 0, m); *m0 = NULL; out: vxlan_release(sc); return (error); } static void vxlan_set_default_config(struct vxlan_softc *sc) { sc->vxl_flags |= VXLAN_FLAG_LEARN; sc->vxl_vni = VXLAN_VNI_MAX; sc->vxl_ttl = IPDEFTTL; if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) { sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT); sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT); } else { sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); } sc->vxl_min_port = V_ipport_firstauto; sc->vxl_max_port = V_ipport_lastauto; sc->vxl_ftable_max = VXLAN_FTABLE_MAX; sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT; } static int vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp) { #ifndef INET if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 | VXLAN_PARAM_WITH_REMOTE_ADDR4)) return (EAFNOSUPPORT); #endif #ifndef INET6 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 | VXLAN_PARAM_WITH_REMOTE_ADDR6)) return (EAFNOSUPPORT); #else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa); if (error) return (error); } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { int error = vxlan_sockaddr_in6_embedscope( &vxlp->vxlp_remote_sa); if (error) return (error); } #endif if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) { if (vxlan_check_vni(vxlp->vxlp_vni) == 0) sc->vxl_vni = vxlp->vxlp_vni; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) { sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in); sc->vxl_src_addr.in4.sin_family = AF_INET; sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_sa.in4.sin_addr; } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6); sc->vxl_src_addr.in6.sin6_family = AF_INET6; sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_sa.in6.sin6_addr; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) { sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in); sc->vxl_dst_addr.in4.sin_family = AF_INET; sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_sa.in4.sin_addr; } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6); sc->vxl_dst_addr.in6.sin6_family = AF_INET6; sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_sa.in6.sin6_addr; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT) sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port); if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT) sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port); if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) { if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) { sc->vxl_min_port = vxlp->vxlp_min_port; sc->vxl_max_port = vxlp->vxlp_max_port; } } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF) strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ); if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) { if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0) sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) { if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0) sc->vxl_ftable_max = vxlp->vxlp_ftable_max; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) { if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0) sc->vxl_ttl = 
vxlp->vxlp_ttl; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) { if (vxlp->vxlp_learn == 0) sc->vxl_flags &= ~VXLAN_FLAG_LEARN; } return (0); } static int vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct vxlan_softc *sc; struct ifnet *ifp; struct ifvxlanparam vxlp; int error; sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO); sc->vxl_unit = unit; vxlan_set_default_config(sc); if (params != 0) { error = copyin(params, &vxlp, sizeof(vxlp)); if (error) goto fail; error = vxlan_set_user_config(sc, &vxlp); if (error) goto fail; } ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { error = ENOSPC; goto fail; } sc->vxl_ifp = ifp; rm_init(&sc->vxl_lock, "vxlanrm"); callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0); sc->vxl_port_hash_key = arc4random(); vxlan_ftable_init(sc); vxlan_sysctl_setup(sc); ifp->if_softc = sc; if_initname(ifp, vxlan_name, unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vxlan_init; ifp->if_ioctl = vxlan_ioctl; ifp->if_transmit = vxlan_transmit; ifp->if_qflush = vxlan_qflush; ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status); ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO); - vxlan_fakeaddr(sc); - ether_ifattach(ifp, sc->vxl_hwaddr); + ether_fakeaddr(&sc->vxl_hwaddr); + ether_ifattach(ifp, sc->vxl_hwaddr.octet); ifp->if_baudrate = 0; ifp->if_hdrlen = 0; return (0); fail: free(sc, M_VXLAN); return (error); } static void vxlan_clone_destroy(struct ifnet *ifp) { struct vxlan_softc *sc; sc = ifp->if_softc; vxlan_teardown(sc); vxlan_ftable_flush(sc, 1); ether_ifdetach(ifp); if_free(ifp); ifmedia_removeall(&sc->vxl_media); vxlan_ftable_fini(sc); vxlan_sysctl_destroy(sc); rm_destroy(&sc->vxl_lock); free(sc, M_VXLAN); } /* BMV: Taken from if_bridge. */ static uint32_t vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). */ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (0) mix(a, b, c); #undef mix return (c); -} - -static void -vxlan_fakeaddr(struct vxlan_softc *sc) -{ - - /* - * Generate a non-multicast, locally administered address. - * - * BMV: Should we use the FreeBSD OUI range instead? - */ - arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1); - sc->vxl_hwaddr[0] &= ~1; - sc->vxl_hwaddr[0] |= 2; } static int vxlan_media_change(struct ifnet *ifp) { /* Ignore. 
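The media of a vxlan interface is fixed (Ethernet/autoselect), so there is nothing to change.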
*/ return (0); } static void vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; ifmr->ifm_active = IFM_ETHER | IFM_FDX; } static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); } static void vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); bzero(vxladdr, sizeof(*vxladdr)); if (sa->sa_family == AF_INET) { vxladdr->in4 = *satoconstsin(sa); vxladdr->in4.sin_len = sizeof(struct sockaddr_in); } else if (sa->sa_family == AF_INET6) { vxladdr->in6 = *satoconstsin6(sa); vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); } } static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { int equal; if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); } else equal = 0; return (equal); } static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; vxladdr->in4.sin_family = AF_INET; vxladdr->in4.sin_len = sizeof(struct sockaddr_in); vxladdr->in4.sin_addr = *in4; } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; vxladdr->in6.sin6_family = AF_INET6; vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); vxladdr->in6.sin6_addr = *in6; } } static int vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) { const struct sockaddr *sa; int supported; sa = &vxladdr->sa; supported = 0; if (sa->sa_family == AF_UNSPEC && unspec != 0) { supported = 1; } else if (sa->sa_family == AF_INET) { #ifdef INET supported = 1; #endif } else if (sa->sa_family == AF_INET6) { #ifdef INET6 supported = 1; #endif } return (supported); } static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) { const struct sockaddr *sa; int any; sa = &vxladdr->sa; if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; any = in4->s_addr == INADDR_ANY; } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; any = IN6_IS_ADDR_UNSPECIFIED(in6); } else any = -1; return (any); } static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) { const struct sockaddr *sa; int mc; sa = &vxladdr->sa; if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; mc = IN_MULTICAST(ntohl(in4->s_addr)); } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; mc = IN6_IS_ADDR_MULTICAST(in6); } else mc = -1; return (mc); } static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) { int error; MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); #ifdef INET6 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); #else error = EAFNOSUPPORT; #endif return (error); } static int vxlan_can_change_config(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = sc->vxl_ifp; VXLAN_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) return (0); return (1); } 
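/*
 * Note: the vxlan_check_* helpers below return zero when the supplied
 * value is in range and non-zero otherwise, which is how their callers
 * test the result.
 */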
static int vxlan_check_vni(uint32_t vni) { return (vni >= VXLAN_VNI_MAX); } static int vxlan_check_ttl(int ttl) { return (ttl > MAXTTL); } static int vxlan_check_ftable_timeout(uint32_t timeout) { return (timeout > VXLAN_FTABLE_MAX_TIMEOUT); } static int vxlan_check_ftable_max(uint32_t max) { return (max > VXLAN_FTABLE_MAX); } static void vxlan_sysctl_setup(struct vxlan_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct vxlan_statistics *stats; char namebuf[8]; ctx = &sc->vxl_sysctl_ctx; stats = &sc->vxl_stats; snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit); sysctl_ctx_init(ctx); sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf, CTLFLAG_RD, NULL, ""); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), OID_AUTO, "ftable", CTLFLAG_RD, NULL, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count", CTLFLAG_RD, &sc->vxl_ftable_cnt, 0, "Number of entries in forwarding table"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max", CTLFLAG_RD, &sc->vxl_ftable_max, 0, "Maximum number of entries allowed in forwarding table"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout", CTLFLAG_RD, &sc->vxl_ftable_timeout, 0, "Number of seconds between prunes of the forwarding table"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, sc, 0, vxlan_ftable_sysctl_dump, "A", "Dump the forwarding table entries"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), OID_AUTO, "stats", CTLFLAG_RD, NULL, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0, "Forwarding table reached maximum entries"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "ftable_lock_upgrade_failed", CTLFLAG_RD, &stats->ftable_lock_upgrade_failed, 0, "Forwarding table update required lock upgrade"); } static void vxlan_sysctl_destroy(struct vxlan_softc *sc) { sysctl_ctx_free(&sc->vxl_sysctl_ctx); sc->vxl_sysctl_node = NULL; } static int vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", sc->vxl_unit, knob); TUNABLE_INT_FETCH(path, &def); return (def); } static void vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp) { struct vxlan_softc_head list; struct vxlan_socket *vso; struct vxlan_softc *sc, *tsc; LIST_INIT(&list); if (ifp->if_flags & IFF_RENAMING) return; if ((ifp->if_flags & IFF_MULTICAST) == 0) return; VXLAN_LIST_LOCK(); LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) vxlan_socket_ifdetach(vso, ifp, &list); VXLAN_LIST_UNLOCK(); LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) { LIST_REMOVE(sc, vxl_ifdetach_list); VXLAN_WLOCK(sc); if (sc->vxl_flags & VXLAN_FLAG_INIT) vxlan_init_wait(sc); vxlan_teardown_locked(sc); } } static void vxlan_load(void) { mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF); LIST_INIT(&vxlan_socket_list); vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY); vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create, vxlan_clone_destroy, 0); } static void vxlan_unload(void) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, vxlan_ifdetach_event_tag); if_clone_detach(vxlan_cloner); mtx_destroy(&vxlan_list_mtx); MPASS(LIST_EMPTY(&vxlan_socket_list)); } static int vxlan_modevent(module_t mod, int type, void *unused) { int error; error = 0; switch (type) { case
MOD_LOAD: vxlan_load(); break; case MOD_UNLOAD: vxlan_unload(); break; default: error = ENOTSUP; break; } return (error); } static moduledata_t vxlan_mod = { "if_vxlan", vxlan_modevent, 0 }; DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vxlan, 1);
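/*
 * Illustrative usage (not part of this change): once the module is
 * loaded, a vxlan interface is normally created and configured from
 * userland via the cloner registered above, e.g. with ifconfig(8):
 *
 *   ifconfig vxlan0 create vxlanid 42 vxlanlocal 192.0.2.1 \
 *       vxlanremote 192.0.2.2
 *
 * This path exercises vxlan_clone_create() and, once the interface is
 * brought up, vxlan_init().
 */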