Index: head/net/openbgpd/Makefile =================================================================== --- head/net/openbgpd/Makefile (revision 354183) +++ head/net/openbgpd/Makefile (revision 354184) @@ -1,41 +1,46 @@ # $FreeBSD$ PORTNAME= openbgpd PORTVERSION= 5.2.20121209 +PORTREVISION= 1 CATEGORIES= net MASTER_SITES= ${MASTER_SITE_OPENBSD} MASTER_SITE_SUBDIR= OpenBGPD DISTNAME= ${PORTNAME}-4.6 EXTRACT_SUFX= .tgz DIST_SUBDIR= ${PORTNAME} MAINTAINER= hrs@FreeBSD.org COMMENT= Free implementation of the Border Gateway Protocol, Version 4 +LICENSE= ISCL +LICENSE_FILE= ${FILESDIR}/COPYING + CONFLICTS= zebra-[0-9]* quagga-[0-9]* WRKSRC= ${WRKDIR} +MAKE_ARGS= -DFREEBSDPORTS USE_RC_SUBR= ${PORTNAME} PLIST_FILES= sbin/bgpctl sbin/bgpd man/man5/bgpd.conf.5.gz \ man/man8/bgpctl.8.gz man/man8/bgpd.8.gz SUB_FILES= pkg-message USERS= _bgpd GROUPS= _bgpd OPTIONS_DEFINE= IPV6LLPEER OPTIONS_DEFAULT=IPV6LLPEER IPV6LLPEER_DESC=Support nexthop using IPv6 link-local address .include .if ${PORT_OPTIONS:MIPV6LLPEER} -MAKE_ARGS= -DIPV6_LINKLOCAL_PEER +MAKE_ARGS+= -DIPV6_LINKLOCAL_PEER .endif post-patch: @${REINPLACE_CMD} -e "s|%%PREFIX%%|${PREFIX}|g" \ ${WRKSRC}/bgpd/bgpd.8 \ ${WRKSRC}/bgpd/bgpd.conf.5 \ ${WRKSRC}/bgpctl/bgpctl.8 .include Index: head/net/openbgpd/files/COPYING =================================================================== --- head/net/openbgpd/files/COPYING (nonexistent) +++ head/net/openbgpd/files/COPYING (revision 354184) @@ -0,0 +1,21 @@ +$FreeBSD$ + +Copyright (c) 2003-2007 Henning Brauer +Copyright (c) 2002 Daniel Hartmeier +Copyright (c) 2003, 2004, 2006 Claudio Jeker +Copyright (c) 2003, 2004 Markus Friedl +Copyright (c) 2004 Damien Miller +Copyright (c) 2006, 2007, 2008 Reyk Floeter +Copyright (c) 2006, 2007 Pierre-Yves Ritschard + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN +AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. Property changes on: head/net/openbgpd/files/COPYING ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/net/openbgpd/files/patch-Makefile.inc =================================================================== --- head/net/openbgpd/files/patch-Makefile.inc (revision 354183) +++ head/net/openbgpd/files/patch-Makefile.inc (revision 354184) @@ -1,12 +1,15 @@ Index: Makefile.inc =================================================================== RCS file: Makefile.inc diff -N Makefile.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ Makefile.inc 30 Jun 2009 07:19:13 -0000 1.2 -@@ -0,0 +1,5 @@ -+# $hrs: openbgpd/Makefile.inc,v 1.2 2009/06/30 07:19:13 hrs Exp $ ++++ Makefile.inc 16 May 2014 01:06:14 -0000 1.5 +@@ -0,0 +1,8 @@ ++# $hrs: openbgpd/Makefile.inc,v 1.5 2014/05/16 01:06:14 hrs Exp $ + +PREFIX?= /usr/local +BINDIR?= ${PREFIX}/sbin +MANDIR?= ${PREFIX}/man/man ++.if defined(FREEBSDPORTS) ++WITH_INSTALL_AS_USER= yes ++.endif Index: head/net/openbgpd/files/patch-bgpd_Makefile =================================================================== --- head/net/openbgpd/files/patch-bgpd_Makefile (revision 354183) +++ head/net/openbgpd/files/patch-bgpd_Makefile (revision 354184) @@ -1,37 +1,36 @@ Index: bgpd/Makefile =================================================================== RCS file: /home/cvs/private/hrs/openbgpd/bgpd/Makefile,v retrieving revision 1.1.1.2 -retrieving revision 1.9 -diff -u -p -r1.1.1.2 -r1.9 +diff -u -p -r1.1.1.2 Makefile --- bgpd/Makefile 9 Jul 2009 16:49:54 -0000 1.1.1.2 -+++ bgpd/Makefile 13 Oct 2012 18:36:00 -0000 1.9 ++++ bgpd/Makefile 16 May 2014 00:42:48 -0000 @@ -1,15 +1,25 @@ # $OpenBSD: Makefile,v 1.28 2009/06/25 14:14:54 deraadt Exp $ +.PATH: ${.CURDIR}/.. ${.CURDIR}/../openbsd-compat + +CONFFILE?= ${PREFIX}/etc/bgpd.conf + PROG= bgpd -SRCS= bgpd.c buffer.c session.c log.c parse.y config.c imsg.c \ +SRCS= bgpd.c session.c log.c parse.y config.c \ rde.c rde_rib.c rde_decide.c rde_prefix.c mrt.c kroute.c \ - control.c pfkey.c rde_update.c rde_attr.c printconf.c \ - rde_filter.c pftable.c name2id.c util.c carp.c timer.c + control.c pfkey_compat.c rde_update.c rde_attr.c printconf.c \ + rde_filter.c pftable.c name2id.c util.c carp.c timer.c \ + imsg.c imsg-buffer.c CFLAGS+= -Wall -I${.CURDIR} +CFLAGS+= -I${.CURDIR}/../openbsd-compat CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes CFLAGS+= -Wmissing-declarations CFLAGS+= -Wshadow -Wpointer-arith -Wcast-qual CFLAGS+= -Wsign-compare +CFLAGS+= -DCONFFILE=\"${CONFFILE}\" +.if defined(IPV6_LINKLOCAL_PEER) +CFLAGS+= -DIPV6_LINKLOCAL_PEER +.endif YFLAGS= MAN= bgpd.8 bgpd.conf.5 Index: head/net/openbgpd/files/patch-bgpd_bgpd.h =================================================================== --- head/net/openbgpd/files/patch-bgpd_bgpd.h (revision 354183) +++ head/net/openbgpd/files/patch-bgpd_bgpd.h (revision 354184) @@ -1,872 +1,872 @@ Index: bgpd/bgpd.h =================================================================== RCS file: /home/cvs/private/hrs/openbgpd/bgpd/bgpd.h,v retrieving revision 1.1.1.8 -retrieving revision 1.14 -diff -u -p -r1.1.1.8 -r1.14 +retrieving revision 1.15 +diff -u -p -r1.1.1.8 -r1.15 --- bgpd/bgpd.h 14 Feb 2010 20:19:57 -0000 1.1.1.8 -+++ bgpd/bgpd.h 8 Dec 2012 20:17:59 -0000 1.14 ++++ bgpd/bgpd.h 16 May 2014 00:36:26 -0000 1.15 @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.241 2009/06/12 16:42:53 claudio Exp $ */ +/* $OpenBSD: bgpd.h,v 1.273 2012/09/18 10:10:00 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -30,11 +31,16 @@ #include #include -#include +#if defined(__FreeBSD__) /* compat */ +#include "openbsd-compat.h" +#endif /* defined(__FreeBSD__) */ +#include "imsg.h" #define BGP_VERSION 4 #define BGP_PORT 179 +#ifndef CONFFILE #define CONFFILE "/etc/bgpd.conf" +#endif /* !CONFFILE */ #define BGPD_USER "_bgpd" #define PEER_DESCR_LEN 32 #define PFTABLE_LEN 16 @@ -42,8 +48,6 @@ #define IPSEC_ENC_KEY_LEN 32 #define IPSEC_AUTH_KEY_LEN 20 -#define ASNUM_MAX 0xffffffff - #define MAX_PKTSIZE 4096 #define MIN_HOLDTIME 3 #define READ_BUF_SIZE 65535 @@ -55,13 +59,8 @@ #define BGPD_OPT_NOACTION 0x0004 #define BGPD_OPT_FORCE_DEMOTE 0x0008 -#define BGPD_FLAG_NO_FIB_UPDATE 0x0001 #define BGPD_FLAG_NO_EVALUATE 0x0002 #define BGPD_FLAG_REFLECTOR 0x0004 -#define BGPD_FLAG_REDIST_STATIC 0x0008 -#define BGPD_FLAG_REDIST_CONNECTED 0x0010 -#define BGPD_FLAG_REDIST6_STATIC 0x0020 -#define BGPD_FLAG_REDIST6_CONNECTED 0x0040 #define BGPD_FLAG_NEXTHOP_BGP 0x0080 #define BGPD_FLAG_NEXTHOP_DEFAULT 0x1000 #define BGPD_FLAG_DECISION_MASK 0x0f00 @@ -83,9 +82,12 @@ #define F_REJECT 0x0080 #define F_BLACKHOLE 0x0100 #define F_LONGER 0x0200 +#define F_MPLS 0x0400 +#define F_REDISTRIBUTED 0x0800 #define F_CTL_DETAIL 0x1000 /* only used by bgpctl */ #define F_CTL_ADJ_IN 0x2000 #define F_CTL_ADJ_OUT 0x4000 +#define F_CTL_ACTIVE 0x8000 /* * Limit the number of control messages generated by the RDE and queued in @@ -109,18 +111,75 @@ enum reconf_action { RECONF_DELETE }; +/* Address Family Numbers as per RFC 1700 */ +#define AFI_UNSPEC 0 +#define AFI_IPv4 1 +#define AFI_IPv6 2 + +/* Subsequent Address Family Identifier as per RFC 4760 */ +#define SAFI_NONE 0 +#define SAFI_UNICAST 1 +#define SAFI_MULTICAST 2 +#define SAFI_MPLS 4 +#define SAFI_MPLSVPN 128 + +struct aid { + u_int16_t afi; + sa_family_t af; + u_int8_t safi; + char *name; +}; + +extern const struct aid aid_vals[]; + +#define AID_UNSPEC 0 +#define AID_INET 1 +#define AID_INET6 2 +#define AID_VPN_IPv4 3 +#define AID_MAX 4 +#define AID_MIN 1 /* skip AID_UNSPEC since that is a dummy */ + +#define AID_VALS { \ + /* afi, af, safii, name */ \ + { AFI_UNSPEC, AF_UNSPEC, SAFI_NONE, "unspec"}, \ + { AFI_IPv4, AF_INET, SAFI_UNICAST, "IPv4 unicast" }, \ + { AFI_IPv6, AF_INET6, SAFI_UNICAST, "IPv6 unicast" }, \ + { AFI_IPv4, AF_INET, SAFI_MPLSVPN, "IPv4 vpn" } \ +} + +#define AID_PTSIZE { \ + 0, \ + sizeof(struct pt_entry4), \ + sizeof(struct pt_entry6), \ + sizeof(struct pt_entry_vpn4) \ +} + +struct vpn4_addr { + u_int64_t rd; + struct in_addr addr; + u_int8_t labelstack[21]; /* max that makes sense */ + u_int8_t labellen; + u_int8_t pad1; + u_int8_t pad2; +}; + +#define BGP_MPLS_BOS 0x01 + struct bgpd_addr { - sa_family_t af; union { struct in_addr v4; struct in6_addr v6; - u_int8_t addr8[16]; - u_int16_t addr16[8]; - u_int32_t addr32[4]; + struct vpn4_addr vpn4; + /* maximum size for a prefix is 256 bits */ + u_int8_t addr8[32]; + u_int16_t addr16[16]; + u_int32_t addr32[8]; } ba; /* 128-bit address */ u_int32_t scope_id; /* iface scope id for v6 */ + u_int8_t aid; #define v4 ba.v4 #define v6 ba.v6 +#define vpn4 ba.vpn4 #define addr8 ba.addr8 #define addr16 ba.addr16 #define addr32 ba.addr32 @@ -141,17 +200,12 @@ TAILQ_HEAD(listen_addrs, listen_addr); TAILQ_HEAD(filter_set_head, filter_set); struct bgpd_config { - struct filter_set_head connectset; - struct filter_set_head connectset6; - struct filter_set_head staticset; - struct filter_set_head staticset6; struct listen_addrs *listen_addrs; char *csock; char *rcsock; int opts; int flags; int log; - u_int rtableid; u_int32_t bgpid; u_int32_t clusterid; u_int32_t as; @@ -205,12 +259,24 @@ struct peer_auth { }; struct capabilities { - u_int8_t mp_v4; /* multiprotocol extensions, RFC 4760 */ - u_int8_t mp_v6; - u_int8_t refresh; /* route refresh, RFC 2918 */ - u_int8_t restart; /* graceful restart, RFC 4724 */ - u_int8_t as4byte; /* draft-ietf-idr-as4bytes-13 */ -}; + struct { + int16_t timeout; /* graceful restart timeout */ + int8_t flags[AID_MAX]; /* graceful restart per AID flags */ + int8_t restart; /* graceful restart, RFC 4724 */ + } grestart; + int8_t mp[AID_MAX]; /* multiprotocol extensions, RFC 4760 */ + int8_t refresh; /* route refresh, RFC 2918 */ + int8_t as4byte; /* 4-byte ASnum, RFC 4893 */ +}; + +#define CAPA_GR_PRESENT 0x01 +#define CAPA_GR_RESTART 0x02 +#define CAPA_GR_FORWARD 0x04 +#define CAPA_GR_RESTARTING 0x08 + +#define CAPA_GR_TIMEMASK 0x0fff +#define CAPA_GR_R_FLAG 0x8000 +#define CAPA_GR_F_FLAG 0x80 struct peer_config { struct bgpd_addr remote_addr; @@ -237,7 +303,7 @@ struct peer_config { u_int8_t template; u_int8_t remote_masklen; u_int8_t cloned; - u_int8_t ebgp; /* 1 = ebgp, 0 = ibgp */ + u_int8_t ebgp; /* 0 = ibgp else ebgp */ u_int8_t distance; /* 1 = direct, >1 = multihop */ u_int8_t passive; u_int8_t down; @@ -248,21 +314,33 @@ struct peer_config { u_int8_t ttlsec; /* TTL security hack */ u_int8_t flags; u_int8_t pad[3]; + char lliface[IFNAMSIZ]; }; #define PEERFLAG_TRANS_AS 0x01 +enum network_type { + NETWORK_DEFAULT, + NETWORK_STATIC, + NETWORK_CONNECTED, + NETWORK_MRTCLONE +}; + struct network_config { - struct bgpd_addr prefix; - struct filter_set_head attrset; - u_int8_t prefixlen; + struct bgpd_addr prefix; + struct filter_set_head attrset; + struct rde_aspath *asp; + u_int rtableid; + enum network_type type; + u_int8_t prefixlen; + u_int8_t old; /* used for reloading */ }; TAILQ_HEAD(network_head, network); struct network { - struct network_config net; - TAILQ_ENTRY(network) entry; + struct network_config net; + TAILQ_ENTRY(network) entry; }; enum imsg_type { @@ -276,7 +354,6 @@ enum imsg_type { IMSG_CTL_NEIGHBOR_CLEAR, IMSG_CTL_NEIGHBOR_RREFRESH, IMSG_CTL_KROUTE, - IMSG_CTL_KROUTE6, IMSG_CTL_KROUTE_ADDR, IMSG_CTL_RESULT, IMSG_CTL_SHOW_NEIGHBOR, @@ -288,11 +365,14 @@ enum imsg_type { IMSG_CTL_SHOW_RIB_ATTR, IMSG_CTL_SHOW_RIB_COMMUNITY, IMSG_CTL_SHOW_NETWORK, - IMSG_CTL_SHOW_NETWORK6, IMSG_CTL_SHOW_RIB_MEM, IMSG_CTL_SHOW_TERSE, IMSG_CTL_SHOW_TIMER, + IMSG_CTL_LOG_VERBOSE, + IMSG_CTL_SHOW_FIB_TABLES, IMSG_NETWORK_ADD, + IMSG_NETWORK_ASPATH, + IMSG_NETWORK_ATTR, IMSG_NETWORK_REMOVE, IMSG_NETWORK_FLUSH, IMSG_NETWORK_DONE, @@ -302,19 +382,25 @@ enum imsg_type { IMSG_RECONF_PEER, IMSG_RECONF_FILTER, IMSG_RECONF_LISTENER, + IMSG_RECONF_CTRL, + IMSG_RECONF_RDOMAIN, + IMSG_RECONF_RDOMAIN_EXPORT, + IMSG_RECONF_RDOMAIN_IMPORT, + IMSG_RECONF_RDOMAIN_DONE, IMSG_RECONF_DONE, IMSG_UPDATE, IMSG_UPDATE_ERR, IMSG_SESSION_ADD, IMSG_SESSION_UP, IMSG_SESSION_DOWN, + IMSG_SESSION_STALE, + IMSG_SESSION_FLUSH, + IMSG_SESSION_RESTARTED, IMSG_MRT_OPEN, IMSG_MRT_REOPEN, IMSG_MRT_CLOSE, IMSG_KROUTE_CHANGE, IMSG_KROUTE_DELETE, - IMSG_KROUTE6_CHANGE, - IMSG_KROUTE6_DELETE, IMSG_NEXTHOP_ADD, IMSG_NEXTHOP_REMOVE, IMSG_NEXTHOP_UPDATE, @@ -337,6 +423,7 @@ enum ctl_results { CTL_RES_DENIED, CTL_RES_NOCAP, CTL_RES_PARSE_ERROR, + CTL_RES_PENDING, CTL_RES_NOMEM }; @@ -379,9 +466,43 @@ enum suberr_cease { ERR_CEASE_RSRC_EXHAUST }; +struct kroute_node; +struct kroute6_node; +struct knexthop_node; +RB_HEAD(kroute_tree, kroute_node); +RB_HEAD(kroute6_tree, kroute6_node); +RB_HEAD(knexthop_tree, knexthop_node); + +struct ktable { + char descr[PEER_DESCR_LEN]; + char ifmpe[IFNAMSIZ]; + struct kroute_tree krt; + struct kroute6_tree krt6; + struct knexthop_tree knt; + struct network_head krn; + u_int rtableid; + u_int nhtableid; /* rdomain id for nexthop lookup */ + u_int ifindex; /* ifindex of ifmpe */ + int nhrefcnt; /* refcnt for nexthop table */ + enum reconf_action state; + u_int8_t fib_conf; /* configured FIB sync flag */ + u_int8_t fib_sync; /* is FIB synced with kernel? */ +}; + +struct kroute_full { + struct bgpd_addr prefix; + struct bgpd_addr nexthop; + char label[RTLABEL_LEN]; + u_int16_t flags; + u_short ifindex; + u_int8_t prefixlen; + u_int8_t priority; +}; + struct kroute { struct in_addr prefix; struct in_addr nexthop; + u_int32_t mplslabel; u_int16_t flags; u_int16_t labelid; u_short ifindex; @@ -400,14 +521,12 @@ struct kroute6 { }; struct kroute_nexthop { - union { - struct kroute kr4; - struct kroute6 kr6; - } kr; struct bgpd_addr nexthop; struct bgpd_addr gateway; + struct bgpd_addr net; u_int8_t valid; u_int8_t connected; + u_int8_t netlen; }; struct kif { @@ -423,8 +542,7 @@ struct kif { struct session_up { struct bgpd_addr local_addr; struct bgpd_addr remote_addr; - struct capabilities capa_announced; - struct capabilities capa_received; + struct capabilities capa; u_int32_t remote_bgpid; u_int16_t short_as; }; @@ -437,8 +555,13 @@ struct pftable_msg { struct ctl_show_nexthop { struct bgpd_addr addr; - u_int8_t valid; struct kif kif; + union { + struct kroute kr4; + struct kroute6 kr6; + } kr; + u_int8_t valid; + u_int8_t krvalid; }; struct ctl_neighbor { @@ -447,20 +570,11 @@ struct ctl_neighbor { int show_timers; }; -struct kroute_label { - struct kroute kr; - char label[RTLABEL_LEN]; -}; - -struct kroute6_label { - struct kroute6 kr; - char label[RTLABEL_LEN]; -}; - -#define F_RIB_ELIGIBLE 0x01 -#define F_RIB_ACTIVE 0x02 -#define F_RIB_INTERNAL 0x04 -#define F_RIB_ANNOUNCE 0x08 +#define F_PREF_ELIGIBLE 0x01 +#define F_PREF_ACTIVE 0x02 +#define F_PREF_INTERNAL 0x04 +#define F_PREF_ANNOUNCE 0x08 +#define F_PREF_STALE 0x10 struct ctl_show_rib { struct bgpd_addr true_nexthop; @@ -472,9 +586,7 @@ struct ctl_show_rib { u_int32_t remote_id; u_int32_t local_pref; u_int32_t med; - u_int32_t prefix_cnt; - u_int32_t active_cnt; - u_int32_t rib_cnt; + u_int32_t weight; u_int16_t aspath_len; u_int16_t flags; u_int8_t prefixlen; @@ -482,13 +594,6 @@ struct ctl_show_rib { /* plus a aspath_len bytes long aspath */ }; -struct ctl_show_rib_prefix { - struct bgpd_addr prefix; - time_t lastchange; - u_int16_t flags; - u_int8_t prefixlen; -}; - enum as_spec { AS_NONE, AS_ALL, @@ -498,16 +603,52 @@ enum as_spec { AS_EMPTY }; +enum aslen_spec { + ASLEN_NONE, + ASLEN_MAX, + ASLEN_SEQ +}; + struct filter_as { - enum as_spec type; u_int32_t as; + u_int16_t flags; + enum as_spec type; }; +struct filter_aslen { + u_int aslen; + enum aslen_spec type; +}; + +#define AS_FLAG_NEIGHBORAS 0x01 + struct filter_community { - int as; - int type; + int as; + int type; }; +struct filter_extcommunity { + u_int16_t flags; + u_int8_t type; + u_int8_t subtype; /* if extended type */ + union { + struct ext_as { + u_int16_t as; + u_int32_t val; + } ext_as; + struct ext_as4 { + u_int32_t as4; + u_int16_t val; + } ext_as4; + struct ext_ip { + struct in_addr addr; + u_int16_t val; + } ext_ip; + u_int64_t ext_opaq; /* only 48 bits */ + } data; +}; + + struct ctl_show_rib_request { char rib[PEER_DESCR_LEN]; struct ctl_neighbor neighbor; @@ -518,8 +659,8 @@ struct ctl_show_rib_request { pid_t pid; u_int16_t flags; enum imsg_type type; - sa_family_t af; u_int8_t prefixlen; + u_int8_t aid; }; enum filter_actions { @@ -585,6 +726,28 @@ struct filter_peers { #define EXT_COMMUNITY_OSPF_RTR_TYPE 6 /* RFC 4577 */ #define EXT_COMMUNITY_OSPF_RTR_ID 7 /* RFC 4577 */ #define EXT_COMMUNITY_BGP_COLLECT 8 /* RFC 4384 */ +/* other handy defines */ +#define EXT_COMMUNITY_OPAQUE_MAX 0xffffffffffffULL +#define EXT_COMMUNITY_FLAG_VALID 0x01 + +struct ext_comm_pairs { + u_int8_t type; + u_int8_t subtype; + u_int8_t transitive; /* transitive bit needs to be set */ +}; + +#define IANA_EXT_COMMUNITIES { \ + { EXT_COMMUNITY_TWO_AS, EXT_COMMUNITY_ROUTE_TGT, 0 }, \ + { EXT_COMMUNITY_TWO_AS, EXT_CUMMUNITY_ROUTE_ORIG, 0 }, \ + { EXT_COMMUNITY_TWO_AS, EXT_COMMUNITY_OSPF_DOM_ID, 0 }, \ + { EXT_COMMUNITY_TWO_AS, EXT_COMMUNITY_BGP_COLLECT, 0 }, \ + { EXT_COMMUNITY_FOUR_AS, EXT_COMMUNITY_ROUTE_TGT, 0 }, \ + { EXT_COMMUNITY_FOUR_AS, EXT_CUMMUNITY_ROUTE_ORIG, 0 }, \ + { EXT_COMMUNITY_IPV4, EXT_COMMUNITY_ROUTE_TGT, 0 }, \ + { EXT_COMMUNITY_IPV4, EXT_CUMMUNITY_ROUTE_ORIG, 0 }, \ + { EXT_COMMUNITY_IPV4, EXT_COMMUNITY_OSPF_RTR_ID, 0 }, \ + { EXT_COMMUNITY_OPAQUE, EXT_COMMUNITY_OSPF_RTR_TYPE, 0 } \ +} struct filter_prefix { @@ -592,18 +755,28 @@ struct filter_prefix { u_int8_t len; }; +struct filter_nexthop { + struct bgpd_addr addr; + u_int8_t flags; +#define FILTER_NEXTHOP_ADDR 1 +#define FILTER_NEXTHOP_NEIGHBOR 2 +}; + struct filter_prefixlen { enum comp_ops op; - sa_family_t af; + u_int8_t aid; u_int8_t len_min; u_int8_t len_max; }; struct filter_match { - struct filter_prefix prefix; - struct filter_prefixlen prefixlen; - struct filter_as as; - struct filter_community community; + struct filter_prefix prefix; + struct filter_prefixlen prefixlen; + struct filter_nexthop nexthop; + struct filter_as as; + struct filter_aslen aslen; + struct filter_community community; + struct filter_extcommunity ext_community; }; TAILQ_HEAD(filter_head, filter_rule); @@ -635,10 +808,13 @@ enum action_types { ACTION_SET_NEXTHOP_SELF, ACTION_SET_COMMUNITY, ACTION_DEL_COMMUNITY, + ACTION_SET_EXT_COMMUNITY, + ACTION_DEL_EXT_COMMUNITY, ACTION_PFTABLE, ACTION_PFTABLE_ID, ACTION_RTLABEL, - ACTION_RTLABEL_ID + ACTION_RTLABEL_ID, + ACTION_SET_ORIGIN }; struct filter_set { @@ -650,23 +826,53 @@ struct filter_set { int32_t relative; struct bgpd_addr nexthop; struct filter_community community; + struct filter_extcommunity ext_community; char pftable[PFTABLE_LEN]; char rtlabel[RTLABEL_LEN]; + u_int8_t origin; } action; enum action_types type; }; -struct rrefresh { - u_int16_t afi; - u_int8_t safi; +struct rdomain { + SIMPLEQ_ENTRY(rdomain) entry; + char descr[PEER_DESCR_LEN]; + char ifmpe[IFNAMSIZ]; + struct filter_set_head import; + struct filter_set_head export; + struct network_head net_l; + u_int64_t rd; + u_int rtableid; + u_int label; + int flags; }; +SIMPLEQ_HEAD(rdomain_head, rdomain); + +struct rde_rib { + SIMPLEQ_ENTRY(rde_rib) entry; + char name[PEER_DESCR_LEN]; + u_int rtableid; + u_int16_t id; + u_int16_t flags; +}; +SIMPLEQ_HEAD(rib_names, rde_rib); +extern struct rib_names ribnames; + +/* rde_rib flags */ +#define F_RIB_ENTRYLOCK 0x0001 +#define F_RIB_NOEVALUATE 0x0002 +#define F_RIB_NOFIB 0x0004 +#define F_RIB_NOFIBSYNC 0x0008 +#define F_RIB_HASNOFIB (F_RIB_NOFIB | F_RIB_NOEVALUATE) + +/* 4-byte magic AS number */ +#define AS_TRANS 23456 struct rde_memstats { int64_t path_cnt; int64_t prefix_cnt; int64_t rib_cnt; - int64_t pt4_cnt; - int64_t pt6_cnt; + int64_t pt_cnt[AID_MAX]; int64_t nexthop_cnt; int64_t aspath_cnt; int64_t aspath_size; @@ -677,82 +883,117 @@ struct rde_memstats { int64_t attr_dcnt; }; -struct rde_rib { - SIMPLEQ_ENTRY(rde_rib) entry; - char name[PEER_DESCR_LEN]; - u_int16_t id; - u_int16_t flags; +/* macros for IPv6 link-local address */ -+#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) ++#ifdef __KAME__ +#define IN6_LINKLOCAL_IFINDEX(addr) \ + ((addr).s6_addr[2] << 8 | (addr).s6_addr[3]) + +#define SET_IN6_LINKLOCAL_IFINDEX(addr, index) \ + do { \ + (addr).s6_addr[2] = ((index) >> 8) & 0xff; \ + (addr).s6_addr[3] = (index) & 0xff; \ + } while (0) +#endif + +#define MRT_FILE_LEN 512 +#define MRT2MC(x) ((struct mrt_config *)(x)) +#define MRT_MAX_TIMEOUT 7200 + +enum mrt_type { + MRT_NONE, + MRT_TABLE_DUMP, + MRT_TABLE_DUMP_MP, + MRT_TABLE_DUMP_V2, + MRT_ALL_IN, + MRT_ALL_OUT, + MRT_UPDATE_IN, + MRT_UPDATE_OUT +}; + +enum mrt_state { + MRT_STATE_RUNNING, + MRT_STATE_OPEN, + MRT_STATE_REOPEN, + MRT_STATE_REMOVE }; -SIMPLEQ_HEAD(rib_names, rde_rib); -extern struct rib_names ribnames; -/* Address Family Numbers as per RFC 1700 */ -#define AFI_IPv4 1 -#define AFI_IPv6 2 -#define AFI_ALL 0xffff - -/* Subsequent Address Family Identifier as per RFC 4760 */ -#define SAFI_NONE 0x00 -#define SAFI_UNICAST 0x01 -#define SAFI_MULTICAST 0x02 -#define SAFI_ALL 0xff +struct mrt { + char rib[PEER_DESCR_LEN]; + struct msgbuf wbuf; + LIST_ENTRY(mrt) entry; + u_int32_t peer_id; + u_int32_t group_id; + enum mrt_type type; + enum mrt_state state; + u_int16_t seqnum; +}; -/* 4-byte magic AS number */ -#define AS_TRANS 23456 +struct mrt_config { + struct mrt conf; + char name[MRT_FILE_LEN]; /* base file name */ + char file[MRT_FILE_LEN]; /* actual file name */ + time_t ReopenTimer; + time_t ReopenTimerInterval; +}; /* prototypes */ /* bgpd.c */ void send_nexthop_update(struct kroute_nexthop *); void send_imsg_session(int, pid_t, void *, u_int16_t); -int bgpd_redistribute(int, struct kroute *, struct kroute6 *); +int send_network(int, struct network_config *, + struct filter_set_head *); int bgpd_filternexthop(struct kroute *, struct kroute6 *); -/* log.c */ -void log_init(int); -void vlog(int, const char *, va_list); -void log_peer_warn(const struct peer_config *, const char *, ...); -void log_peer_warnx(const struct peer_config *, const char *, ...); -void log_warn(const char *, ...); -void log_warnx(const char *, ...); -void log_info(const char *, ...); -void log_debug(const char *, ...); -void fatal(const char *) __dead; -void fatalx(const char *) __dead; - -/* parse.y */ -int cmdline_symset(char *); +/* control.c */ +void control_cleanup(const char *); +int control_imsg_relay(struct imsg *); /* config.c */ int host(const char *, struct bgpd_addr *, u_int8_t *); /* kroute.c */ -int kr_init(int, u_int); -int kr_change(struct kroute_label *); -int kr_delete(struct kroute_label *); -int kr6_change(struct kroute6_label *); -int kr6_delete(struct kroute6_label *); +int kr_init(void); +int ktable_update(u_int, char *, char *, int); +void ktable_preload(void); +void ktable_postload(void); +int ktable_exists(u_int, u_int *); +int kr_change(u_int, struct kroute_full *); +int kr_delete(u_int, struct kroute_full *); void kr_shutdown(void); -void kr_fib_couple(void); -void kr_fib_decouple(void); +void kr_fib_couple(u_int); +void kr_fib_decouple(u_int); int kr_dispatch_msg(void); -int kr_nexthop_add(struct bgpd_addr *); -void kr_nexthop_delete(struct bgpd_addr *); +int kr_nexthop_add(u_int32_t, struct bgpd_addr *); +void kr_nexthop_delete(u_int32_t, struct bgpd_addr *); void kr_show_route(struct imsg *); void kr_ifinfo(char *); +int kr_net_reload(u_int, struct network_head *); int kr_reload(void); struct in6_addr *prefixlen2mask6(u_int8_t prefixlen); -/* control.c */ -void control_cleanup(const char *); -int control_imsg_relay(struct imsg *); +/* log.c */ +void log_init(int); +void log_verbose(int); +void vlog(int, const char *, va_list); +void log_peer_warn(const struct peer_config *, const char *, ...); +void log_peer_warnx(const struct peer_config *, const char *, ...); +void log_warn(const char *, ...); +void log_warnx(const char *, ...); +void log_info(const char *, ...); +void log_debug(const char *, ...); +void fatal(const char *) __dead; +void fatalx(const char *) __dead; -/* pftable.c */ -int pftable_exists(const char *); -int pftable_add(const char *); -int pftable_clear_all(void); -int pftable_addr_add(struct pftable_msg *); -int pftable_addr_remove(struct pftable_msg *); -int pftable_commit(void); +/* mrt.c */ +void mrt_clear_seq(void); +void mrt_write(struct mrt *); +void mrt_clean(struct mrt *); +void mrt_init(struct imsgbuf *, struct imsgbuf *); +int mrt_timeout(struct mrt_head *); +void mrt_reconfigure(struct mrt_head *); +void mrt_handler(struct mrt_head *); +struct mrt *mrt_get(struct mrt_head *, struct mrt *); +int mrt_mergeconfig(struct mrt_head *, struct mrt_head *); /* name2id.c */ u_int16_t rib_name2id(const char *); @@ -768,10 +1009,22 @@ const char *pftable_id2name(u_int16_t); void pftable_unref(u_int16_t); void pftable_ref(u_int16_t); +/* parse.y */ +int cmdline_symset(char *); + +/* pftable.c */ +int pftable_exists(const char *); +int pftable_add(const char *); +int pftable_clear_all(void); +int pftable_addr_add(struct pftable_msg *); +int pftable_addr_remove(struct pftable_msg *); +int pftable_commit(void); /* rde_filter.c */ void filterset_free(struct filter_set_head *); int filterset_cmp(struct filter_set *, struct filter_set *); +void filterset_move(struct filter_set_head *, + struct filter_set_head *); const char *filterset_name(enum action_types); /* util.c */ @@ -779,11 +1032,24 @@ const char *log_addr(const struct bgpd_a const char *log_in6addr(const struct in6_addr *); const char *log_sockaddr(struct sockaddr *); const char *log_as(u_int32_t); +const char *log_rd(u_int64_t); +const char *log_ext_subtype(u_int8_t); int aspath_snprint(char *, size_t, void *, u_int16_t); int aspath_asprint(char **, void *, u_int16_t); size_t aspath_strlen(void *, u_int16_t); +int aspath_match(void *, u_int16_t, enum as_spec, u_int32_t); +u_int32_t aspath_extract(const void *, int); +int prefix_compare(const struct bgpd_addr *, + const struct bgpd_addr *, int); in_addr_t prefixlen2mask(u_int8_t); void inet6applymask(struct in6_addr *, const struct in6_addr *, int); +const char *aid2str(u_int8_t); +int aid2afi(u_int8_t, u_int16_t *, u_int8_t *); +int afi2aid(u_int16_t, u_int8_t, u_int8_t *); +sa_family_t aid2af(u_int8_t); +int af2aid(sa_family_t, u_int8_t, u_int8_t *); +struct sockaddr *addr2sa(struct bgpd_addr *, u_int16_t); +void sa2addr(struct sockaddr *, struct bgpd_addr *); #endif /* __BGPD_H__ */ Index: head/net/openbgpd/files/patch-bgpd_kroute.c =================================================================== --- head/net/openbgpd/files/patch-bgpd_kroute.c (revision 354183) +++ head/net/openbgpd/files/patch-bgpd_kroute.c (revision 354184) @@ -1,3048 +1,3140 @@ Index: bgpd/kroute.c =================================================================== RCS file: /home/cvs/private/hrs/openbgpd/bgpd/kroute.c,v retrieving revision 1.1.1.7 -retrieving revision 1.14 -diff -u -p -r1.1.1.7 -r1.14 +retrieving revision 1.15 +diff -u -p -r1.1.1.7 -r1.15 --- bgpd/kroute.c 14 Feb 2010 20:19:57 -0000 1.1.1.7 -+++ bgpd/kroute.c 8 Dec 2012 20:17:59 -0000 1.14 ++++ bgpd/kroute.c 16 May 2014 00:36:26 -0000 1.15 @@ -1,4 +1,4 @@ -/* $OpenBSD: kroute.c,v 1.169 2009/06/25 15:54:22 claudio Exp $ */ +/* $OpenBSD: kroute.c,v 1.190 2012/07/13 16:57:35 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -27,6 +27,9 @@ #include #include #include +#if !defined(__FreeBSD__) /* FreeBSD has no mpls support. */ +#include +#endif #include #include #include @@ -37,11 +40,12 @@ #include "bgpd.h" +struct ktable **krt; +u_int krt_size; + struct { u_int32_t rtseq; pid_t pid; - u_int rtableid; - int fib_sync; int fd; } kr_state; @@ -83,32 +87,52 @@ struct kif_node { struct kif_kr6_head kroute6_l; }; -int kr_redistribute(int, struct kroute *); -int kr_redistribute6(int, struct kroute6 *); +int ktable_new(u_int, u_int, char *, char *, int); +void ktable_free(u_int); +void ktable_destroy(struct ktable *); +struct ktable *ktable_get(u_int); + +int kr4_change(struct ktable *, struct kroute_full *); +int kr6_change(struct ktable *, struct kroute_full *); +int krVPN4_change(struct ktable *, struct kroute_full *); +int kr4_delete(struct ktable *, struct kroute_full *); +int kr6_delete(struct ktable *, struct kroute_full *); +int krVPN4_delete(struct ktable *, struct kroute_full *); +void kr_net_delete(struct network *); +struct network *kr_net_match(struct ktable *, struct kroute *); +struct network *kr_net_match6(struct ktable *, struct kroute6 *); +struct network *kr_net_find(struct ktable *, struct network *); +int kr_redistribute(int, struct ktable *, struct kroute *); +int kr_redistribute6(int, struct ktable *, struct kroute6 *); +struct kroute_full *kr_tofull(struct kroute *); +struct kroute_full *kr6_tofull(struct kroute6 *); int kroute_compare(struct kroute_node *, struct kroute_node *); int kroute6_compare(struct kroute6_node *, struct kroute6_node *); int knexthop_compare(struct knexthop_node *, struct knexthop_node *); int kif_compare(struct kif_node *, struct kif_node *); -struct kroute_node *kroute_find(in_addr_t, u_int8_t, u_int8_t); +struct kroute_node *kroute_find(struct ktable *, in_addr_t, u_int8_t, + u_int8_t); struct kroute_node *kroute_matchgw(struct kroute_node *, struct sockaddr_in *); -int kroute_insert(struct kroute_node *); -int kroute_remove(struct kroute_node *); -void kroute_clear(void); +int kroute_insert(struct ktable *, struct kroute_node *); +int kroute_remove(struct ktable *, struct kroute_node *); +void kroute_clear(struct ktable *); -struct kroute6_node *kroute6_find(const struct in6_addr *, u_int8_t, - u_int8_t); +struct kroute6_node *kroute6_find(struct ktable *, const struct in6_addr *, + u_int8_t, u_int8_t); struct kroute6_node *kroute6_matchgw(struct kroute6_node *, struct sockaddr_in6 *); -int kroute6_insert(struct kroute6_node *); -int kroute6_remove(struct kroute6_node *); -void kroute6_clear(void); - -struct knexthop_node *knexthop_find(struct bgpd_addr *); -int knexthop_insert(struct knexthop_node *); -int knexthop_remove(struct knexthop_node *); -void knexthop_clear(void); +int kroute6_insert(struct ktable *, struct kroute6_node *); +int kroute6_remove(struct ktable *, struct kroute6_node *); +void kroute6_clear(struct ktable *); + +struct knexthop_node *knexthop_find(struct ktable *, struct bgpd_addr *); +int knexthop_insert(struct ktable *, + struct knexthop_node *); +int knexthop_remove(struct ktable *, + struct knexthop_node *); +void knexthop_clear(struct ktable *); struct kif_node *kif_find(int); int kif_insert(struct kif_node *); @@ -124,13 +148,16 @@ int kif_kr6_remove(struct kroute6_nod int kif_validate(struct kif *); int kroute_validate(struct kroute *); int kroute6_validate(struct kroute6 *); -void knexthop_validate(struct knexthop_node *); -void knexthop_track(void *); -struct kroute_node *kroute_match(in_addr_t, int); -struct kroute6_node *kroute6_match(struct in6_addr *, int); -void kroute_detach_nexthop(struct knexthop_node *); +void knexthop_validate(struct ktable *, + struct knexthop_node *); +void knexthop_track(struct ktable *, void *); +void knexthop_send_update(struct knexthop_node *); +struct kroute_node *kroute_match(struct ktable *, in_addr_t, int); +struct kroute6_node *kroute6_match(struct ktable *, struct in6_addr *, int); +void kroute_detach_nexthop(struct ktable *, + struct knexthop_node *); -int protect_lo(void); +int protect_lo(struct ktable *); u_int8_t prefixlen_classful(in_addr_t); u_int8_t mask2prefixlen(in_addr_t); u_int8_t mask2prefixlen6(struct sockaddr_in6 *); @@ -138,23 +165,20 @@ void get_rtaddrs(int, struct sockaddr * void if_change(u_short, int, struct if_data *); void if_announce(void *); -int send_rtmsg(int, int, struct kroute *); -int send_rt6msg(int, int, struct kroute6 *); +int send_rtmsg(int, int, struct ktable *, struct kroute *); +int send_rt6msg(int, int, struct ktable *, struct kroute6 *); int dispatch_rtmsg(void); -int fetchtable(u_int, int); +int fetchtable(struct ktable *); int fetchifs(int); int dispatch_rtmsg_addr(struct rt_msghdr *, - struct sockaddr *[RTAX_MAX], int); + struct sockaddr *[RTAX_MAX], struct ktable *); -RB_HEAD(kroute_tree, kroute_node) krt; RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare) RB_GENERATE(kroute_tree, kroute_node, entry, kroute_compare) -RB_HEAD(kroute6_tree, kroute6_node) krt6; RB_PROTOTYPE(kroute6_tree, kroute6_node, entry, kroute6_compare) RB_GENERATE(kroute6_tree, kroute6_node, entry, kroute6_compare) -RB_HEAD(knexthop_tree, knexthop_node) knt; RB_PROTOTYPE(knexthop_tree, knexthop_node, entry, knexthop_compare) RB_GENERATE(knexthop_tree, knexthop_node, entry, knexthop_compare) @@ -162,19 +186,21 @@ RB_HEAD(kif_tree, kif_node) kit; RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare) RB_GENERATE(kif_tree, kif_node, entry, kif_compare) +#define KT2KNT(x) (&(ktable_get((x)->nhtableid)->knt)) + /* * exported functions */ int -kr_init(int fs, u_int rtableid) +kr_init(void) { int opt = 0, rcvbuf, default_rcvbuf; +#if !defined(__FreeBSD__) /* FreeBSD does not have ROUTE_TABLEFILTER. */ + unsigned int tid = RTABLE_ANY; +#endif socklen_t optlen; - kr_state.rtableid = rtableid; - kr_state.fib_sync = fs; - if ((kr_state.fd = socket(AF_ROUTE, SOCK_RAW, 0)) == -1) { log_warn("kr_init: socket"); return (-1); @@ -198,194 +224,533 @@ kr_init(int fs, u_int rtableid) rcvbuf /= 2) ; /* nothing */ +#if !defined(__FreeBSD__) /* FreeBSD does not have ROUTE_TABLEFILTER. */ + if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_TABLEFILTER, &tid, + sizeof(tid)) == -1) { + log_warn("kr_init: setsockopt AF_ROUTE ROUTE_TABLEFILTER"); + return (-1); + } +#endif + kr_state.pid = getpid(); kr_state.rtseq = 1; - RB_INIT(&krt); - RB_INIT(&krt6); - RB_INIT(&knt); RB_INIT(&kit); if (fetchifs(0) == -1) return (-1); - if (fetchtable(kr_state.rtableid, 0) == -1) - return (-1); - if (kr_state.rtableid != 0) - if (fetchtable(0, 1) == -1) + return (kr_state.fd); +} + +int +ktable_new(u_int rtableid, u_int rdomid, char *name, char *ifname, int fs) +{ + struct ktable **xkrt; + struct ktable *kt; + size_t newsize, oldsize; + + /* resize index table if needed */ + if (rtableid >= krt_size) { + oldsize = sizeof(struct ktable *) * krt_size; + newsize = sizeof(struct ktable *) * (rtableid + 1); + if ((xkrt = realloc(krt, newsize)) == NULL) { + log_warn("ktable_new"); return (-1); + } + krt = xkrt; + krt_size = rtableid + 1; + bzero((char *)krt + oldsize, newsize - oldsize); + } + + if (krt[rtableid]) + fatalx("ktable_new: table already exists."); - if (protect_lo() == -1) + /* allocate new element */ + kt = krt[rtableid] = calloc(1, sizeof(struct ktable)); + if (kt == NULL) { + log_warn("ktable_new"); return (-1); + } - return (kr_state.fd); + /* initialize structure ... */ + strlcpy(kt->descr, name, sizeof(kt->descr)); + RB_INIT(&kt->krt); + RB_INIT(&kt->krt6); + RB_INIT(&kt->knt); + TAILQ_INIT(&kt->krn); + kt->fib_conf = kt->fib_sync = fs; + kt->rtableid = rtableid; + kt->nhtableid = rdomid; + /* bump refcount of rdomain table for the nexthop lookups */ + ktable_get(kt->nhtableid)->nhrefcnt++; + if (ifname) { + strlcpy(kt->ifmpe, ifname, IFNAMSIZ); + kt->ifindex = if_nametoindex(ifname); + } + + /* ... and load it */ + if (fetchtable(kt) == -1) + return (-1); + if (protect_lo(kt) == -1) + return (-1); + + /* everything is up and running */ + kt->state = RECONF_REINIT; + log_debug("new ktable %s for rtableid %d", name, rtableid); + return (0); +} + +void +ktable_free(u_int rtableid) +{ + struct ktable *kt, *nkt; + + if ((kt = ktable_get(rtableid)) == NULL) + return; + + /* decouple from kernel, no new routes will be entered from here */ + kr_fib_decouple(kt->rtableid); + + /* first unhook from the nexthop table */ + nkt = ktable_get(kt->nhtableid); + nkt->nhrefcnt--; + + /* + * Evil little details: + * If kt->nhrefcnt > 0 then kt == nkt and nothing needs to be done. + * If kt != nkt then kt->nhrefcnt must be 0 and kt must be killed. + * If nkt is no longer referenced it must be killed (possible double + * free so check that kt != nkt). + */ + if (kt != nkt && nkt->nhrefcnt <= 0) + ktable_destroy(nkt); + if (kt->nhrefcnt <= 0) + ktable_destroy(kt); +} + +void +ktable_destroy(struct ktable *kt) +{ + /* decouple just to be sure, does not hurt */ + kr_fib_decouple(kt->rtableid); + + log_debug("freeing ktable %s rtableid %u", kt->descr, kt->rtableid); + knexthop_clear(kt); + kroute_clear(kt); + kroute6_clear(kt); + + krt[kt->rtableid] = NULL; + free(kt); +} + +struct ktable * +ktable_get(u_int rtableid) +{ + if (rtableid >= krt_size) + return (NULL); + return (krt[rtableid]); +} + +int +ktable_update(u_int rtableid, char *name, char *ifname, int flags) +{ + struct ktable *kt, *rkt; + u_int rdomid; + + if (!ktable_exists(rtableid, &rdomid)) + fatalx("King Bula lost a table"); /* may not happen */ + + if (rdomid != rtableid || flags & F_RIB_NOFIB) { + rkt = ktable_get(rdomid); + if (rkt == NULL) { + char buf[32]; + snprintf(buf, sizeof(buf), "rdomain_%d", rdomid); + if (ktable_new(rdomid, rdomid, buf, NULL, 0)) + return (-1); + } else { + /* there is no need for full fib synchronisation if + * the table is only used for nexthop lookups. + */ + if (rkt->state == RECONF_DELETE) { + rkt->fib_conf = 0; + rkt->state = RECONF_KEEP; + } + } + } + + if (flags & (F_RIB_NOEVALUATE | F_RIB_NOFIB)) + /* only rdomain table must exist */ + return (0); + + kt = ktable_get(rtableid); + if (kt == NULL) { + if (ktable_new(rtableid, rdomid, name, ifname, + !(flags & F_RIB_NOFIBSYNC))) + return (-1); + } else { + /* fib sync has higher preference then no sync */ + if (kt->state == RECONF_DELETE) { + kt->fib_conf = !(flags & F_RIB_NOFIBSYNC); + kt->state = RECONF_KEEP; + } else if (!kt->fib_conf) + kt->fib_conf = !(flags & F_RIB_NOFIBSYNC); + + strlcpy(kt->descr, name, sizeof(kt->descr)); + } + return (0); +} + +void +ktable_preload(void) +{ + struct ktable *kt; + u_int i; + + for (i = 0; i < krt_size; i++) { + if ((kt = ktable_get(i)) == NULL) + continue; + kt->state = RECONF_DELETE; + } +} + +void +ktable_postload(void) +{ + struct ktable *kt; + u_int i; + + for (i = krt_size; i > 0; i--) { + if ((kt = ktable_get(i - 1)) == NULL) + continue; + if (kt->state == RECONF_DELETE) + ktable_free(i - 1); + else if (kt->state == RECONF_REINIT) + kt->fib_sync = kt->fib_conf; + } - } - - int --kr_change(struct kroute_label *kl) ++} ++ ++int +ktable_exists(u_int rtableid, u_int *rdomid) +{ +#if !defined(__FreeBSD__) /* FreeBSD does not have NET_RT_TABLE. */ + size_t len; + struct rt_tableinfo info; + int mib[6]; + + mib[0] = CTL_NET; + mib[1] = AF_ROUTE; + mib[2] = 0; + mib[3] = 0; + mib[4] = NET_RT_TABLE; + mib[5] = rtableid; + + len = sizeof(info); + if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) { + if (errno == ENOENT) + /* table nonexistent */ + return (0); + log_warn("sysctl"); + /* must return 0 so that the table is considered non-existent */ + return (0); + } + if (rdomid) + *rdomid = info.rti_domainid; +#else + *rdomid = 0; +#endif + return (1); -+} -+ -+int + } + + int +-kr_change(struct kroute_label *kl) +kr_change(u_int rtableid, struct kroute_full *kl) +{ + struct ktable *kt; + + if ((kt = ktable_get(rtableid)) == NULL) + /* too noisy during reloads, just ignore */ + return (0); + switch (kl->prefix.aid) { + case AID_INET: + return (kr4_change(kt, kl)); + case AID_INET6: + return (kr6_change(kt, kl)); + case AID_VPN_IPv4: + return (krVPN4_change(kt, kl)); + } + log_warnx("kr_change: not handled AID"); + return (-1); +} + +int +kr4_change(struct ktable *kt, struct kroute_full *kl) { struct kroute_node *kr; int action = RTM_ADD; + u_int16_t labelid; - if ((kr = kroute_find(kl->kr.prefix.s_addr, kl->kr.prefixlen, RTP_BGP)) - != NULL) + if ((kr = kroute_find(kt, kl->prefix.v4.s_addr, kl->prefixlen, + RTP_BGP)) != NULL) action = RTM_CHANGE; /* nexthop within 127/8 -> ignore silently */ - if ((kl->kr.nexthop.s_addr & htonl(IN_CLASSA_NET)) == + if ((kl->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) == htonl(INADDR_LOOPBACK & IN_CLASSA_NET)) return (0); - if (kr) - rtlabel_unref(kr->r.labelid); - kl->kr.labelid = rtlabel_name2id(kl->label); + labelid = rtlabel_name2id(kl->label); /* for blackhole and reject routes nexthop needs to be 127.0.0.1 */ - if (kl->kr.flags & (F_BLACKHOLE|F_REJECT)) - kl->kr.nexthop.s_addr = htonl(INADDR_LOOPBACK); - - if (send_rtmsg(kr_state.fd, action, &kl->kr) == -1) - return (-1); + if (kl->flags & (F_BLACKHOLE|F_REJECT)) + kl->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK); if (action == RTM_ADD) { if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) { log_warn("kr_change"); return (-1); } - kr->r.prefix.s_addr = kl->kr.prefix.s_addr; - kr->r.prefixlen = kl->kr.prefixlen; - kr->r.nexthop.s_addr = kl->kr.nexthop.s_addr; - kr->r.flags = kl->kr.flags | F_BGPD_INSERTED; + kr->r.prefix.s_addr = kl->prefix.v4.s_addr; + kr->r.prefixlen = kl->prefixlen; + kr->r.nexthop.s_addr = kl->nexthop.v4.s_addr; + kr->r.flags = kl->flags | F_BGPD_INSERTED; kr->r.priority = RTP_BGP; - kr->r.labelid = kl->kr.labelid; + kr->r.labelid = labelid; - if (kroute_insert(kr) == -1) + if (kroute_insert(kt, kr) == -1) free(kr); } else { - kr->r.nexthop.s_addr = kl->kr.nexthop.s_addr; - kr->r.labelid = kl->kr.labelid; - if (kl->kr.flags & F_BLACKHOLE) + kr->r.nexthop.s_addr = kl->nexthop.v4.s_addr; + rtlabel_unref(kr->r.labelid); + kr->r.labelid = labelid; + if (kl->flags & F_BLACKHOLE) kr->r.flags |= F_BLACKHOLE; else kr->r.flags &= ~F_BLACKHOLE; - if (kl->kr.flags & F_REJECT) + if (kl->flags & F_REJECT) kr->r.flags |= F_REJECT; else kr->r.flags &= ~F_REJECT; } + if (send_rtmsg(kr_state.fd, action, kt, &kr->r) == -1) + return (-1); + return (0); } int -kr_delete(struct kroute_label *kl) +kr6_change(struct ktable *kt, struct kroute_full *kl) { - struct kroute_node *kr; + struct kroute6_node *kr6; + struct in6_addr lo6 = IN6ADDR_LOOPBACK_INIT; + int action = RTM_ADD; + u_int16_t labelid; - if ((kr = kroute_find(kl->kr.prefix.s_addr, kl->kr.prefixlen, RTP_BGP)) - == NULL) - return (0); + if ((kr6 = kroute6_find(kt, &kl->prefix.v6, kl->prefixlen, RTP_BGP)) != + NULL) + action = RTM_CHANGE; - if (!(kr->r.flags & F_BGPD_INSERTED)) + /* nexthop to loopback -> ignore silently */ + if (IN6_IS_ADDR_LOOPBACK(&kl->nexthop.v6)) return (0); - /* nexthop within 127/8 -> ignore silently */ - if ((kl->kr.nexthop.s_addr & htonl(IN_CLASSA_NET)) == - htonl(INADDR_LOOPBACK & IN_CLASSA_NET)) - return (0); + labelid = rtlabel_name2id(kl->label); - if (send_rtmsg(kr_state.fd, RTM_DELETE, &kl->kr) == -1) - return (-1); + /* for blackhole and reject routes nexthop needs to be ::1 */ + if (kl->flags & (F_BLACKHOLE|F_REJECT)) + bcopy(&lo6, &kl->nexthop.v6, sizeof(kl->nexthop.v6)); -+ + +- rtlabel_unref(kl->kr.labelid); + if (action == RTM_ADD) { + if ((kr6 = calloc(1, sizeof(struct kroute6_node))) == NULL) { + log_warn("kr_change"); + return (-1); + } + memcpy(&kr6->r.prefix, &kl->prefix.v6, sizeof(struct in6_addr)); + kr6->r.prefixlen = kl->prefixlen; + memcpy(&kr6->r.nexthop, &kl->nexthop.v6, + sizeof(struct in6_addr)); + kr6->r.flags = kl->flags | F_BGPD_INSERTED; + kr6->r.priority = RTP_BGP; + kr6->r.labelid = labelid; -- rtlabel_unref(kl->kr.labelid); +- if (kroute_remove(kr) == -1) + if (kroute6_insert(kt, kr6) == -1) + free(kr6); + } else { + memcpy(&kr6->r.nexthop, &kl->nexthop.v6, + sizeof(struct in6_addr)); + rtlabel_unref(kr6->r.labelid); + kr6->r.labelid = labelid; + if (kl->flags & F_BLACKHOLE) + kr6->r.flags |= F_BLACKHOLE; + else + kr6->r.flags &= ~F_BLACKHOLE; + if (kl->flags & F_REJECT) + kr6->r.flags |= F_REJECT; + else + kr6->r.flags &= ~F_REJECT; + } - -- if (kroute_remove(kr) == -1) ++ + if (send_rt6msg(kr_state.fd, action, kt, &kr6->r) == -1) return (-1); return (0); } int -kr6_change(struct kroute6_label *kl) +krVPN4_change(struct ktable *kt, struct kroute_full *kl) { - struct kroute6_node *kr6; + struct kroute_node *kr; int action = RTM_ADD; - struct in6_addr lo6 = IN6ADDR_LOOPBACK_INIT; + u_int32_t mplslabel = 0; + u_int16_t labelid; - if ((kr6 = kroute6_find(&kl->kr.prefix, kl->kr.prefixlen, RTP_BGP)) - != NULL) + if ((kr = kroute_find(kt, kl->prefix.vpn4.addr.s_addr, kl->prefixlen, + RTP_BGP)) != NULL) action = RTM_CHANGE; - /* nexthop to loopback -> ignore silently */ - if (IN6_IS_ADDR_LOOPBACK(&kl->kr.nexthop)) + /* nexthop within 127/8 -> ignore silently */ + if ((kl->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) == + htonl(INADDR_LOOPBACK & IN_CLASSA_NET)) return (0); - if (kr6) - rtlabel_unref(kr6->r.labelid); - kl->kr.labelid = rtlabel_name2id(kl->label); + /* only single MPLS label are supported for now */ + if (kl->prefix.vpn4.labellen != 3) { + log_warnx("krVPN4_change: %s/%u has not a single label", + log_addr(&kl->prefix), kl->prefixlen); + return (0); + } + mplslabel = (kl->prefix.vpn4.labelstack[0] << 24) | + (kl->prefix.vpn4.labelstack[1] << 16) | + (kl->prefix.vpn4.labelstack[2] << 8); + mplslabel = htonl(mplslabel); - /* for blackhole and reject routes nexthop needs to be ::1 */ - if (kl->kr.flags & (F_BLACKHOLE|F_REJECT)) - bcopy(&lo6, &kl->kr.nexthop, sizeof(kl->kr.nexthop)); + labelid = rtlabel_name2id(kl->label); - if (send_rt6msg(kr_state.fd, action, &kl->kr) == -1) - return (-1); + /* for blackhole and reject routes nexthop needs to be 127.0.0.1 */ + if (kl->flags & (F_BLACKHOLE|F_REJECT)) + kl->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK); if (action == RTM_ADD) { - if ((kr6 = calloc(1, sizeof(struct kroute6_node))) == NULL) { + if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) { log_warn("kr_change"); return (-1); } - memcpy(&kr6->r.prefix, &kl->kr.prefix, - sizeof(struct in6_addr)); - kr6->r.prefixlen = kl->kr.prefixlen; - memcpy(&kr6->r.nexthop, &kl->kr.nexthop, - sizeof(struct in6_addr)); - kr6->r.flags = kl->kr.flags | F_BGPD_INSERTED; - kr6->r.priority = RTP_BGP; - kr6->r.labelid = kl->kr.labelid; + kr->r.prefix.s_addr = kl->prefix.vpn4.addr.s_addr; + kr->r.prefixlen = kl->prefixlen; + kr->r.nexthop.s_addr = kl->nexthop.v4.s_addr; + kr->r.flags = kl->flags | F_BGPD_INSERTED | F_MPLS; + kr->r.priority = RTP_BGP; + kr->r.labelid = labelid; + kr->r.mplslabel = mplslabel; - if (kroute6_insert(kr6) == -1) - free(kr6); + if (kroute_insert(kt, kr) == -1) + free(kr); } else { - memcpy(&kr6->r.nexthop, &kl->kr.nexthop, - sizeof(struct in6_addr)); - kr6->r.labelid = kl->kr.labelid; - if (kl->kr.flags & F_BLACKHOLE) - kr6->r.flags |= F_BLACKHOLE; + kr->r.mplslabel = mplslabel; + kr->r.nexthop.s_addr = kl->nexthop.v4.s_addr; + rtlabel_unref(kr->r.labelid); + kr->r.labelid = labelid; + if (kl->flags & F_BLACKHOLE) + kr->r.flags |= F_BLACKHOLE; else - kr6->r.flags &= ~F_BLACKHOLE; - if (kl->kr.flags & F_REJECT) - kr6->r.flags |= F_REJECT; + kr->r.flags &= ~F_BLACKHOLE; + if (kl->flags & F_REJECT) + kr->r.flags |= F_REJECT; else - kr6->r.flags &= ~F_REJECT; + kr->r.flags &= ~F_REJECT; } + if (send_rtmsg(kr_state.fd, action, kt, &kr->r) == -1) + return (-1); + -+ return (0); -+} -+ -+int + return (0); + } + + int +-kr6_delete(struct kroute6_label *kl) +kr_delete(u_int rtableid, struct kroute_full *kl) +{ + struct ktable *kt; + + if ((kt = ktable_get(rtableid)) == NULL) + /* too noisy during reloads, just ignore */ + return (0); + + switch (kl->prefix.aid) { + case AID_INET: + return (kr4_delete(kt, kl)); + case AID_INET6: + return (kr6_delete(kt, kl)); + case AID_VPN_IPv4: + return (krVPN4_delete(kt, kl)); + } + log_warnx("kr_change: not handled AID"); + return (-1); +} + +int +kr4_delete(struct ktable *kt, struct kroute_full *kl) +{ + struct kroute_node *kr; + + if ((kr = kroute_find(kt, kl->prefix.v4.s_addr, kl->prefixlen, + RTP_BGP)) == NULL) + return (0); + + if (!(kr->r.flags & F_BGPD_INSERTED)) + return (0); + + if (send_rtmsg(kr_state.fd, RTM_DELETE, kt, &kr->r) == -1) + return (-1); + + rtlabel_unref(kr->r.labelid); + + if (kroute_remove(kt, kr) == -1) + return (-1); + - return (0); - } - - int --kr6_delete(struct kroute6_label *kl) ++ return (0); ++} ++ ++int +kr6_delete(struct ktable *kt, struct kroute_full *kl) { struct kroute6_node *kr6; - if ((kr6 = kroute6_find(&kl->kr.prefix, kl->kr.prefixlen, RTP_BGP)) - == NULL) + if ((kr6 = kroute6_find(kt, &kl->prefix.v6, kl->prefixlen, RTP_BGP)) == + NULL) return (0); if (!(kr6->r.flags & F_BGPD_INSERTED)) return (0); - /* nexthop to loopback -> ignore silently */ - if (IN6_IS_ADDR_LOOPBACK(&kl->kr.nexthop)) + if (send_rt6msg(kr_state.fd, RTM_DELETE, kt, &kr6->r) == -1) + return (-1); + + rtlabel_unref(kr6->r.labelid); + + if (kroute6_remove(kt, kr6) == -1) + return (-1); + + return (0); +} + +int +krVPN4_delete(struct ktable *kt, struct kroute_full *kl) +{ + struct kroute_node *kr; + + if ((kr = kroute_find(kt, kl->prefix.vpn4.addr.s_addr, kl->prefixlen, + RTP_BGP)) == NULL) return (0); - if (send_rt6msg(kr_state.fd, RTM_DELETE, &kl->kr) == -1) + if (!(kr->r.flags & F_BGPD_INSERTED)) + return (0); + + if (send_rtmsg(kr_state.fd, RTM_DELETE, kt, &kr->r) == -1) return (-1); - rtlabel_unref(kl->kr.labelid); + rtlabel_unref(kr->r.labelid); - if (kroute6_remove(kr6) == -1) + if (kroute_remove(kt, kr) == -1) return (-1); return (0); @@ -394,53 +759,63 @@ kr6_delete(struct kroute6_label *kl) void kr_shutdown(void) { - kr_fib_decouple(); - knexthop_clear(); - kroute_clear(); - kroute6_clear(); + u_int i; + + for (i = krt_size; i > 0; i--) + ktable_free(i - 1); kif_clear(); } void -kr_fib_couple(void) +kr_fib_couple(u_int rtableid) { + struct ktable *kt; struct kroute_node *kr; struct kroute6_node *kr6; - if (kr_state.fib_sync == 1) /* already coupled */ + if ((kt = ktable_get(rtableid)) == NULL) /* table does not exist */ return; - kr_state.fib_sync = 1; + if (kt->fib_sync) /* already coupled */ + return; -+ -+ kt->fib_sync = 1; - RB_FOREACH(kr, kroute_tree, &krt) ++ kt->fib_sync = 1; ++ + RB_FOREACH(kr, kroute_tree, &kt->krt) if ((kr->r.flags & F_BGPD_INSERTED)) - send_rtmsg(kr_state.fd, RTM_ADD, &kr->r); - RB_FOREACH(kr6, kroute6_tree, &krt6) + send_rtmsg(kr_state.fd, RTM_ADD, kt, &kr->r); + RB_FOREACH(kr6, kroute6_tree, &kt->krt6) if ((kr6->r.flags & F_BGPD_INSERTED)) - send_rt6msg(kr_state.fd, RTM_ADD, &kr6->r); + send_rt6msg(kr_state.fd, RTM_ADD, kt, &kr6->r); - log_info("kernel routing table coupled"); + log_info("kernel routing table %u (%s) coupled", kt->rtableid, + kt->descr); } void -kr_fib_decouple(void) +kr_fib_decouple(u_int rtableid) { + struct ktable *kt; struct kroute_node *kr; struct kroute6_node *kr6; - if (kr_state.fib_sync == 0) /* already decoupled */ + if ((kt = ktable_get(rtableid)) == NULL) /* table does not exist */ + return; + + if (!kt->fib_sync) /* already decoupled */ return; - RB_FOREACH(kr, kroute_tree, &krt) + RB_FOREACH(kr, kroute_tree, &kt->krt) if ((kr->r.flags & F_BGPD_INSERTED)) - send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r); - RB_FOREACH(kr6, kroute6_tree, &krt6) + send_rtmsg(kr_state.fd, RTM_DELETE, kt, &kr->r); + RB_FOREACH(kr6, kroute6_tree, &kt->krt6) if ((kr6->r.flags & F_BGPD_INSERTED)) - send_rt6msg(kr_state.fd, RTM_DELETE, &kr6->r); + send_rt6msg(kr_state.fd, RTM_DELETE, kt, &kr6->r); - kr_state.fib_sync = 0; + kt->fib_sync = 0; - log_info("kernel routing table decoupled"); + log_info("kernel routing table %u (%s) decoupled", kt->rtableid, + kt->descr); } int @@ -450,41 +825,18 @@ kr_dispatch_msg(void) } int -kr_nexthop_add(struct bgpd_addr *addr) +kr_nexthop_add(u_int rtableid, struct bgpd_addr *addr) { + struct ktable *kt; struct knexthop_node *h; - if ((h = knexthop_find(addr)) != NULL) { + if ((kt = ktable_get(rtableid)) == NULL) { + log_warnx("kr_nexthop_add: non-existent rtableid %d", rtableid); + return (0); + } + if ((h = knexthop_find(kt, addr)) != NULL) { /* should not happen... this is actually an error path */ - struct kroute_nexthop nh; - struct kroute_node *k; - struct kroute6_node *k6; - - bzero(&nh, sizeof(nh)); - memcpy(&nh.nexthop, addr, sizeof(nh.nexthop)); - nh.valid = 1; - if (h->kroute != NULL && addr->af == AF_INET) { - k = h->kroute; - nh.connected = k->r.flags & F_CONNECTED; - if (k->r.nexthop.s_addr != 0) { - nh.gateway.af = AF_INET; - nh.gateway.v4.s_addr = - k->r.nexthop.s_addr; - } - memcpy(&nh.kr.kr4, &k->r, sizeof(nh.kr.kr4)); - } else if (h->kroute != NULL && addr->af == AF_INET6) { - k6 = h->kroute; - nh.connected = k6->r.flags & F_CONNECTED; - if (memcmp(&k6->r.nexthop, &in6addr_any, - sizeof(struct in6_addr)) != 0) { - nh.gateway.af = AF_INET6; - memcpy(&nh.gateway.v6, &k6->r.nexthop, - sizeof(struct in6_addr)); - } - memcpy(&nh.kr.kr6, &k6->r, sizeof(nh.kr.kr6)); - } - - send_nexthop_update(&nh); + knexthop_send_update(h); } else { if ((h = calloc(1, sizeof(struct knexthop_node))) == NULL) { log_warn("kr_nexthop_add"); @@ -492,7 +844,7 @@ kr_nexthop_add(struct bgpd_addr *addr) } memcpy(&h->nexthop, addr, sizeof(h->nexthop)); - if (knexthop_insert(h) == -1) + if (knexthop_insert(kt, h) == -1) return (-1); } @@ -500,19 +852,26 @@ kr_nexthop_add(struct bgpd_addr *addr) } void -kr_nexthop_delete(struct bgpd_addr *addr) +kr_nexthop_delete(u_int rtableid, struct bgpd_addr *addr) { + struct ktable *kt; struct knexthop_node *kn; - if ((kn = knexthop_find(addr)) == NULL) + if ((kt = ktable_get(rtableid)) == NULL) { + log_warnx("kr_nexthop_delete: non-existent rtableid %d", + rtableid); + return; + } + if ((kn = knexthop_find(kt, addr)) == NULL) return; - knexthop_remove(kn); + knexthop_remove(kt, kn); } void kr_show_route(struct imsg *imsg) { + struct ktable *kt; struct kroute_node *kr, *kn; struct kroute6_node *kr6, *kn6; struct bgpd_addr *addr; @@ -521,6 +880,7 @@ kr_show_route(struct imsg *imsg) struct ctl_show_nexthop snh; struct knexthop_node *h; struct kif_node *kif; + u_int i; u_short ifindex = 0; switch (imsg->hdr.type) { @@ -528,70 +888,96 @@ kr_show_route(struct imsg *imsg) if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags) + sizeof(af)) { log_warnx("kr_show_route: wrong imsg len"); - return; + break; + } + kt = ktable_get(imsg->hdr.peerid); + if (kt == NULL) { + log_warnx("kr_show_route: table %u does not exist", + imsg->hdr.peerid); + break; } memcpy(&flags, imsg->data, sizeof(flags)); memcpy(&af, (char *)imsg->data + sizeof(flags), sizeof(af)); if (!af || af == AF_INET) - RB_FOREACH(kr, kroute_tree, &krt) - if (!flags || kr->r.flags & flags) { - kn = kr; - do { - send_imsg_session( - IMSG_CTL_KROUTE, - imsg->hdr.pid, &kn->r, - sizeof(kn->r)); - } while ((kn = kn->next) != NULL); - } + RB_FOREACH(kr, kroute_tree, &kt->krt) { + if (flags && (kr->r.flags & flags) == 0) + continue; + kn = kr; + do { + send_imsg_session(IMSG_CTL_KROUTE, + imsg->hdr.pid, kr_tofull(&kn->r), + sizeof(struct kroute_full)); + } while ((kn = kn->next) != NULL); + } if (!af || af == AF_INET6) - RB_FOREACH(kr6, kroute6_tree, &krt6) - if (!flags || kr6->r.flags & flags) { - kn6 = kr6; - do { - send_imsg_session( - IMSG_CTL_KROUTE6, - imsg->hdr.pid, &kn6->r, - sizeof(kn6->r)); - } while ((kn6 = kn6->next) != NULL); - } + RB_FOREACH(kr6, kroute6_tree, &kt->krt6) { + if (flags && (kr6->r.flags & flags) == 0) + continue; + kn6 = kr6; + do { + send_imsg_session(IMSG_CTL_KROUTE, + imsg->hdr.pid, kr6_tofull(&kn6->r), + sizeof(struct kroute_full)); + } while ((kn6 = kn6->next) != NULL); + } break; case IMSG_CTL_KROUTE_ADDR: if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(struct bgpd_addr)) { log_warnx("kr_show_route: wrong imsg len"); - return; + break; + } + kt = ktable_get(imsg->hdr.peerid); + if (kt == NULL) { + log_warnx("kr_show_route: table %u does not exist", + imsg->hdr.peerid); + break; } addr = imsg->data; kr = NULL; - switch (addr->af) { - case AF_INET: - kr = kroute_match(addr->v4.s_addr, 1); + switch (addr->aid) { + case AID_INET: + kr = kroute_match(kt, addr->v4.s_addr, 1); if (kr != NULL) send_imsg_session(IMSG_CTL_KROUTE, - imsg->hdr.pid, &kr->r, sizeof(kr->r)); + imsg->hdr.pid, kr_tofull(&kr->r), + sizeof(struct kroute_full)); break; - case AF_INET6: - kr6 = kroute6_match(&addr->v6, 1); + case AID_INET6: + kr6 = kroute6_match(kt, &addr->v6, 1); if (kr6 != NULL) - send_imsg_session(IMSG_CTL_KROUTE6, - imsg->hdr.pid, &kr6->r, sizeof(kr6->r)); + send_imsg_session(IMSG_CTL_KROUTE, + imsg->hdr.pid, kr6_tofull(&kr6->r), + sizeof(struct kroute_full)); break; } break; case IMSG_CTL_SHOW_NEXTHOP: - RB_FOREACH(h, knexthop_tree, &knt) { + kt = ktable_get(imsg->hdr.peerid); + if (kt == NULL) { + log_warnx("kr_show_route: table %u does not exist", + imsg->hdr.peerid); + break; + } + RB_FOREACH(h, knexthop_tree, KT2KNT(kt)) { bzero(&snh, sizeof(snh)); memcpy(&snh.addr, &h->nexthop, sizeof(snh.addr)); if (h->kroute != NULL) { - switch (h->nexthop.af) { - case AF_INET: + switch (h->nexthop.aid) { + case AID_INET: kr = h->kroute; snh.valid = kroute_validate(&kr->r); + snh.krvalid = 1; + memcpy(&snh.kr.kr4, &kr->r, + sizeof(snh.kr.kr4)); ifindex = kr->r.ifindex; break; - case AF_INET6: + case AID_INET6: kr6 = h->kroute; snh.valid = kroute6_validate(&kr6->r); + snh.krvalid = 1; + memcpy(&snh.kr.kr6, &kr6->r, + sizeof(snh.kr.kr6)); ifindex = kr6->r.ifindex; break; } -@@ -608,6 +994,24 @@ kr_show_route(struct imsg *imsg) +@@ -608,41 +994,190 @@ kr_show_route(struct imsg *imsg) send_imsg_session(IMSG_CTL_SHOW_INTERFACE, imsg->hdr.pid, &kif->k, sizeof(kif->k)); break; + case IMSG_CTL_SHOW_FIB_TABLES: + for (i = 0; i < krt_size; i++) { + struct ktable ktab; + + if ((kt = ktable_get(i)) == NULL) + continue; + + ktab = *kt; + /* do not leak internal information */ + RB_INIT(&ktab.krt); + RB_INIT(&ktab.krt6); + RB_INIT(&ktab.knt); + TAILQ_INIT(&ktab.krn); + + send_imsg_session(IMSG_CTL_SHOW_FIB_TABLES, + imsg->hdr.pid, &ktab, sizeof(ktab)); + } + break; default: /* nada */ break; } -@@ -628,21 +1032,152 @@ kr_ifinfo(char *ifname) - } - } - --struct redist_node { -- LIST_ENTRY(redist_node) entry; -- struct kroute *kr; -- struct kroute6 *kr6; --}; +- +- send_imsg_session(IMSG_CTL_END, imsg->hdr.pid, NULL, 0); ++ ++ send_imsg_session(IMSG_CTL_END, imsg->hdr.pid, NULL, 0); ++} ++ +void ++kr_ifinfo(char *ifname) ++{ ++ struct kif_node *kif; ++ ++ RB_FOREACH(kif, kif_tree, &kit) ++ if (!strcmp(ifname, kif->k.ifname)) { ++ send_imsg_session(IMSG_IFINFO, 0, ++ &kif->k, sizeof(kif->k)); ++ return; ++ } ++} ++ ++void +kr_net_delete(struct network *n) +{ + filterset_free(&n->net.attrset); + free(n); +} + +struct network * +kr_net_match(struct ktable *kt, struct kroute *kr) +{ + struct network *xn; + + TAILQ_FOREACH(xn, &kt->krn, entry) { + if (xn->net.prefix.aid != AID_INET) + continue; + switch (xn->net.type) { + case NETWORK_DEFAULT: + if (xn->net.prefixlen == kr->prefixlen && + xn->net.prefix.v4.s_addr == kr->prefix.s_addr) + /* static match already redistributed */ + return (NULL); + break; + case NETWORK_STATIC: + if (kr->flags & F_STATIC) + return (xn); + break; + case NETWORK_CONNECTED: + if (kr->flags & F_CONNECTED) + return (xn); + break; + case NETWORK_MRTCLONE: + /* can not happen */ + break; + } + } + return (NULL); -+} -+ + } + +-void +-kr_ifinfo(char *ifname) +struct network * +kr_net_match6(struct ktable *kt, struct kroute6 *kr6) -+{ + { +- struct kif_node *kif; + struct network *xn; +- RB_FOREACH(kif, kif_tree, &kit) +- if (!strcmp(ifname, kif->k.ifname)) { +- send_imsg_session(IMSG_IFINFO, 0, +- &kif->k, sizeof(kif->k)); +- return; + TAILQ_FOREACH(xn, &kt->krn, entry) { + if (xn->net.prefix.aid != AID_INET6) + continue; + switch (xn->net.type) { + case NETWORK_DEFAULT: + if (xn->net.prefixlen == kr6->prefixlen && + memcmp(&xn->net.prefix.v6, &kr6->prefix, + sizeof(struct in6_addr)) == 0) + /* static match already redistributed */ + return (NULL); + break; + case NETWORK_STATIC: + if (kr6->flags & F_STATIC) + return (xn); + break; + case NETWORK_CONNECTED: + if (kr6->flags & F_CONNECTED) + return (xn); + break; + case NETWORK_MRTCLONE: + /* can not happen */ + break; -+ } + } + } + return (NULL); -+} + } --LIST_HEAD(, redist_node) redistlist; +-struct redist_node { +- LIST_ENTRY(redist_node) entry; +- struct kroute *kr; +- struct kroute6 *kr6; +-}; +struct network * +kr_net_find(struct ktable *kt, struct network *n) +{ + struct network *xn; + + TAILQ_FOREACH(xn, &kt->krn, entry) { + if (n->net.type != xn->net.type || + n->net.prefixlen != xn->net.prefixlen || + n->net.rtableid != xn->net.rtableid) + continue; + if (memcmp(&n->net.prefix, &xn->net.prefix, + sizeof(n->net.prefix)) == 0) + return (xn); + } + return (NULL); +} - - int --kr_redistribute(int type, struct kroute *kr) ++ ++int +kr_net_reload(u_int rtableid, struct network_head *nh) - { -- struct redist_node *rn; ++{ + struct network *n, *xn; + struct ktable *kt; + + if ((kt = ktable_get(rtableid)) == NULL) { + log_warnx("kr_net_reload: non-existent rtableid %d", rtableid); + return (-1); + } + + TAILQ_FOREACH(n, &kt->krn, entry) + n->net.old = 1; + + while ((n = TAILQ_FIRST(nh)) != NULL) { + TAILQ_REMOVE(nh, n, entry); + n->net.old = 0; + n->net.rtableid = rtableid; + xn = kr_net_find(kt, n); + if (xn) { + xn->net.old = 0; + filterset_free(&xn->net.attrset); + filterset_move(&n->net.attrset, &xn->net.attrset); + kr_net_delete(n); + } else + TAILQ_INSERT_TAIL(&kt->krn, n, entry); + } -+ + + for (n = TAILQ_FIRST(&kt->krn); n != NULL; n = xn) { + xn = TAILQ_NEXT(n, entry); + if (n->net.old) { + if (n->net.type == NETWORK_DEFAULT) + if (send_network(IMSG_NETWORK_REMOVE, &n->net, + NULL)) + return (-1); + TAILQ_REMOVE(&kt->krn, n, entry); + kr_net_delete(n); + } + } -+ + +-LIST_HEAD(, redist_node) redistlist; + return (0); +} -+ -+int + + int +-kr_redistribute(int type, struct kroute *kr) +kr_redistribute(int type, struct ktable *kt, struct kroute *kr) -+{ + { +- struct redist_node *rn; + struct network *match; + struct network_config net; u_int32_t a; + /* shortcut for removals */ + if (type == IMSG_NETWORK_REMOVE) { + if (!(kr->flags & F_REDISTRIBUTED)) + return (0); /* no match, don't redistribute */ + kr->flags &= ~F_REDISTRIBUTED; + match = NULL; + goto sendit; + } + if (!(kr->flags & F_KERNEL)) return (0); @@ -670,41 +1205,40 @@ kr_redistribute(int type, struct kroute if (kr->prefix.s_addr == INADDR_ANY && kr->prefixlen == 0) return (0); - /* Add or delete kr from list ... */ - LIST_FOREACH(rn, &redistlist, entry) - if (rn->kr == kr) - break; - - switch (type) { - case IMSG_NETWORK_ADD: - if (rn == NULL) { - if ((rn = calloc(1, sizeof(struct redist_node))) == - NULL) { - log_warn("kr_redistribute"); - return (-1); - } - rn->kr = kr; - LIST_INSERT_HEAD(&redistlist, rn, entry); - } - break; - case IMSG_NETWORK_REMOVE: - if (rn != NULL) { - LIST_REMOVE(rn, entry); - free(rn); - } - break; - default: - errno = EINVAL; - return (-1); - } - - return (bgpd_redistribute(type, kr, NULL)); + match = kr_net_match(kt, kr); + if (match == NULL) { + if (!(kr->flags & F_REDISTRIBUTED)) + return (0); /* no match, don't redistribute */ + /* route no longer matches but is redistributed, so remove */ + kr->flags &= ~F_REDISTRIBUTED; + type = IMSG_NETWORK_REMOVE; + } else + kr->flags |= F_REDISTRIBUTED; + +sendit: + bzero(&net, sizeof(net)); + net.prefix.aid = AID_INET; + net.prefix.v4.s_addr = kr->prefix.s_addr; + net.prefixlen = kr->prefixlen; + net.rtableid = kt->rtableid; + + return (send_network(type, &net, match ? &match->net.attrset : NULL)); } int -kr_redistribute6(int type, struct kroute6 *kr6) -{ - struct redist_node *rn; +kr_redistribute6(int type, struct ktable *kt, struct kroute6 *kr6) +{ + struct network *match; + struct network_config net; + + /* shortcut for removals */ + if (type == IMSG_NETWORK_REMOVE) { + if (!(kr6->flags & F_REDISTRIBUTED)) + return (0); /* no match, don't redistribute */ + kr6->flags &= ~F_REDISTRIBUTED; + match = NULL; + goto sendit; + } if (!(kr6->flags & F_KERNEL)) return (0); @@ -736,60 +1270,107 @@ kr_redistribute6(int type, struct kroute * never allow ::/0 the default route can only be redistributed * with announce default. */ - if (memcmp(&kr6->prefix, &in6addr_any, sizeof(struct in6_addr)) == 0 && - kr6->prefixlen == 0) + if (kr6->prefixlen == 0 && + memcmp(&kr6->prefix, &in6addr_any, sizeof(struct in6_addr)) == 0) return (0); - /* Add or delete kr from list ... - * using a linear list to store the redistributed networks will hurt - * as soon as redistribute ospf comes but until then keep it simple. - */ - LIST_FOREACH(rn, &redistlist, entry) - if (rn->kr6 == kr6) - break; - - switch (type) { - case IMSG_NETWORK_ADD: - if (rn == NULL) { - if ((rn = calloc(1, sizeof(struct redist_node))) == - NULL) { - log_warn("kr_redistribute"); - return (-1); - } - rn->kr6 = kr6; - LIST_INSERT_HEAD(&redistlist, rn, entry); - } - break; - case IMSG_NETWORK_REMOVE: - if (rn != NULL) { - LIST_REMOVE(rn, entry); - free(rn); - } - break; - default: - errno = EINVAL; - return (-1); - } + match = kr_net_match6(kt, kr6); + if (match == NULL) { + if (!(kr6->flags & F_REDISTRIBUTED)) + return (0); /* no match, don't redistribute */ + /* route no longer matches but is redistributed, so remove */ + kr6->flags &= ~F_REDISTRIBUTED; + type = IMSG_NETWORK_REMOVE; + } else + kr6->flags |= F_REDISTRIBUTED; +sendit: + bzero(&net, sizeof(net)); + net.prefix.aid = AID_INET6; + memcpy(&net.prefix.v6, &kr6->prefix, sizeof(struct in6_addr)); + net.prefixlen = kr6->prefixlen; + net.rtableid = kt->rtableid; - return (bgpd_redistribute(type, NULL, kr6)); + return (send_network(type, &net, match ? &match->net.attrset : NULL)); } int kr_reload(void) { - struct redist_node *rn; + struct ktable *kt; + struct kroute_node *kr; + struct kroute6_node *kr6; struct knexthop_node *nh; + struct network *n; + u_int rid; + int hasdyn = 0; - LIST_FOREACH(rn, &redistlist, entry) - if (bgpd_redistribute(IMSG_NETWORK_ADD, rn->kr, rn->kr6) == -1) - return (-1); + for (rid = 0; rid < krt_size; rid++) { + if ((kt = ktable_get(rid)) == NULL) + continue; - RB_FOREACH(nh, knexthop_tree, &knt) - knexthop_validate(nh); + RB_FOREACH(nh, knexthop_tree, KT2KNT(kt)) + knexthop_validate(kt, nh); + + TAILQ_FOREACH(n, &kt->krn, entry) + if (n->net.type == NETWORK_DEFAULT) { + if (send_network(IMSG_NETWORK_ADD, &n->net, + &n->net.attrset)) + return (-1); + } else + hasdyn = 1; + + if (hasdyn) { + /* only evaluate the full tree if we need */ + RB_FOREACH(kr, kroute_tree, &kt->krt) + kr_redistribute(IMSG_NETWORK_ADD, kt, &kr->r); + RB_FOREACH(kr6, kroute6_tree, &kt->krt6) + kr_redistribute6(IMSG_NETWORK_ADD, kt, &kr6->r); + } + } return (0); } +struct kroute_full * +kr_tofull(struct kroute *kr) +{ + static struct kroute_full kf; + + bzero(&kf, sizeof(kf)); + + kf.prefix.aid = AID_INET; + kf.prefix.v4.s_addr = kr->prefix.s_addr; + kf.nexthop.aid = AID_INET; + kf.nexthop.v4.s_addr = kr->nexthop.s_addr; + strlcpy(kf.label, rtlabel_id2name(kr->labelid), sizeof(kf.label)); + kf.flags = kr->flags; + kf.ifindex = kr->ifindex; + kf.prefixlen = kr->prefixlen; + kf.priority = kr->priority; + + return (&kf); +} + +struct kroute_full * +kr6_tofull(struct kroute6 *kr6) +{ + static struct kroute_full kf; + + bzero(&kf, sizeof(kf)); + + kf.prefix.aid = AID_INET6; + memcpy(&kf.prefix.v6, &kr6->prefix, sizeof(struct in6_addr)); + kf.nexthop.aid = AID_INET6; + memcpy(&kf.nexthop.v6, &kr6->nexthop, sizeof(struct in6_addr)); + strlcpy(kf.label, rtlabel_id2name(kr6->labelid), sizeof(kf.label)); + kf.flags = kr6->flags; + kf.ifindex = kr6->ifindex; + kf.prefixlen = kr6->prefixlen; + kf.priority = kr6->priority; + + return (&kf); +} + /* * RB-tree compare functions */ @@ -846,26 +1427,28 @@ kroute6_compare(struct kroute6_node *a, int knexthop_compare(struct knexthop_node *a, struct knexthop_node *b) { - u_int32_t r; + int i; - if (a->nexthop.af != b->nexthop.af) - return (b->nexthop.af - a->nexthop.af); + if (a->nexthop.aid != b->nexthop.aid) + return (b->nexthop.aid - a->nexthop.aid); - switch (a->nexthop.af) { - case AF_INET: - if ((r = b->nexthop.addr32[0] - a->nexthop.addr32[0]) != 0) - return (r); + switch (a->nexthop.aid) { + case AID_INET: + if (ntohl(a->nexthop.v4.s_addr) < ntohl(b->nexthop.v4.s_addr)) + return (-1); + if (ntohl(a->nexthop.v4.s_addr) > ntohl(b->nexthop.v4.s_addr)) + return (1); break; - case AF_INET6: - if ((r = b->nexthop.addr32[3] - a->nexthop.addr32[3]) != 0) - return (r); - if ((r = b->nexthop.addr32[2] - a->nexthop.addr32[2]) != 0) - return (r); - if ((r = b->nexthop.addr32[1] - a->nexthop.addr32[1]) != 0) - return (r); - if ((r = b->nexthop.addr32[0] - a->nexthop.addr32[0]) != 0) - return (r); + case AID_INET6: + for (i = 0; i < 16; i++) { + if (a->nexthop.v6.s6_addr[i] < b->nexthop.v6.s6_addr[i]) + return (-1); + if (a->nexthop.v6.s6_addr[i] > b->nexthop.v6.s6_addr[i]) + return (1); + } break; + default: + fatalx("knexthop_compare: unknown AF"); } return (0); @@ -883,7 +1466,8 @@ kif_compare(struct kif_node *a, struct k */ struct kroute_node * -kroute_find(in_addr_t prefix, u_int8_t prefixlen, u_int8_t prio) +kroute_find(struct ktable *kt, in_addr_t prefix, u_int8_t prefixlen, + u_int8_t prio) { struct kroute_node s; struct kroute_node *kn, *tmp; @@ -892,15 +1476,15 @@ kroute_find(in_addr_t prefix, u_int8_t p s.r.prefixlen = prefixlen; s.r.priority = prio; - kn = RB_FIND(kroute_tree, &krt, &s); + kn = RB_FIND(kroute_tree, &kt->krt, &s); if (kn && prio == RTP_ANY) { - tmp = RB_PREV(kroute_tree, &krt, kn); + tmp = RB_PREV(kroute_tree, &kt->krt, kn); while (tmp) { if (kroute_compare(&s, tmp) == 0) kn = tmp; else break; - tmp = RB_PREV(kroute_tree, &krt, kn); + tmp = RB_PREV(kroute_tree, &kt->krt, kn); } } return (kn); @@ -927,13 +1511,13 @@ kroute_matchgw(struct kroute_node *kr, s } int -kroute_insert(struct kroute_node *kr) +kroute_insert(struct ktable *kt, struct kroute_node *kr) { struct kroute_node *krm; struct knexthop_node *h; in_addr_t mask, ina; - if ((krm = RB_INSERT(kroute_tree, &krt, kr)) != NULL) { + if ((krm = RB_INSERT(kroute_tree, &kt->krt, kr)) != NULL) { /* multipath route, add at end of list */ while (krm->next != NULL) krm = krm->next; @@ -941,13 +1525,14 @@ kroute_insert(struct kroute_node *kr) kr->next = NULL; /* to be sure */ } + /* XXX this is wrong for nexthop validated via BGP */ if (kr->r.flags & F_KERNEL) { mask = prefixlen2mask(kr->r.prefixlen); ina = ntohl(kr->r.prefix.s_addr); - RB_FOREACH(h, knexthop_tree, &knt) - if (h->nexthop.af == AF_INET && + RB_FOREACH(h, knexthop_tree, KT2KNT(kt)) + if (h->nexthop.aid == AID_INET && (ntohl(h->nexthop.v4.s_addr) & mask) == ina) - knexthop_validate(h); + knexthop_validate(kt, h); if (kr->r.flags & F_CONNECTED) if (kif_kr_insert(kr) == -1) @@ -955,19 +1540,19 @@ kroute_insert(struct kroute_node *kr) if (krm == NULL) /* redistribute multipath routes only once */ - kr_redistribute(IMSG_NETWORK_ADD, &kr->r); + kr_redistribute(IMSG_NETWORK_ADD, kt, &kr->r); } return (0); } int -kroute_remove(struct kroute_node *kr) +kroute_remove(struct ktable *kt, struct kroute_node *kr) { struct kroute_node *krm; struct knexthop_node *s; - if ((krm = RB_FIND(kroute_tree, &krt, kr)) == NULL) { + if ((krm = RB_FIND(kroute_tree, &kt->krt, kr)) == NULL) { log_warnx("kroute_remove failed to find %s/%u", inet_ntoa(kr->r.prefix), kr->r.prefixlen); return (-1); @@ -975,13 +1560,14 @@ kroute_remove(struct kroute_node *kr) if (krm == kr) { /* head element */ - if (RB_REMOVE(kroute_tree, &krt, kr) == NULL) { + if (RB_REMOVE(kroute_tree, &kt->krt, kr) == NULL) { log_warnx("kroute_remove failed for %s/%u", inet_ntoa(kr->r.prefix), kr->r.prefixlen); return (-1); } if (kr->next != NULL) { - if (RB_INSERT(kroute_tree, &krt, kr->next) != NULL) { + if (RB_INSERT(kroute_tree, &kt->krt, kr->next) != + NULL) { log_warnx("kroute_remove failed to add %s/%u", inet_ntoa(kr->r.prefix), kr->r.prefixlen); return (-1); @@ -1001,14 +1587,14 @@ kroute_remove(struct kroute_node *kr) } /* check whether a nexthop depends on this kroute */ - if ((kr->r.flags & F_KERNEL) && (kr->r.flags & F_NEXTHOP)) - RB_FOREACH(s, knexthop_tree, &knt) + if (kr->r.flags & F_NEXTHOP) + RB_FOREACH(s, knexthop_tree, KT2KNT(kt)) if (s->kroute == kr) - knexthop_validate(s); + knexthop_validate(kt, s); if (kr->r.flags & F_KERNEL && kr == krm && kr->next == NULL) /* again remove only once */ - kr_redistribute(IMSG_NETWORK_REMOVE, &kr->r); + kr_redistribute(IMSG_NETWORK_REMOVE, kt, &kr->r); if (kr->r.flags & F_CONNECTED) if (kif_kr_remove(kr) == -1) { @@ -1021,16 +1607,17 @@ kroute_remove(struct kroute_node *kr) } void -kroute_clear(void) +kroute_clear(struct ktable *kt) { struct kroute_node *kr; - while ((kr = RB_MIN(kroute_tree, &krt)) != NULL) - kroute_remove(kr); + while ((kr = RB_MIN(kroute_tree, &kt->krt)) != NULL) + kroute_remove(kt, kr); } struct kroute6_node * -kroute6_find(const struct in6_addr *prefix, u_int8_t prefixlen, u_int8_t prio) +kroute6_find(struct ktable *kt, const struct in6_addr *prefix, + u_int8_t prefixlen, u_int8_t prio) { struct kroute6_node s; struct kroute6_node *kn6, *tmp; @@ -1039,15 +1626,15 @@ kroute6_find(const struct in6_addr *pref s.r.prefixlen = prefixlen; s.r.priority = prio; - kn6 = RB_FIND(kroute6_tree, &krt6, &s); + kn6 = RB_FIND(kroute6_tree, &kt->krt6, &s); if (kn6 && prio == RTP_ANY) { - tmp = RB_PREV(kroute6_tree, &krt6, kn6); + tmp = RB_PREV(kroute6_tree, &kt->krt6, kn6); while (tmp) { if (kroute6_compare(&s, tmp) == 0) kn6 = tmp; - else + else break; - tmp = RB_PREV(kroute6_tree, &krt6, kn6); + tmp = RB_PREV(kroute6_tree, &kt->krt6, kn6); } } return (kn6); -@@ -1065,7 +1652,7 @@ kroute6_matchgw(struct kroute6_node *kr, - memcpy(&nexthop, &sa_in6->sin6_addr, sizeof(nexthop)); +@@ -1056,17 +1643,29 @@ kroute6_find(const struct in6_addr *pref + struct kroute6_node * + kroute6_matchgw(struct kroute6_node *kr, struct sockaddr_in6 *sa_in6) + { +- struct in6_addr nexthop; ++ struct sockaddr_in6 nexthop; + if (sa_in6 == NULL) { + log_warnx("kroute6_matchgw: no nexthop defined"); + return (NULL); + } +- memcpy(&nexthop, &sa_in6->sin6_addr, sizeof(nexthop)); ++ memcpy(&nexthop, sa_in6, sizeof(nexthop)); ++#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) ++ if (IN6_IS_ADDR_LINKLOCAL(&nexthop.sin6_addr)) { ++ /* Embed scope id and set sin6_scope_id. */ ++ if (nexthop.sin6_scope_id == 0) ++ nexthop.sin6_scope_id = ++ IN6_LINKLOCAL_IFINDEX(nexthop.sin6_addr); ++ else ++ SET_IN6_LINKLOCAL_IFINDEX(nexthop.sin6_addr, ++ nexthop.sin6_scope_id); ++ } ++#endif + while (kr) { - if (memcmp(&kr->r.nexthop, &nexthop, sizeof(nexthop)) == NULL) -+ if (memcmp(&kr->r.nexthop, &nexthop, sizeof(nexthop)) == 0) - return (kr); +- return (kr); ++ if (memcmp(&kr->r.nexthop, &nexthop.sin6_addr, ++ sizeof(nexthop.sin6_addr)) == 0) ++ return (kr); kr = kr->next; } -@@ -1074,13 +1661,13 @@ kroute6_matchgw(struct kroute6_node *kr, + +@@ -1074,13 +1673,13 @@ kroute6_matchgw(struct kroute6_node *kr, } int -kroute6_insert(struct kroute6_node *kr) +kroute6_insert(struct ktable *kt, struct kroute6_node *kr) { struct kroute6_node *krm; struct knexthop_node *h; struct in6_addr ina, inb; - if ((krm = RB_INSERT(kroute6_tree, &krt6, kr)) != NULL) { + if ((krm = RB_INSERT(kroute6_tree, &kt->krt6, kr)) != NULL) { /* multipath route, add at end of list */ while (krm->next != NULL) krm = krm->next; -@@ -1088,14 +1675,15 @@ kroute6_insert(struct kroute6_node *kr) +@@ -1088,14 +1687,15 @@ kroute6_insert(struct kroute6_node *kr) kr->next = NULL; /* to be sure */ } + /* XXX this is wrong for nexthop validated via BGP */ if (kr->r.flags & F_KERNEL) { inet6applymask(&ina, &kr->r.prefix, kr->r.prefixlen); - RB_FOREACH(h, knexthop_tree, &knt) - if (h->nexthop.af == AF_INET6) { + RB_FOREACH(h, knexthop_tree, KT2KNT(kt)) + if (h->nexthop.aid == AID_INET6) { inet6applymask(&inb, &h->nexthop.v6, kr->r.prefixlen); if (memcmp(&ina, &inb, sizeof(ina)) == 0) - knexthop_validate(h); + knexthop_validate(kt, h); } if (kr->r.flags & F_CONNECTED) -@@ -1104,19 +1692,19 @@ kroute6_insert(struct kroute6_node *kr) +@@ -1104,19 +1704,19 @@ kroute6_insert(struct kroute6_node *kr) if (krm == NULL) /* redistribute multipath routes only once */ - kr_redistribute6(IMSG_NETWORK_ADD, &kr->r); + kr_redistribute6(IMSG_NETWORK_ADD, kt, &kr->r); } return (0); } int -kroute6_remove(struct kroute6_node *kr) +kroute6_remove(struct ktable *kt, struct kroute6_node *kr) { struct kroute6_node *krm; struct knexthop_node *s; - if ((krm = RB_FIND(kroute6_tree, &krt6, kr)) == NULL) { + if ((krm = RB_FIND(kroute6_tree, &kt->krt6, kr)) == NULL) { log_warnx("kroute6_remove failed for %s/%u", log_in6addr(&kr->r.prefix), kr->r.prefixlen); return (-1); -@@ -1124,13 +1712,14 @@ kroute6_remove(struct kroute6_node *kr) +@@ -1124,13 +1724,14 @@ kroute6_remove(struct kroute6_node *kr) if (krm == kr) { /* head element */ - if (RB_REMOVE(kroute6_tree, &krt6, kr) == NULL) { + if (RB_REMOVE(kroute6_tree, &kt->krt6, kr) == NULL) { log_warnx("kroute6_remove failed for %s/%u", log_in6addr(&kr->r.prefix), kr->r.prefixlen); return (-1); } if (kr->next != NULL) { - if (RB_INSERT(kroute6_tree, &krt6, kr->next) != NULL) { + if (RB_INSERT(kroute6_tree, &kt->krt6, kr->next) != + NULL) { log_warnx("kroute6_remove failed to add %s/%u", log_in6addr(&kr->r.prefix), kr->r.prefixlen); -@@ -1151,14 +1740,14 @@ kroute6_remove(struct kroute6_node *kr) +@@ -1151,14 +1752,14 @@ kroute6_remove(struct kroute6_node *kr) } /* check whether a nexthop depends on this kroute */ - if ((kr->r.flags & F_KERNEL) && (kr->r.flags & F_NEXTHOP)) - RB_FOREACH(s, knexthop_tree, &knt) + if (kr->r.flags & F_NEXTHOP) + RB_FOREACH(s, knexthop_tree, KT2KNT(kt)) if (s->kroute == kr) - knexthop_validate(s); + knexthop_validate(kt, s); if (kr->r.flags & F_KERNEL && kr == krm && kr->next == NULL) /* again remove only once */ - kr_redistribute6(IMSG_NETWORK_REMOVE, &kr->r); + kr_redistribute6(IMSG_NETWORK_REMOVE, kt, &kr->r); if (kr->r.flags & F_CONNECTED) if (kif_kr6_remove(kr) == -1) { -@@ -1171,45 +1760,46 @@ kroute6_remove(struct kroute6_node *kr) +@@ -1171,45 +1772,46 @@ kroute6_remove(struct kroute6_node *kr) } void -kroute6_clear(void) +kroute6_clear(struct ktable *kt) { struct kroute6_node *kr; - while ((kr = RB_MIN(kroute6_tree, &krt6)) != NULL) - kroute6_remove(kr); + while ((kr = RB_MIN(kroute6_tree, &kt->krt6)) != NULL) + kroute6_remove(kt, kr); } struct knexthop_node * -knexthop_find(struct bgpd_addr *addr) +knexthop_find(struct ktable *kt, struct bgpd_addr *addr) { struct knexthop_node s; + bzero(&s, sizeof(s)); memcpy(&s.nexthop, addr, sizeof(s.nexthop)); - return (RB_FIND(knexthop_tree, &knt, &s)); + return (RB_FIND(knexthop_tree, KT2KNT(kt), &s)); } int -knexthop_insert(struct knexthop_node *kn) +knexthop_insert(struct ktable *kt, struct knexthop_node *kn) { - if (RB_INSERT(knexthop_tree, &knt, kn) != NULL) { + if (RB_INSERT(knexthop_tree, KT2KNT(kt), kn) != NULL) { log_warnx("knexthop_tree insert failed for %s", log_addr(&kn->nexthop)); free(kn); return (-1); } - knexthop_validate(kn); + knexthop_validate(kt, kn); return (0); } int -knexthop_remove(struct knexthop_node *kn) +knexthop_remove(struct ktable *kt, struct knexthop_node *kn) { - kroute_detach_nexthop(kn); + kroute_detach_nexthop(kt, kn); - if (RB_REMOVE(knexthop_tree, &knt, kn) == NULL) { + if (RB_REMOVE(knexthop_tree, KT2KNT(kt), kn) == NULL) { log_warnx("knexthop_remove failed for %s", log_addr(&kn->nexthop)); return (-1); -@@ -1220,12 +1810,12 @@ knexthop_remove(struct knexthop_node *kn +@@ -1220,12 +1822,12 @@ knexthop_remove(struct knexthop_node *kn } void -knexthop_clear(void) +knexthop_clear(struct ktable *kt) { struct knexthop_node *kn; - while ((kn = RB_MIN(knexthop_tree, &knt)) != NULL) - knexthop_remove(kn); + while ((kn = RB_MIN(knexthop_tree, KT2KNT(kt))) != NULL) + knexthop_remove(kt, kn); } struct kif_node * -@@ -1257,6 +1847,7 @@ kif_insert(struct kif_node *kif) +@@ -1257,6 +1859,7 @@ kif_insert(struct kif_node *kif) int kif_remove(struct kif_node *kif) { + struct ktable *kt; struct kif_kr *kkr; struct kif_kr6 *kkr6; -@@ -1265,20 +1856,23 @@ kif_remove(struct kif_node *kif) +@@ -1265,20 +1868,23 @@ kif_remove(struct kif_node *kif) return (-1); } + if ((kt = ktable_get(/* XXX */ 0)) == NULL) + goto done; + while ((kkr = LIST_FIRST(&kif->kroute_l)) != NULL) { LIST_REMOVE(kkr, entry); kkr->kr->r.flags &= ~F_NEXTHOP; - kroute_remove(kkr->kr); + kroute_remove(kt, kkr->kr); free(kkr); } while ((kkr6 = LIST_FIRST(&kif->kroute6_l)) != NULL) { LIST_REMOVE(kkr6, entry); kkr6->kr->r.flags &= ~F_NEXTHOP; - kroute6_remove(kkr6->kr); + kroute6_remove(kt, kkr6->kr); free(kkr6); } - +done: free(kif); return (0); } -@@ -1473,113 +2067,109 @@ kroute6_validate(struct kroute6 *kr) +@@ -1473,113 +2079,109 @@ kroute6_validate(struct kroute6 *kr) } void -knexthop_validate(struct knexthop_node *kn) +knexthop_validate(struct ktable *kt, struct knexthop_node *kn) { + void *oldk; struct kroute_node *kr; struct kroute6_node *kr6; - struct kroute_nexthop n; - int was_valid = 0; - if (kn->nexthop.af == AF_INET && (kr = kn->kroute) != NULL) - was_valid = kroute_validate(&kr->r); - if (kn->nexthop.af == AF_INET6 && (kr6 = kn->kroute) != NULL) - was_valid = kroute6_validate(&kr6->r); + oldk = kn->kroute; + kroute_detach_nexthop(kt, kn); - bzero(&n, sizeof(n)); - memcpy(&n.nexthop, &kn->nexthop, sizeof(n.nexthop)); - kroute_detach_nexthop(kn); - - switch (kn->nexthop.af) { - case AF_INET: - if ((kr = kroute_match(kn->nexthop.v4.s_addr, 0)) == NULL) { - if (was_valid) - send_nexthop_update(&n); - } else { /* match */ - if (kroute_validate(&kr->r)) { /* valid */ - n.valid = 1; - n.connected = kr->r.flags & F_CONNECTED; - if ((n.gateway.v4.s_addr = - kr->r.nexthop.s_addr) != 0) - n.gateway.af = AF_INET; - memcpy(&n.kr.kr4, &kr->r, sizeof(n.kr.kr4)); - send_nexthop_update(&n); - } else /* down */ - if (was_valid) - send_nexthop_update(&n); + switch (kn->nexthop.aid) { + case AID_INET: + kr = kroute_match(kt, kn->nexthop.v4.s_addr, 0); + if (kr) { kn->kroute = kr; kr->r.flags |= F_NEXTHOP; } + + /* + * Send update if nexthop route changed under us if + * the route remains the same then the NH state has not + * changed. State changes are tracked by knexthop_track(). + */ + if (kr != oldk) + knexthop_send_update(kn); break; - case AF_INET6: - if ((kr6 = kroute6_match(&kn->nexthop.v6, 0)) == NULL) { - if (was_valid) - send_nexthop_update(&n); - } else { /* match */ - if (kroute6_validate(&kr6->r)) { /* valid */ - n.valid = 1; - n.connected = kr6->r.flags & F_CONNECTED; - if (memcmp(&kr6->r.nexthop, &in6addr_any, - sizeof(struct in6_addr)) != 0) { - n.gateway.af = AF_INET6; - memcpy(&n.gateway.v6, &kr6->r.nexthop, - sizeof(struct in6_addr)); - } - memcpy(&n.kr.kr6, &kr6->r, sizeof(n.kr.kr6)); - send_nexthop_update(&n); - } else /* down */ - if (was_valid) - send_nexthop_update(&n); + case AID_INET6: + kr6 = kroute6_match(kt, &kn->nexthop.v6, 0); + if (kr6) { kn->kroute = kr6; kr6->r.flags |= F_NEXTHOP; } + + if (kr6 != oldk) + knexthop_send_update(kn); break; } } void -knexthop_track(void *krn) +knexthop_track(struct ktable *kt, void *krp) { struct knexthop_node *kn; + + RB_FOREACH(kn, knexthop_tree, KT2KNT(kt)) + if (kn->kroute == krp) + knexthop_send_update(kn); +} + +void +knexthop_send_update(struct knexthop_node *kn) +{ + struct kroute_nexthop n; struct kroute_node *kr; struct kroute6_node *kr6; - struct kroute_nexthop n; - RB_FOREACH(kn, knexthop_tree, &knt) - if (kn->kroute == krn) { - bzero(&n, sizeof(n)); - memcpy(&n.nexthop, &kn->nexthop, sizeof(n.nexthop)); + bzero(&n, sizeof(n)); + memcpy(&n.nexthop, &kn->nexthop, sizeof(n.nexthop)); - switch (kn->nexthop.af) { - case AF_INET: - kr = krn; - n.valid = 1; - n.connected = kr->r.flags & F_CONNECTED; - if ((n.gateway.v4.s_addr = - kr->r.nexthop.s_addr) != 0) - n.gateway.af = AF_INET; - memcpy(&n.kr.kr4, &kr->r, sizeof(n.kr.kr4)); - break; - case AF_INET6: - kr6 = krn; - n.valid = 1; - n.connected = kr6->r.flags & F_CONNECTED; - if (memcmp(&kr6->r.nexthop, &in6addr_any, - sizeof(struct in6_addr)) != 0) { - n.gateway.af = AF_INET6; - memcpy(&n.gateway.v6, &kr6->r.nexthop, - sizeof(struct in6_addr)); - } - memcpy(&n.kr.kr6, &kr6->r, sizeof(n.kr.kr6)); - break; - } - send_nexthop_update(&n); + if (kn->kroute == NULL) { + n.valid = 0; /* NH is not valid */ + send_nexthop_update(&n); + return; + } + + switch (kn->nexthop.aid) { + case AID_INET: + kr = kn->kroute; + n.valid = kroute_validate(&kr->r); + n.connected = kr->r.flags & F_CONNECTED; + if ((n.gateway.v4.s_addr = + kr->r.nexthop.s_addr) != 0) + n.gateway.aid = AID_INET; + if (n.connected) { + n.net.aid = AID_INET; + n.net.v4.s_addr = kr->r.prefix.s_addr; + n.netlen = kr->r.prefixlen; -+ } + } + break; + case AID_INET6: + kr6 = kn->kroute; + n.valid = kroute6_validate(&kr6->r); + n.connected = kr6->r.flags & F_CONNECTED; + if (memcmp(&kr6->r.nexthop, &in6addr_any, + sizeof(struct in6_addr)) != 0) { + n.gateway.aid = AID_INET6; + memcpy(&n.gateway.v6, &kr6->r.nexthop, + sizeof(struct in6_addr)); - } ++ } + if (n.connected) { + n.net.aid = AID_INET6; + memcpy(&n.net.v6, &kr6->r.nexthop, + sizeof(struct in6_addr)); + n.netlen = kr6->r.prefixlen; + } + break; + } + send_nexthop_update(&n); } struct kroute_node * -kroute_match(in_addr_t key, int matchall) +kroute_match(struct ktable *kt, in_addr_t key, int matchall) { int i; struct kroute_node *kr; -@@ -1589,13 +2179,13 @@ kroute_match(in_addr_t key, int matchall +@@ -1589,13 +2191,13 @@ kroute_match(in_addr_t key, int matchall /* we will never match the default route */ for (i = 32; i > 0; i--) - if ((kr = kroute_find(htonl(ina & prefixlen2mask(i)), i, + if ((kr = kroute_find(kt, htonl(ina & prefixlen2mask(i)), i, RTP_ANY)) != NULL) if (matchall || bgpd_filternexthop(&kr->r, NULL) == 0) return (kr); /* if we don't have a match yet, try to find a default route */ - if ((kr = kroute_find(0, 0, RTP_ANY)) != NULL) + if ((kr = kroute_find(kt, 0, 0, RTP_ANY)) != NULL) if (matchall || bgpd_filternexthop(&kr->r, NULL) == 0) return (kr); -@@ -1603,7 +2193,7 @@ kroute_match(in_addr_t key, int matchall +@@ -1603,7 +2205,7 @@ kroute_match(in_addr_t key, int matchall } struct kroute6_node * -kroute6_match(struct in6_addr *key, int matchall) +kroute6_match(struct ktable *kt, struct in6_addr *key, int matchall) { int i; struct kroute6_node *kr6; -@@ -1612,13 +2202,13 @@ kroute6_match(struct in6_addr *key, int +@@ -1612,13 +2214,13 @@ kroute6_match(struct in6_addr *key, int /* we will never match the default route */ for (i = 128; i > 0; i--) { inet6applymask(&ina, key, i); - if ((kr6 = kroute6_find(&ina, i, RTP_ANY)) != NULL) + if ((kr6 = kroute6_find(kt, &ina, i, RTP_ANY)) != NULL) if (matchall || bgpd_filternexthop(NULL, &kr6->r) == 0) return (kr6); } /* if we don't have a match yet, try to find a default route */ - if ((kr6 = kroute6_find(&in6addr_any, 0, RTP_ANY)) != NULL) + if ((kr6 = kroute6_find(kt, &in6addr_any, 0, RTP_ANY)) != NULL) if (matchall || bgpd_filternexthop(NULL, &kr6->r) == 0) return (kr6); -@@ -1626,31 +2216,30 @@ kroute6_match(struct in6_addr *key, int +@@ -1626,31 +2228,30 @@ kroute6_match(struct in6_addr *key, int } void -kroute_detach_nexthop(struct knexthop_node *kn) +kroute_detach_nexthop(struct ktable *kt, struct knexthop_node *kn) { struct knexthop_node *s; struct kroute_node *k; struct kroute6_node *k6; + if (kn->kroute == NULL) + return; + /* * check whether there's another nexthop depending on this kroute * if not remove the flag */ - - if (kn->kroute == NULL) - return; - - for (s = RB_MIN(knexthop_tree, &knt); s != NULL && - s->kroute != kn->kroute; s = RB_NEXT(knexthop_tree, &knt, s)) - ; /* nothing */ + RB_FOREACH(s, knexthop_tree, KT2KNT(kt)) + if (s->kroute == kn->kroute && s != kn) + break; if (s == NULL) { - switch (kn->nexthop.af) { - case AF_INET: + switch (kn->nexthop.aid) { + case AID_INET: k = kn->kroute; k->r.flags &= ~F_NEXTHOP; break; - case AF_INET6: + case AID_INET6: k6 = kn->kroute; k6->r.flags &= ~F_NEXTHOP; break; -@@ -1665,7 +2254,7 @@ kroute_detach_nexthop(struct knexthop_no +@@ -1665,7 +2266,7 @@ kroute_detach_nexthop(struct knexthop_no */ int -protect_lo(void) +protect_lo(struct ktable *kt) { struct kroute_node *kr; struct kroute6_node *kr6; -@@ -1675,11 +2264,11 @@ protect_lo(void) +@@ -1675,11 +2276,11 @@ protect_lo(void) log_warn("protect_lo"); return (-1); } - kr->r.prefix.s_addr = htonl(INADDR_LOOPBACK); + kr->r.prefix.s_addr = htonl(INADDR_LOOPBACK & IN_CLASSA_NET); kr->r.prefixlen = 8; kr->r.flags = F_KERNEL|F_CONNECTED; - if (RB_INSERT(kroute_tree, &krt, kr) != NULL) + if (RB_INSERT(kroute_tree, &kt->krt, kr) != NULL) free(kr); /* kernel route already there, no problem */ /* special protection for loopback */ -@@ -1689,9 +2278,9 @@ protect_lo(void) +@@ -1689,9 +2290,9 @@ protect_lo(void) } memcpy(&kr6->r.prefix, &in6addr_loopback, sizeof(kr6->r.prefix)); kr6->r.prefixlen = 128; - kr->r.flags = F_KERNEL|F_CONNECTED; + kr6->r.flags = F_KERNEL|F_CONNECTED; - if (RB_INSERT(kroute6_tree, &krt6, kr6) != NULL) + if (RB_INSERT(kroute6_tree, &kt->krt6, kr6) != NULL) free(kr6); /* kernel route already there, no problem */ return (0); -@@ -1726,17 +2315,17 @@ mask2prefixlen(in_addr_t ina) +@@ -1726,17 +2327,17 @@ mask2prefixlen(in_addr_t ina) u_int8_t mask2prefixlen6(struct sockaddr_in6 *sa_in6) { - u_int8_t l = 0, i, len; + u_int8_t l = 0, *ap, *ep; /* * sin6_len is the size of the sockaddr so substract the offset of * the possibly truncated sin6_addr struct. */ - len = sa_in6->sin6_len - - (u_int8_t)(&((struct sockaddr_in6 *)NULL)->sin6_addr); - for (i = 0; i < len; i++) { + ap = (u_int8_t *)&sa_in6->sin6_addr; + ep = (u_int8_t *)sa_in6 + sa_in6->sin6_len; + for (; ap < ep; ap++) { /* this "beauty" is adopted from sbin/route/show.c ... */ - switch (sa_in6->sin6_addr.s6_addr[i]) { + switch (*ap) { case 0xff: l += 8; break; -@@ -1764,7 +2353,7 @@ mask2prefixlen6(struct sockaddr_in6 *sa_ +@@ -1764,7 +2365,7 @@ mask2prefixlen6(struct sockaddr_in6 *sa_ case 0x00: return (l); default: - fatalx("non continguous inet6 netmask"); + fatalx("non contiguous inet6 netmask"); } } -@@ -1788,7 +2377,7 @@ prefixlen2mask6(u_int8_t prefixlen) +@@ -1788,7 +2389,7 @@ prefixlen2mask6(u_int8_t prefixlen) } #define ROUNDUP(a) \ - (((a) & ((sizeof(long)) - 1)) ? (1 + ((a) | ((sizeof(long)) - 1))) : (a)) + (((a) & (sizeof(long) - 1)) ? (1 + ((a) | (sizeof(long) - 1))) : (a)) void get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info) -@@ -1808,11 +2397,10 @@ get_rtaddrs(int addrs, struct sockaddr * +@@ -1808,11 +2409,10 @@ get_rtaddrs(int addrs, struct sockaddr * void if_change(u_short ifindex, int flags, struct if_data *ifd) { + struct ktable *kt; struct kif_node *kif; struct kif_kr *kkr; struct kif_kr6 *kkr6; - struct kroute_nexthop nh; - struct knexthop_node *n; u_int8_t reachable; if ((kif = kif_find(ifindex)) == NULL) { -@@ -1833,28 +2421,18 @@ if_change(u_short ifindex, int flags, st +@@ -1833,28 +2433,18 @@ if_change(u_short ifindex, int flags, st kif->k.nh_reachable = reachable; + kt = ktable_get(/* XXX */ 0); + LIST_FOREACH(kkr, &kif->kroute_l, entry) { if (reachable) kkr->kr->r.flags &= ~F_DOWN; else kkr->kr->r.flags |= F_DOWN; - RB_FOREACH(n, knexthop_tree, &knt) - if (n->kroute == kkr->kr) { - bzero(&nh, sizeof(nh)); - memcpy(&nh.nexthop, &n->nexthop, - sizeof(nh.nexthop)); - if (kroute_validate(&kkr->kr->r)) { - nh.valid = 1; - nh.connected = 1; - if ((nh.gateway.v4.s_addr = - kkr->kr->r.nexthop.s_addr) != 0) - nh.gateway.af = AF_INET; - } - memcpy(&nh.kr.kr4, &kkr->kr->r, - sizeof(nh.kr.kr4)); - send_nexthop_update(&nh); - } + if (kt == NULL) + continue; + + knexthop_track(kt, kkr->kr); } LIST_FOREACH(kkr6, &kif->kroute6_l, entry) { if (reachable) -@@ -1862,27 +2440,10 @@ if_change(u_short ifindex, int flags, st +@@ -1862,27 +2452,10 @@ if_change(u_short ifindex, int flags, st else kkr6->kr->r.flags |= F_DOWN; - RB_FOREACH(n, knexthop_tree, &knt) - if (n->kroute == kkr6->kr) { - bzero(&nh, sizeof(nh)); - memcpy(&nh.nexthop, &n->nexthop, - sizeof(nh.nexthop)); - if (kroute6_validate(&kkr6->kr->r)) { - nh.valid = 1; - nh.connected = 1; - if (memcmp(&kkr6->kr->r.nexthop, - &in6addr_any, sizeof(struct - in6_addr))) { - nh.gateway.af = AF_INET6; - memcpy(&nh.gateway.v6, - &kkr6->kr->r.nexthop, - sizeof(struct in6_addr)); - } - } - memcpy(&nh.kr.kr6, &kkr6->kr->r, - sizeof(nh.kr.kr6)); - send_nexthop_update(&nh); - } + if (kt == NULL) + continue; + + knexthop_track(kt, kkr6->kr); } } -@@ -1917,25 +2478,38 @@ if_announce(void *msg) +@@ -1917,25 +2490,38 @@ if_announce(void *msg) */ int -send_rtmsg(int fd, int action, struct kroute *kroute) +send_rtmsg(int fd, int action, struct ktable *kt, struct kroute *kroute) { - struct iovec iov[5]; + struct iovec iov[7]; struct rt_msghdr hdr; struct sockaddr_in prefix; struct sockaddr_in nexthop; struct sockaddr_in mask; + struct { + struct sockaddr_dl dl; + char pad[sizeof(long)]; + } ifp; +#if !defined(__FreeBSD__) /* FreeBSD has no route labeling. */ + struct sockaddr_mpls mpls; struct sockaddr_rtlabel label; +#endif /* !defined(__FreeBSD__) */ int iovcnt = 0; - if (kr_state.fib_sync == 0) + if (!kt->fib_sync) return (0); /* initialize header */ bzero(&hdr, sizeof(hdr)); hdr.rtm_version = RTM_VERSION; hdr.rtm_type = action; - hdr.rtm_tableid = kr_state.rtableid; +#if !defined(__FreeBSD__) /* XXX: FreeBSD has no multiple routing tables */ + hdr.rtm_tableid = kt->rtableid; +#endif /* !defined(__FreeBSD__) */ +#if !defined(__FreeBSD__) /* XXX: FreeBSD has no rtm_priority */ hdr.rtm_priority = RTP_BGP; +#else + hdr.rtm_flags = RTF_PROTO1; +#endif /* !defined(__FreeBSD__) */ if (kroute->flags & F_BLACKHOLE) hdr.rtm_flags |= RTF_BLACKHOLE; if (kroute->flags & F_REJECT) -@@ -1984,6 +2558,37 @@ send_rtmsg(int fd, int action, struct kr +@@ -1984,6 +2570,37 @@ send_rtmsg(int fd, int action, struct kr iov[iovcnt].iov_base = &mask; iov[iovcnt++].iov_len = sizeof(mask); + if (kt->ifindex) { + bzero(&ifp, sizeof(ifp)); + ifp.dl.sdl_len = sizeof(struct sockaddr_dl); + ifp.dl.sdl_family = AF_LINK; + ifp.dl.sdl_index = kt->ifindex; + /* adjust header */ + hdr.rtm_addrs |= RTA_IFP; + hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_dl)); + /* adjust iovec */ + iov[iovcnt].iov_base = &ifp; + iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_dl)); + } + +#if !defined(__FreeBSD__) /* FreeBSD has no mpls support. */ + if (kroute->flags & F_MPLS) { + bzero(&mpls, sizeof(mpls)); + mpls.smpls_len = sizeof(mpls); + mpls.smpls_family = AF_MPLS; + mpls.smpls_label = kroute->mplslabel; + /* adjust header */ + hdr.rtm_flags |= RTF_MPLS; + hdr.rtm_mpls = MPLS_OP_PUSH; + hdr.rtm_addrs |= RTA_SRC; + hdr.rtm_msglen += sizeof(mpls); + /* adjust iovec */ + iov[iovcnt].iov_base = &mpls; + iov[iovcnt++].iov_len = sizeof(mpls); + } +#endif + +#if !defined(__FreeBSD__) /* FreeBSD has no route labeling. */ if (kroute->labelid) { bzero(&label, sizeof(label)); label.sr_len = sizeof(label); -@@ -1996,11 +2601,11 @@ send_rtmsg(int fd, int action, struct kr +@@ -1996,11 +2613,11 @@ send_rtmsg(int fd, int action, struct kr iov[iovcnt].iov_base = &label; iov[iovcnt++].iov_len = sizeof(label); } +#endif /* !defined(__FreeBSD__) */ retry: if (writev(fd, iov, iovcnt) == -1) { - switch (errno) { - case ESRCH: + if (errno == ESRCH) { if (hdr.rtm_type == RTM_CHANGE) { hdr.rtm_type = RTM_ADD; goto retry; -@@ -2009,27 +2614,18 @@ retry: +@@ -2009,27 +2626,18 @@ retry: inet_ntoa(kroute->prefix), kroute->prefixlen); return (0); - } else { - log_warnx("send_rtmsg: action %u, " - "prefix %s/%u: %s", hdr.rtm_type, - inet_ntoa(kroute->prefix), - kroute->prefixlen, strerror(errno)); - return (0); } - break; - default: - log_warnx("send_rtmsg: action %u, prefix %s/%u: %s", - hdr.rtm_type, inet_ntoa(kroute->prefix), - kroute->prefixlen, strerror(errno)); - return (0); } + log_warn("send_rtmsg: action %u, prefix %s/%u", hdr.rtm_type, + inet_ntoa(kroute->prefix), kroute->prefixlen); + return (0); } return (0); } int -send_rt6msg(int fd, int action, struct kroute6 *kroute) +send_rt6msg(int fd, int action, struct ktable *kt, struct kroute6 *kroute) { struct iovec iov[5]; struct rt_msghdr hdr; -@@ -2037,17 +2633,23 @@ send_rt6msg(int fd, int action, struct k +@@ -2037,17 +2645,23 @@ send_rt6msg(int fd, int action, struct k struct sockaddr_in6 addr; char pad[sizeof(long)]; } prefix, nexthop, mask; +#if !defined(__FreeBSD__) /* FreeBSD has no route labeling. */ struct sockaddr_rtlabel label; +#endif /* !defined(__FreeBSD__) */ int iovcnt = 0; - if (kr_state.fib_sync == 0) + if (!kt->fib_sync) return (0); /* initialize header */ bzero(&hdr, sizeof(hdr)); hdr.rtm_version = RTM_VERSION; hdr.rtm_type = action; +#if !defined(__FreeBSD__) /* XXX: FreeBSD has no multiple routing tables */ hdr.rtm_tableid = kr_state.rtableid; +#else + hdr.rtm_flags = RTF_PROTO1; +#endif /* !defined(__FreeBSD__) */ if (kroute->flags & F_BLACKHOLE) hdr.rtm_flags |= RTF_BLACKHOLE; if (kroute->flags & F_REJECT) -@@ -2100,6 +2702,7 @@ send_rt6msg(int fd, int action, struct k +@@ -2100,6 +2714,7 @@ send_rt6msg(int fd, int action, struct k iov[iovcnt].iov_base = &mask; iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6)); +#if !defined(__FreeBSD__) /* FreeBSD has no route labeling. */ if (kroute->labelid) { bzero(&label, sizeof(label)); label.sr_len = sizeof(label); -@@ -2112,11 +2715,11 @@ send_rt6msg(int fd, int action, struct k +@@ -2112,11 +2727,11 @@ send_rt6msg(int fd, int action, struct k iov[iovcnt].iov_base = &label; iov[iovcnt++].iov_len = sizeof(label); } +#endif /* !defined(__FreeBSD__) */ retry: if (writev(fd, iov, iovcnt) == -1) { - switch (errno) { - case ESRCH: + if (errno == ESRCH) { if (hdr.rtm_type == RTM_CHANGE) { hdr.rtm_type = RTM_ADD; goto retry; -@@ -2125,31 +2728,26 @@ retry: +@@ -2125,31 +2740,26 @@ retry: log_in6addr(&kroute->prefix), kroute->prefixlen); return (0); - } else { - log_warnx("send_rt6msg: action %u, " - "prefix %s/%u: %s", hdr.rtm_type, - log_in6addr(&kroute->prefix), - kroute->prefixlen, strerror(errno)); - return (0); } - break; - default: - log_warnx("send_rt6msg: action %u, prefix %s/%u: %s", - hdr.rtm_type, log_in6addr(&kroute->prefix), - kroute->prefixlen, strerror(errno)); - return (0); } + log_warn("send_rt6msg: action %u, prefix %s/%u", hdr.rtm_type, + log_in6addr(&kroute->prefix), kroute->prefixlen); + return (0); } return (0); } int -fetchtable(u_int rtableid, int connected_only) +fetchtable(struct ktable *kt) { size_t len; +#if !defined(__FreeBSD__) /* FreeBSD has no table id. */ int mib[7]; - char *buf, *next, *lim; +#else + int mib[6]; +#endif + char *buf = NULL, *next, *lim; struct rt_msghdr *rtm; struct sockaddr *sa, *gw, *rti_info[RTAX_MAX]; struct sockaddr_in *sa_in; -@@ -2163,22 +2761,35 @@ fetchtable(u_int rtableid, int connected +@@ -2163,22 +2773,35 @@ fetchtable(u_int rtableid, int connected mib[3] = 0; mib[4] = NET_RT_DUMP; mib[5] = 0; - mib[6] = rtableid; +#if !defined(__FreeBSD__) /* FreeBSD has no table id. */ + mib[6] = kt->rtableid; +#endif +#if !defined(__FreeBSD__) /* FreeBSD has no table id. */ if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) { - if (rtableid != 0 && errno == EINVAL) /* table nonexistent */ +#else + if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) { +#endif + if (kt->rtableid != 0 && errno == EINVAL) + /* table nonexistent */ return (0); log_warn("sysctl"); return (-1); } - if ((buf = malloc(len)) == NULL) { - log_warn("fetchtable"); - return (-1); - } - if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) { - log_warn("sysctl"); - free(buf); - return (-1); + if (len > 0) { + if ((buf = malloc(len)) == NULL) { + log_warn("fetchtable"); + return (-1); + } +#if !defined(__FreeBSD__) /* FreeBSD has no table id. */ + if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) { +#else + if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) { +#endif + log_warn("sysctl2"); + free(buf); + return (-1); + } } lim = buf + len; -@@ -2186,7 +2797,11 @@ fetchtable(u_int rtableid, int connected +@@ -2186,7 +2809,11 @@ fetchtable(u_int rtableid, int connected rtm = (struct rt_msghdr *)next; if (rtm->rtm_version != RTM_VERSION) continue; +#if !defined(__FreeBSD__) sa = (struct sockaddr *)(next + rtm->rtm_hdrlen); +#else + sa = (struct sockaddr *)(next + sizeof(struct rt_msghdr)); +#endif get_rtaddrs(rtm->rtm_addrs, sa, rti_info); if ((sa = rti_info[RTAX_DST]) == NULL) -@@ -2205,7 +2820,11 @@ fetchtable(u_int rtableid, int connected +@@ -2205,7 +2832,11 @@ fetchtable(u_int rtableid, int connected } kr->r.flags = F_KERNEL; +#if defined(__FreeBSD__) /* no rtm_priority on FreeBSD */ + kr->r.priority = RTP_BGP; +#else kr->r.priority = rtm->rtm_priority; +#endif kr->r.ifindex = rtm->rtm_index; kr->r.prefix.s_addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr; -@@ -2223,8 +2842,12 @@ fetchtable(u_int rtableid, int connected +@@ -2223,8 +2854,12 @@ fetchtable(u_int rtableid, int connected break; kr->r.prefixlen = mask2prefixlen(sa_in->sin_addr.s_addr); - } else if (rtm->rtm_flags & RTF_HOST) + } else if (rtm->rtm_flags & RTF_HOST) { kr->r.prefixlen = 32; +#if defined(__FreeBSD__) /* RTF_HOST means connected route */ + kr->r.flags |= F_CONNECTED; +#endif + } else kr->r.prefixlen = prefixlen_classful(kr->r.prefix.s_addr); -@@ -2238,7 +2861,11 @@ fetchtable(u_int rtableid, int connected +@@ -2238,11 +2873,25 @@ fetchtable(u_int rtableid, int connected } kr6->r.flags = F_KERNEL; +#if defined(__FreeBSD__) /* no rtm_priority on FreeBSD */ + kr6->r.priority = RTP_BGP; +#else kr6->r.priority = rtm->rtm_priority; +#endif kr6->r.ifindex = rtm->rtm_index; memcpy(&kr6->r.prefix, &((struct sockaddr_in6 *)sa)->sin6_addr, -@@ -2257,8 +2884,12 @@ fetchtable(u_int rtableid, int connected + sizeof(kr6->r.prefix)); ++#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) ++ if (IN6_IS_ADDR_LINKLOCAL(&kr6->r.prefix)) { ++ if (((struct sockaddr_in6 *)sa)->sin6_scope_id !=0) ++ SET_IN6_LINKLOCAL_IFINDEX(kr6->r.prefix, ++ ((struct sockaddr_in6 *)sa)->sin6_scope_id); ++ else ++ SET_IN6_LINKLOCAL_IFINDEX(kr6->r.prefix, ++ rtm->rtm_index); ++ } ++#endif + + sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK]; + if (rtm->rtm_flags & RTF_STATIC) +@@ -2257,8 +2906,12 @@ fetchtable(u_int rtableid, int connected if (sa_in6->sin6_len == 0) break; kr6->r.prefixlen = mask2prefixlen6(sa_in6); - } else if (rtm->rtm_flags & RTF_HOST) + } else if (rtm->rtm_flags & RTF_HOST) { kr6->r.prefixlen = 128; +#if defined(__FreeBSD__) /* RTF_HOST means connected route */ + kr6->r.flags |= F_CONNECTED; +#endif + } else fatalx("INET6 route without netmask"); break; -@@ -2290,23 +2921,28 @@ fetchtable(u_int rtableid, int connected +@@ -2280,6 +2933,13 @@ fetchtable(u_int rtableid, int connected + memcpy(&kr6->r.nexthop, + &((struct sockaddr_in6 *)gw)->sin6_addr, + sizeof(kr6->r.nexthop)); ++#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) ++ if (IN6_IS_ADDR_LINKLOCAL(&kr6->r.nexthop) && ++ ((struct sockaddr_in6 *)gw)->sin6_scope_id != 0) { ++ SET_IN6_LINKLOCAL_IFINDEX(kr6->r.nexthop, ++ ((struct sockaddr_in6 *)gw)->sin6_scope_id); ++ } ++#endif + break; + case AF_LINK: + if (sa->sa_family == AF_INET) +@@ -2290,23 +2950,28 @@ fetchtable(u_int rtableid, int connected } if (sa->sa_family == AF_INET) { +#if !defined(__FreeBSD__) /* no rtm_priority on FreeBSD */ if (rtm->rtm_priority == RTP_BGP) { - send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r); - free(kr); - } else if (connected_only && - !(kr->r.flags & F_CONNECTED)) +#else + /* never delete route */ + if (0) { +#endif + send_rtmsg(kr_state.fd, RTM_DELETE, kt, &kr->r); free(kr); - else - kroute_insert(kr); + } else + kroute_insert(kt, kr); } else if (sa->sa_family == AF_INET6) { +#if !defined(__FreeBSD__) /* no rtm_priority on FreeBSD */ if (rtm->rtm_priority == RTP_BGP) { - send_rt6msg(kr_state.fd, RTM_DELETE, &kr6->r); - free(kr6); - } else if (connected_only && - !(kr6->r.flags & F_CONNECTED)) +#else + /* never delete route */ + if (0) { +#endif + send_rt6msg(kr_state.fd, RTM_DELETE, kt, + &kr6->r); free(kr6); - else - kroute6_insert(kr6); + } else + kroute6_insert(kt, kr6); } } free(buf); -@@ -2327,7 +2963,7 @@ fetchifs(int ifindex) +@@ -2327,7 +2992,7 @@ fetchifs(int ifindex) mib[0] = CTL_NET; mib[1] = AF_ROUTE; mib[2] = 0; - mib[3] = AF_INET; + mib[3] = AF_INET; /* AF does not matter but AF_INET is shorter */ mib[4] = NET_RT_IFLIST; mib[5] = ifindex; -@@ -2396,7 +3032,7 @@ dispatch_rtmsg(void) +@@ -2396,7 +3061,7 @@ dispatch_rtmsg(void) struct rt_msghdr *rtm; struct if_msghdr ifm; struct sockaddr *sa, *rti_info[RTAX_MAX]; - int connected_only; + struct ktable *kt; if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) { log_warn("dispatch_rtmsg: read error"); -@@ -2418,7 +3054,11 @@ dispatch_rtmsg(void) +@@ -2418,7 +3083,11 @@ dispatch_rtmsg(void) case RTM_ADD: case RTM_CHANGE: case RTM_DELETE: +#if !defined(__FreeBSD__) sa = (struct sockaddr *)(next + rtm->rtm_hdrlen); +#else + sa = (struct sockaddr *)(next + sizeof(struct rt_msghdr)); +#endif get_rtaddrs(rtm->rtm_addrs, sa, rti_info); if (rtm->rtm_pid == kr_state.pid) /* cause by us */ -@@ -2430,16 +3070,14 @@ dispatch_rtmsg(void) +@@ -2430,16 +3099,14 @@ dispatch_rtmsg(void) if (rtm->rtm_flags & RTF_LLINFO) /* arp cache */ continue; - connected_only = 0; - if (rtm->rtm_tableid != kr_state.rtableid) { - if (rtm->rtm_tableid == 0) - connected_only = 1; - else - continue; - } +#if !defined(__FreeBSD__) /* FreeBSD has no rtm_tableid. */ + if ((kt = ktable_get(rtm->rtm_tableid)) == NULL) +#else + if ((kt = ktable_get(0)) == NULL) +#endif + continue; - if (dispatch_rtmsg_addr(rtm, rti_info, - connected_only) == -1) + if (dispatch_rtmsg_addr(rtm, rti_info, kt) == -1) return (-1); break; case RTM_IFINFO: -@@ -2460,7 +3098,7 @@ dispatch_rtmsg(void) +@@ -2460,7 +3127,7 @@ dispatch_rtmsg(void) int dispatch_rtmsg_addr(struct rt_msghdr *rtm, struct sockaddr *rti_info[RTAX_MAX], - int connected_only) + struct ktable *kt) { struct sockaddr *sa; struct sockaddr_in *sa_in; -@@ -2468,7 +3106,7 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt +@@ -2468,7 +3135,7 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt struct kroute_node *kr; struct kroute6_node *kr6; struct bgpd_addr prefix; - int flags, oflags, mpath = 0; + int flags, oflags, mpath = 0, changed = 0; u_int16_t ifindex; u_int8_t prefixlen; u_int8_t prio; -@@ -2494,31 +3132,44 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt +@@ -2494,31 +3161,54 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt mpath = 1; #endif +#if !defined(__FreeBSD__) /* no rtm_priority on FreeBSD */ prio = rtm->rtm_priority; - prefix.af = sa->sa_family; - switch (prefix.af) { +#else + prio = RTP_BGP; +#endif + switch (sa->sa_family) { case AF_INET: + prefix.aid = AID_INET; prefix.v4.s_addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr; sa_in = (struct sockaddr_in *)rti_info[RTAX_NETMASK]; if (sa_in != NULL) { if (sa_in->sin_len != 0) prefixlen = mask2prefixlen( sa_in->sin_addr.s_addr); - } else if (rtm->rtm_flags & RTF_HOST) + } else if (rtm->rtm_flags & RTF_HOST) { prefixlen = 32; +#if defined(__FreeBSD__) /* RTF_HOST means connected route */ + flags |= F_CONNECTED; +#endif + } else prefixlen = prefixlen_classful(prefix.v4.s_addr); break; case AF_INET6: + prefix.aid = AID_INET6; memcpy(&prefix.v6, &((struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)); ++#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) ++ if (IN6_IS_ADDR_LINKLOCAL(&prefix.v6) != 0) { ++ if (((struct sockaddr_in6 *)sa)->sin6_scope_id !=0) ++ SET_IN6_LINKLOCAL_IFINDEX(prefix.v6, ++ ((struct sockaddr_in6 *)sa)->sin6_scope_id); ++ else ++ SET_IN6_LINKLOCAL_IFINDEX(prefix.v6, ++ rtm->rtm_index); ++ } ++#endif sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK]; if (sa_in6 != NULL) { if (sa_in6->sin6_len != 0) prefixlen = mask2prefixlen6(sa_in6); - } else if (rtm->rtm_flags & RTF_HOST) + } else if (rtm->rtm_flags & RTF_HOST) { prefixlen = 128; +#if defined(__FreeBSD__) /* RTF_HOST means connected route */ + flags |= F_CONNECTED; +#endif + } else fatalx("in6 net addr without netmask"); break; -@@ -2537,10 +3188,10 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt +@@ -2537,10 +3227,10 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt } if (rtm->rtm_type == RTM_DELETE) { - switch (prefix.af) { - case AF_INET: + switch (prefix.aid) { + case AID_INET: sa_in = (struct sockaddr_in *)sa; - if ((kr = kroute_find(prefix.v4.s_addr, + if ((kr = kroute_find(kt, prefix.v4.s_addr, prefixlen, prio)) == NULL) return (0); if (!(kr->r.flags & F_KERNEL)) -@@ -2554,12 +3205,12 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt +@@ -2554,12 +3244,12 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt return (0); } - if (kroute_remove(kr) == -1) + if (kroute_remove(kt, kr) == -1) return (-1); break; - case AF_INET6: + case AID_INET6: sa_in6 = (struct sockaddr_in6 *)sa; - if ((kr6 = kroute6_find(&prefix.v6, prefixlen, + if ((kr6 = kroute6_find(kt, &prefix.v6, prefixlen, prio)) == NULL) return (0); if (!(kr6->r.flags & F_KERNEL)) -@@ -2574,26 +3225,23 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt +@@ -2574,26 +3264,23 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt return (0); } - if (kroute6_remove(kr6) == -1) + if (kroute6_remove(kt, kr6) == -1) return (-1); break; } return (0); } - if (connected_only && !(flags & F_CONNECTED)) - return (0); - if (sa == NULL && !(flags & F_CONNECTED)) { log_warnx("dispatch_rtmsg no nexthop for %s/%u", log_addr(&prefix), prefixlen); return (0); } - switch (prefix.af) { - case AF_INET: + switch (prefix.aid) { + case AID_INET: sa_in = (struct sockaddr_in *)sa; - if ((kr = kroute_find(prefix.v4.s_addr, prefixlen, + if ((kr = kroute_find(kt, prefix.v4.s_addr, prefixlen, prio)) != NULL) { if (kr->r.flags & F_KERNEL) { /* get the correct route */ -@@ -2605,30 +3253,38 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt +@@ -2605,30 +3292,38 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt } else if (mpath && rtm->rtm_type == RTM_ADD) goto add4; - if (sa_in != NULL) + if (sa_in != NULL) { + if (kr->r.nexthop.s_addr != + sa_in->sin_addr.s_addr) + changed = 1; kr->r.nexthop.s_addr = sa_in->sin_addr.s_addr; - else + } else { + if (kr->r.nexthop.s_addr != 0) + changed = 1; kr->r.nexthop.s_addr = 0; + } if (kr->r.flags & F_NEXTHOP) flags |= F_NEXTHOP; oflags = kr->r.flags; + if (flags != oflags) + changed = 1; kr->r.flags = flags; if ((oflags & F_CONNECTED) && !(flags & F_CONNECTED)) { kif_kr_remove(kr); kr_redistribute(IMSG_NETWORK_REMOVE, - &kr->r); + kt, &kr->r); } if ((flags & F_CONNECTED) && !(oflags & F_CONNECTED)) { kif_kr_insert(kr); kr_redistribute(IMSG_NETWORK_ADD, - &kr->r); + kt, &kr->r); } - if (kr->r.flags & F_NEXTHOP) - knexthop_track(kr); + if (kr->r.flags & F_NEXTHOP && changed) + knexthop_track(kt, kr); } } else if (rtm->rtm_type == RTM_CHANGE) { log_warnx("change req for %s/%u: not in table", -@@ -2651,50 +3307,62 @@ add4: +@@ -2651,50 +3346,62 @@ add4: kr->r.ifindex = ifindex; kr->r.priority = prio; - kroute_insert(kr); + kroute_insert(kt, kr); } break; - case AF_INET6: + case AID_INET6: sa_in6 = (struct sockaddr_in6 *)sa; - if ((kr6 = kroute6_find(&prefix.v6, prefixlen, prio)) != NULL) { + if ((kr6 = kroute6_find(kt, &prefix.v6, prefixlen, prio)) != + NULL) { if (kr6->r.flags & F_KERNEL) { /* get the correct route */ if (mpath && rtm->rtm_type == RTM_CHANGE && (kr6 = kroute6_matchgw(kr6, sa_in6)) == NULL) { log_warnx("dispatch_rtmsg[change] " - "mpath route not found"); + "IPv6 mpath route not found"); return (-1); } else if (mpath && rtm->rtm_type == RTM_ADD) goto add6; - if (sa_in6 != NULL) + if (sa_in6 != NULL) { + if (memcmp(&kr6->r.nexthop, + &sa_in6->sin6_addr, + sizeof(struct in6_addr))) + changed = 1; memcpy(&kr6->r.nexthop, &sa_in6->sin6_addr, sizeof(struct in6_addr)); - else + } else { + if (memcmp(&kr6->r.nexthop, + &in6addr_any, + sizeof(struct in6_addr))) + changed = 1; memcpy(&kr6->r.nexthop, &in6addr_any, sizeof(struct in6_addr)); + } if (kr6->r.flags & F_NEXTHOP) flags |= F_NEXTHOP; oflags = kr6->r.flags; + if (flags != oflags) + changed = 1; kr6->r.flags = flags; if ((oflags & F_CONNECTED) && !(flags & F_CONNECTED)) { kif_kr6_remove(kr6); kr_redistribute6(IMSG_NETWORK_REMOVE, - &kr6->r); + kt, &kr6->r); } if ((flags & F_CONNECTED) && !(oflags & F_CONNECTED)) { kif_kr6_insert(kr6); kr_redistribute6(IMSG_NETWORK_ADD, - &kr6->r); + kt, &kr6->r); } - if (kr6->r.flags & F_NEXTHOP) - knexthop_track(kr6); + if (kr6->r.flags & F_NEXTHOP && changed) + knexthop_track(kt, kr6); } } else if (rtm->rtm_type == RTM_CHANGE) { log_warnx("change req for %s/%u: not in table", -@@ -2720,7 +3388,7 @@ add6: +@@ -2719,8 +3426,12 @@ add6: + kr6->r.flags = flags; kr6->r.ifindex = ifindex; kr6->r.priority = prio; - +- - kroute6_insert(kr6); ++#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) ++ if (IN6_IS_ADDR_LINKLOCAL(&kr6->r.nexthop)) ++ SET_IN6_LINKLOCAL_IFINDEX(kr6->r.nexthop, ++ ifindex); ++#endif + kroute6_insert(kt, kr6); } break; } Index: head/net/openbgpd/files/patch-bgpd_printconf.c =================================================================== --- head/net/openbgpd/files/patch-bgpd_printconf.c (revision 354183) +++ head/net/openbgpd/files/patch-bgpd_printconf.c (revision 354184) @@ -1,440 +1,439 @@ Index: bgpd/printconf.c =================================================================== RCS file: /home/cvs/private/hrs/openbgpd/bgpd/printconf.c,v retrieving revision 1.1.1.7 -retrieving revision 1.10 -diff -u -p -r1.1.1.7 -r1.10 +retrieving revision 1.11 +diff -u -p -r1.1.1.7 -r1.11 --- bgpd/printconf.c 14 Feb 2010 20:19:57 -0000 1.1.1.7 -+++ bgpd/printconf.c 8 Dec 2012 20:17:59 -0000 1.10 ++++ bgpd/printconf.c 16 May 2014 00:36:26 -0000 1.11 @@ -1,4 +1,4 @@ -/* $OpenBSD: printconf.c,v 1.70 2009/06/06 01:10:29 claudio Exp $ */ +/* $OpenBSD: printconf.c,v 1.88 2012/09/23 09:39:18 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -16,9 +16,13 @@ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include #include #include #include +#if defined(__FreeBSD__) /* limits.h */ +#include +#endif #include "bgpd.h" #include "mrt.h" @@ -27,14 +31,19 @@ void print_op(enum comp_ops); void print_community(int, int); +void print_extcommunity(struct filter_extcommunity *); +void print_origin(u_int8_t); void print_set(struct filter_set_head *); void print_mainconf(struct bgpd_config *); +void print_rdomain_targets(struct filter_set_head *, const char *); +void print_rdomain(struct rdomain *); +const char *print_af(u_int8_t); void print_network(struct network_config *); void print_peer(struct peer_config *, struct bgpd_config *, const char *); const char *print_auth_alg(u_int8_t); const char *print_enc_alg(u_int8_t); -const char *print_safi(u_int8_t); +void print_announce(struct peer_config *, const char *); void print_rule(struct peer *, struct filter_rule *); const char * mrt_type(enum mrt_type); void print_mrt(u_int32_t, u_int32_t, const char *, const char *); @@ -94,6 +103,45 @@ print_community(int as, int type) } void +print_extcommunity(struct filter_extcommunity *c) +{ + switch (c->type & EXT_COMMUNITY_VALUE) { + case EXT_COMMUNITY_TWO_AS: + printf("%s %i:%i ", log_ext_subtype(c->subtype), + c->data.ext_as.as, c->data.ext_as.val); + break; + case EXT_COMMUNITY_IPV4: + printf("%s %s:%i ", log_ext_subtype(c->subtype), + inet_ntoa(c->data.ext_ip.addr), c->data.ext_ip.val); + break; + case EXT_COMMUNITY_FOUR_AS: + printf("%s %s:%i ", log_ext_subtype(c->subtype), + log_as(c->data.ext_as4.as4), c->data.ext_as.val); + break; + case EXT_COMMUNITY_OPAQUE: + printf("%s 0x%llx ", log_ext_subtype(c->subtype), + (long long unsigned int)c->data.ext_opaq); + break; + default: + printf("0x%x 0x%llx ", c->type, (long long unsigned int)c->data.ext_opaq); + break; + } +} + +void +print_origin(u_int8_t o) +{ + if (o == ORIGIN_IGP) + printf("igp "); + else if (o == ORIGIN_EGP) + printf("egp "); + else if (o == ORIGIN_INCOMPLETE) + printf("incomplete "); + else + printf("%u ", o); +} + +void print_set(struct filter_set_head *set) { struct filter_set *s; @@ -161,11 +209,23 @@ print_set(struct filter_set_head *set) case ACTION_RTLABEL: printf("rtlabel %s ", s->action.rtlabel); break; + case ACTION_SET_ORIGIN: + printf("origin "); + print_origin(s->action.origin); + break; case ACTION_RTLABEL_ID: case ACTION_PFTABLE_ID: /* not possible */ printf("king bula saiz: config broken"); break; + case ACTION_SET_EXT_COMMUNITY: + printf("ext-community "); + print_extcommunity(&s->action.ext_community); + break; + case ACTION_DEL_EXT_COMMUNITY: + printf("ext-community delete "); + print_extcommunity(&s->action.ext_community); + break; } } printf("}"); @@ -182,6 +242,10 @@ print_mainconf(struct bgpd_config *conf) printf(" %u", conf->short_as); ina.s_addr = conf->bgpid; printf("\nrouter-id %s\n", inet_ntoa(ina)); + + printf("socket \"%s\"\n", conf->csock); + if (conf->rcsock) + printf("socket \"%s\" restricted\n", conf->rcsock); if (conf->holdtime) printf("holdtime %u\n", conf->holdtime); if (conf->min_holdtime) @@ -189,11 +253,6 @@ print_mainconf(struct bgpd_config *conf) if (conf->connectretry) printf("connect-retry %u\n", conf->connectretry); - if (conf->flags & BGPD_FLAG_NO_FIB_UPDATE) - printf("fib-update no\n"); - else - printf("fib-update yes\n"); - if (conf->flags & BGPD_FLAG_NO_EVALUATE) printf("route-collector yes\n"); @@ -214,43 +273,67 @@ print_mainconf(struct bgpd_config *conf) printf("nexthop qualify via bgp\n"); if (conf->flags & BGPD_FLAG_NEXTHOP_DEFAULT) printf("nexthop qualify via default\n"); +} - if (conf->flags & BGPD_FLAG_REDIST_CONNECTED) { - printf("network inet connected"); - if (!TAILQ_EMPTY(&conf->connectset)) - printf(" "); - print_set(&conf->connectset); - printf("\n"); - } - if (conf->flags & BGPD_FLAG_REDIST_STATIC) { - printf("network inet static"); - if (!TAILQ_EMPTY(&conf->staticset)) - printf(" "); - print_set(&conf->staticset); - printf("\n"); - } - if (conf->flags & BGPD_FLAG_REDIST6_CONNECTED) { - printf("network inet6 connected"); - if (!TAILQ_EMPTY(&conf->connectset6)) - printf(" "); - print_set(&conf->connectset6); - printf("\n"); - } - if (conf->flags & BGPD_FLAG_REDIST_STATIC) { - printf("network inet6 static"); - if (!TAILQ_EMPTY(&conf->staticset6)) - printf(" "); - print_set(&conf->staticset6); +void +print_rdomain_targets(struct filter_set_head *set, const char *tgt) +{ + struct filter_set *s; + TAILQ_FOREACH(s, set, entry) { + printf("\t%s ", tgt); + print_extcommunity(&s->action.ext_community); printf("\n"); } - if (conf->rtableid) - printf("rtable %u\n", conf->rtableid); +} + +void +print_rdomain(struct rdomain *r) +{ + printf("rdomain %u {\n", r->rtableid); + printf("\tdescr \"%s\"\n", r->descr); + if (r->flags & F_RIB_NOFIBSYNC) + printf("\tfib-update no\n"); + else + printf("\tfib-update yes\n"); + printf("\tdepend on %s\n", r->ifmpe); + + printf("\n\t%s\n", log_rd(r->rd)); + + print_rdomain_targets(&r->export, "export-target"); + print_rdomain_targets(&r->import, "import-target"); + + printf("}\n"); +} + +const char * +print_af(u_int8_t aid) +{ + /* + * Hack around the fact that aid2str() will return "IPv4 unicast" + * for AID_INET. AID_INET and AID_INET6 need special handling and + * the other AID should never end up here (at least for now). + */ + if (aid == AID_INET) + return ("inet"); + if (aid == AID_INET6) + return ("inet6"); + return (aid2str(aid)); } void print_network(struct network_config *n) { - printf("network %s/%u", log_addr(&n->prefix), n->prefixlen); + switch (n->type) { + case NETWORK_STATIC: + printf("network %s static", print_af(n->prefix.aid)); + break; + case NETWORK_CONNECTED: + printf("network %s connected", print_af(n->prefix.aid)); + break; + default: + printf("network %s/%u", log_addr(&n->prefix), n->prefixlen); + break; + } if (!TAILQ_EMPTY(&n->attrset)) printf(" "); print_set(&n->attrset); @@ -263,8 +346,8 @@ print_peer(struct peer_config *p, struct char *method; struct in_addr ina; - if ((p->remote_addr.af == AF_INET && p->remote_masklen != 32) || - (p->remote_addr.af == AF_INET6 && p->remote_masklen != 128)) + if ((p->remote_addr.aid == AID_INET && p->remote_masklen != 32) || + (p->remote_addr.aid == AID_INET6 && p->remote_masklen != 128)) printf("%sneighbor %s/%u {\n", c, log_addr(&p->remote_addr), p->remote_masklen); else @@ -281,7 +364,7 @@ print_peer(struct peer_config *p, struct printf("%s\tmultihop %u\n", c, p->distance); if (p->passive) printf("%s\tpassive\n", c); - if (p->local_addr.af) + if (p->local_addr.aid) printf("%s\tlocal-address %s\n", c, log_addr(&p->local_addr)); if (p->max_prefix) { printf("%s\tmax-prefix %u", c, p->max_prefix); @@ -295,6 +378,12 @@ print_peer(struct peer_config *p, struct printf("%s\tholdtime min %u\n", c, p->min_holdtime); if (p->announce_capa == 0) printf("%s\tannounce capabilities no\n", c); + if (p->capabilities.refresh == 0) + printf("%s\tannounce refresh no\n", c); + if (p->capabilities.grestart.restart == 0) + printf("%s\tannounce restart no\n", c); + if (p->capabilities.as4byte == 0) + printf("%s\tannounce as4byte no\n", c); if (p->announce_type == ANNOUNCE_SELF) printf("%s\tannounce self\n", c); else if (p->announce_type == ANNOUNCE_NONE) @@ -324,6 +413,10 @@ print_peer(struct peer_config *p, struct printf("%s\tdepend on \"%s\"\n", c, p->if_depend); if (p->flags & PEERFLAG_TRANS_AS) printf("%s\ttransparent-as yes\n", c); +#if defined(IPV6_LINKLOCAL_PEER) + if (p->lliface[0]) + printf("%s\tinterface %s\n", c, p->lliface); +#endif if (p->auth.method == AUTH_MD5SIG) printf("%s\ttcp md5sig\n", c); @@ -354,8 +447,7 @@ print_peer(struct peer_config *p, struct if (p->ttlsec) printf("%s\tttl-security yes\n", c); - printf("%s\tannounce IPv4 %s\n", c, print_safi(p->capabilities.mp_v4)); - printf("%s\tannounce IPv6 %s\n", c, print_safi(p->capabilities.mp_v6)); + print_announce(p, c); if (p->softreconfig_in == 1) printf("%s\tsoftreconfig in yes\n", c); @@ -399,17 +491,14 @@ print_enc_alg(u_int8_t alg) } } -const char * -print_safi(u_int8_t safi) +void +print_announce(struct peer_config *p, const char *c) { - switch (safi) { - case SAFI_NONE: - return ("none"); - case SAFI_UNICAST: - return ("unicast"); - default: - return ("?"); - } + u_int8_t aid; + + for (aid = 0; aid < AID_MAX; aid++) + if (p->capabilities.mp[aid]) + printf("%s\tannounce %s\n", c, aid2str(aid)); } void @@ -455,14 +544,14 @@ print_rule(struct peer *peer_l, struct f } else printf("any "); - if (r->match.prefix.addr.af) + if (r->match.prefix.addr.aid) printf("prefix %s/%u ", log_addr(&r->match.prefix.addr), r->match.prefix.len); - if (r->match.prefix.addr.af == 0 && r->match.prefixlen.af) { - if (r->match.prefixlen.af == AF_INET) + if (r->match.prefix.addr.aid == 0 && r->match.prefixlen.aid) { + if (r->match.prefixlen.aid == AID_INET) printf("inet "); - if (r->match.prefixlen.af == AF_INET6) + if (r->match.prefixlen.aid == AID_INET6) printf("inet6 "); } @@ -479,6 +568,13 @@ print_rule(struct peer *peer_l, struct f } } + if (r->match.nexthop.flags) { + if (r->match.nexthop.flags == FILTER_NEXTHOP_NEIGHBOR) + printf("nexthop neighbor "); + else + printf("nexthop %s ", log_addr(&r->match.nexthop.addr)); + } + if (r->match.as.type) { if (r->match.as.type == AS_ALL) printf("AS %s ", log_as(r->match.as.as)); @@ -492,11 +588,20 @@ print_rule(struct peer *peer_l, struct f printf("unfluffy-as %s ", log_as(r->match.as.as)); } + if (r->match.aslen.type) { + printf("%s %u ", r->match.aslen.type == ASLEN_MAX ? + "max-as-len" : "max-as-seq", r->match.aslen.aslen); + } + if (r->match.community.as != COMMUNITY_UNSET) { printf("community "); print_community(r->match.community.as, r->match.community.type); } + if (r->match.ext_community.flags & EXT_COMMUNITY_FLAG_VALID) { + printf("ext-community "); + print_extcommunity(&r->match.ext_community); + } print_set(&r->set); @@ -513,6 +618,8 @@ mrt_type(enum mrt_type t) return "table"; case MRT_TABLE_DUMP_MP: return "table-mp"; + case MRT_TABLE_DUMP_V2: + return "table-v2"; case MRT_ALL_IN: return "all in"; case MRT_ALL_OUT: -@@ -541,13 +648,12 @@ print_mrt(u_int32_t pid, u_int32_t gid, +@@ -541,12 +648,12 @@ print_mrt(u_int32_t pid, u_int32_t gid, printf("%s%sdump ", prep, prep2); if (m->rib[0]) printf("rib %s ", m->rib); + printf("%s \"%s\"", mrt_type(m->type), + MRT2MC(m)->name); if (MRT2MC(m)->ReopenTimerInterval == 0) - printf("%s %s\n", mrt_type(m->type), - MRT2MC(m)->name); + printf("\n"); else - printf("%s %s %d\n", mrt_type(m->type), - MRT2MC(m)->name, -- MRT2MC(m)->ReopenTimerInterval); -+ printf(" %d\n", MRT2MC(m)->ReopenTimerInterval); ++ printf(" %ld\n", + MRT2MC(m)->ReopenTimerInterval); } } - -@@ -612,26 +718,34 @@ peer_compare(const void *aa, const void +@@ -612,26 +719,34 @@ peer_compare(const void *aa, const void void print_config(struct bgpd_config *conf, struct rib_names *rib_l, struct network_head *net_l, struct peer *peer_l, - struct filter_head *rules_l, struct mrt_head *mrt_l) + struct filter_head *rules_l, struct mrt_head *mrt_l, + struct rdomain_head *rdom_l) { struct filter_rule *r; struct network *n; struct rde_rib *rr; + struct rdomain *rd; xmrt_l = mrt_l; - printf("\n"); print_mainconf(conf); printf("\n"); + TAILQ_FOREACH(n, net_l, entry) + print_network(&n->net); + printf("\n"); + SIMPLEQ_FOREACH(rd, rdom_l, entry) + print_rdomain(rd); + printf("\n"); SIMPLEQ_FOREACH(rr, rib_l, entry) { if (rr->flags & F_RIB_NOEVALUATE) printf("rde rib %s no evaluate\n", rr->name); - else + else if (rr->flags & F_RIB_NOFIB) printf("rde rib %s\n", rr->name); + else + printf("rde rib %s rtable %u fib-update %s\n", rr->name, + rr->rtableid, rr->flags & F_RIB_NOFIBSYNC ? + "no" : "yes"); } printf("\n"); - TAILQ_FOREACH(n, net_l, entry) - print_network(&n->net); - printf("\n"); print_mrt(0, 0, "", ""); printf("\n"); print_groups(conf, peer_l); Index: head/net/openbgpd/files/patch-bgpd_rde.c =================================================================== --- head/net/openbgpd/files/patch-bgpd_rde.c (revision 354183) +++ head/net/openbgpd/files/patch-bgpd_rde.c (revision 354184) @@ -1,2595 +1,2614 @@ Index: bgpd/rde.c =================================================================== RCS file: /home/cvs/private/hrs/openbgpd/bgpd/rde.c,v retrieving revision 1.1.1.8 -retrieving revision 1.11 -diff -u -p -r1.1.1.8 -r1.11 +retrieving revision 1.12 +diff -u -p -r1.1.1.8 -r1.12 --- bgpd/rde.c 14 Feb 2010 20:19:57 -0000 1.1.1.8 -+++ bgpd/rde.c 13 Oct 2012 18:36:00 -0000 1.11 ++++ bgpd/rde.c 16 May 2014 00:36:26 -0000 1.12 @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.264 2009/06/29 12:22:16 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.320 2012/09/18 09:45:51 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -18,10 +18,11 @@ #include #include +#include +#include #include #include -#include #include #include #include @@ -50,13 +51,18 @@ void rde_update_withdraw(struct rde_pe u_int8_t); int rde_attr_parse(u_char *, u_int16_t, struct rde_peer *, struct rde_aspath *, struct mpattr *); +int rde_attr_add(struct rde_aspath *, u_char *, u_int16_t); u_int8_t rde_attr_missing(struct rde_aspath *, int, u_int16_t); -int rde_get_mp_nexthop(u_char *, u_int16_t, u_int16_t, - struct rde_aspath *); +int rde_get_mp_nexthop(u_char *, u_int16_t, u_int8_t, + struct rde_aspath *, struct rde_peer *); +int rde_update_extract_prefix(u_char *, u_int16_t, void *, + u_int8_t, u_int8_t); int rde_update_get_prefix(u_char *, u_int16_t, struct bgpd_addr *, u_int8_t *); int rde_update_get_prefix6(u_char *, u_int16_t, struct bgpd_addr *, u_int8_t *); +int rde_update_get_vpn4(u_char *, u_int16_t, struct bgpd_addr *, + u_int8_t *); void rde_update_err(struct rde_peer *, u_int8_t , u_int8_t, void *, u_int16_t); void rde_update_log(const char *, u_int16_t, @@ -78,11 +84,15 @@ void rde_dump_ctx_new(struct ctl_show_ void rde_dump_mrt_new(struct mrt *, pid_t, int); void rde_dump_done(void *); +int rde_rdomain_import(struct rde_aspath *, struct rdomain *); void rde_up_dump_upcall(struct rib_entry *, void *); void rde_softreconfig_out(struct rib_entry *, void *); void rde_softreconfig_in(struct rib_entry *, void *); +void rde_softreconfig_load(struct rib_entry *, void *); +void rde_softreconfig_load_peer(struct rib_entry *, void *); +void rde_softreconfig_unload_peer(struct rib_entry *, void *); void rde_update_queue_runner(void); -void rde_update6_queue_runner(void); +void rde_update6_queue_runner(u_int8_t); void peer_init(u_int32_t); void peer_shutdown(void); @@ -91,10 +101,12 @@ struct rde_peer *peer_add(u_int32_t, str struct rde_peer *peer_get(u_int32_t); void peer_up(u_int32_t, struct session_up *); void peer_down(u_int32_t); -void peer_dump(u_int32_t, u_int16_t, u_int8_t); -void peer_send_eor(struct rde_peer *, u_int16_t, u_int16_t); +void peer_flush(struct rde_peer *, u_int8_t); +void peer_stale(u_int32_t, u_int8_t); +void peer_recv_eor(struct rde_peer *, u_int8_t); +void peer_dump(u_int32_t, u_int8_t); +void peer_send_eor(struct rde_peer *, u_int8_t); -void network_init(struct network_head *); void network_add(struct network_config *, int); void network_delete(struct network_config *, int); void network_dump_upcall(struct rib_entry *, void *); @@ -108,6 +120,7 @@ time_t reloadtime; struct rde_peer_head peerlist; struct rde_peer *peerself; struct filter_head *rules_l, *newrules; +struct rdomain_head *rdomains_l, *newdomains; struct imsgbuf *ibuf_se; struct imsgbuf *ibuf_se_ctl; struct imsgbuf *ibuf_main; @@ -120,11 +133,12 @@ struct rde_dump_ctx { }; struct rde_mrt_ctx { - struct mrt mrt; - struct rib_context ribctx; + struct mrt mrt; + struct rib_context ribctx; + LIST_ENTRY(rde_mrt_ctx) entry; }; -struct mrt_head rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts); +LIST_HEAD(, rde_mrt_ctx) rde_mrts = LIST_HEAD_INITIALIZER(rde_mrts); u_int rde_mrt_cnt; void @@ -144,24 +158,17 @@ u_int32_t attrhashsize = 512; u_int32_t nexthophashsize = 64; pid_t -rde_main(struct bgpd_config *config, struct peer *peer_l, - struct network_head *net_l, struct filter_head *rules, - struct mrt_head *mrt_l, struct rib_names *rib_n, int pipe_m2r[2], - int pipe_s2r[2], int pipe_m2s[2], int pipe_s2rctl[2], int debug) +rde_main(int pipe_m2r[2], int pipe_s2r[2], int pipe_m2s[2], int pipe_s2rctl[2], + int debug) { pid_t pid; struct passwd *pw; - struct peer *p; - struct listen_addr *la; struct pollfd *pfd = NULL; - struct filter_rule *f; - struct filter_set *set; - struct nexthop *nh; - struct rde_rib *rr; - struct mrt *mrt, *xmrt; + struct rde_mrt_ctx *mctx, *xmctx; void *newp; u_int pfd_elms = 0, i, j; int timeout; + u_int8_t aid; switch (pid = fork()) { case -1: @@ -172,8 +179,6 @@ rde_main(struct bgpd_config *config, str return (pid); } - conf = config; - if ((pw = getpwnam(BGPD_USER)) == NULL) fatal("getpwnam"); @@ -194,6 +199,8 @@ rde_main(struct bgpd_config *config, str signal(SIGINT, rde_sighdlr); signal(SIGPIPE, SIG_IGN); signal(SIGHUP, SIG_IGN); + signal(SIGALRM, SIG_IGN); + signal(SIGUSR1, SIG_IGN); close(pipe_s2r[0]); close(pipe_s2rctl[0]); @@ -210,50 +217,25 @@ rde_main(struct bgpd_config *config, str imsg_init(ibuf_se_ctl, pipe_s2rctl[1]); imsg_init(ibuf_main, pipe_m2r[1]); - /* peer list, mrt list and listener list are not used in the RDE */ - while ((p = peer_l) != NULL) { - peer_l = p->next; - free(p); - } - - while ((mrt = LIST_FIRST(mrt_l)) != NULL) { - LIST_REMOVE(mrt, entry); - free(mrt); - } - - while ((la = TAILQ_FIRST(config->listen_addrs)) != NULL) { - TAILQ_REMOVE(config->listen_addrs, la, entry); - close(la->fd); - free(la); - } - free(config->listen_addrs); - pt_init(); - while ((rr = SIMPLEQ_FIRST(&ribnames))) { - SIMPLEQ_REMOVE_HEAD(&ribnames, entry); - rib_new(-1, rr->name, rr->flags); - free(rr); - } path_init(pathhashsize); aspath_init(pathhashsize); attr_init(attrhashsize); nexthop_init(nexthophashsize); peer_init(peerhashsize); - rules_l = rules; - network_init(net_l); + rules_l = calloc(1, sizeof(struct filter_head)); + if (rules_l == NULL) + fatal(NULL); + TAILQ_INIT(rules_l); + rdomains_l = calloc(1, sizeof(struct rdomain_head)); + if (rdomains_l == NULL) + fatal(NULL); + SIMPLEQ_INIT(rdomains_l); + if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL) + fatal(NULL); log_info("route decision engine ready"); - TAILQ_FOREACH(f, rules, entry) { - f->peer.ribid = rib_find(f->rib); - TAILQ_FOREACH(set, &f->set, entry) { - if (set->type == ACTION_SET_NEXTHOP) { - nh = nexthop_get(&set->action.nexthop); - nh->refcnt++; - } - } - } - while (rde_quit == 0) { if (pfd_elms < PFD_PIPE_COUNT + rde_mrt_cnt) { if ((newp = realloc(pfd, sizeof(struct pollfd) * @@ -287,11 +269,18 @@ rde_main(struct bgpd_config *config, str timeout = 0; i = PFD_PIPE_COUNT; - LIST_FOREACH(mrt, &rde_mrts, entry) { - if (mrt->wbuf.queued) { - pfd[i].fd = mrt->wbuf.fd; + for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) { + xmctx = LIST_NEXT(mctx, entry); + if (mctx->mrt.wbuf.queued) { + pfd[i].fd = mctx->mrt.wbuf.fd; pfd[i].events = POLLOUT; i++; + } else if (mctx->mrt.state == MRT_STATE_REMOVE) { + close(mctx->mrt.wbuf.fd); + LIST_REMOVE(&mctx->ribctx, entry); + LIST_REMOVE(mctx, entry); + free(mctx); + rde_mrt_cnt--; } } @@ -325,24 +314,17 @@ rde_main(struct bgpd_config *config, str if (pfd[PFD_PIPE_SESSION_CTL].revents & POLLIN) rde_dispatch_imsg_session(ibuf_se_ctl); - for (j = PFD_PIPE_COUNT, mrt = LIST_FIRST(&rde_mrts); - j < i && mrt != 0; j++) { - xmrt = LIST_NEXT(mrt, entry); - if (pfd[j].fd == mrt->wbuf.fd && + for (j = PFD_PIPE_COUNT, mctx = LIST_FIRST(&rde_mrts); + j < i && mctx != 0; j++) { + if (pfd[j].fd == mctx->mrt.wbuf.fd && pfd[j].revents & POLLOUT) - mrt_write(mrt); - if (mrt->wbuf.queued == 0 && - mrt->state == MRT_STATE_REMOVE) { - close(mrt->wbuf.fd); - LIST_REMOVE(mrt, entry); - free(mrt); - rde_mrt_cnt--; - } - mrt = xmrt; + mrt_write(&mctx->mrt); + mctx = LIST_NEXT(mctx, entry); } rde_update_queue_runner(); - rde_update6_queue_runner(); + for (aid = AID_INET6; aid < AID_MAX; aid++) + rde_update6_queue_runner(aid); if (ibuf_se_ctl->w.queued <= 0) rib_dump_runner(); } @@ -351,11 +333,12 @@ rde_main(struct bgpd_config *config, str if (debug) rde_shutdown(); - while ((mrt = LIST_FIRST(&rde_mrts)) != NULL) { - msgbuf_clear(&mrt->wbuf); - close(mrt->wbuf.fd); - LIST_REMOVE(mrt, entry); - free(mrt); + while ((mctx = LIST_FIRST(&rde_mrts)) != NULL) { + msgbuf_clear(&mctx->mrt.wbuf); + close(mctx->mrt.wbuf.fd); + LIST_REMOVE(&mctx->ribctx, entry); + LIST_REMOVE(mctx, entry); + free(mctx); } msgbuf_clear(&ibuf_se->w); @@ -378,13 +361,18 @@ rde_dispatch_imsg_session(struct imsgbuf struct imsg imsg; struct peer p; struct peer_config pconf; - struct rrefresh r; - struct rde_peer *peer; struct session_up sup; + struct ctl_show_rib csr; struct ctl_show_rib_request req; + struct rde_peer *peer; + struct rde_aspath *asp; struct filter_set *s; struct nexthop *nh; - int n; + u_int8_t *asdata; + ssize_t n; + int verbose; + u_int16_t len; + u_int8_t aid; if ((n = imsg_read(ibuf)) == -1) fatal("rde_dispatch_imsg_session: imsg_read error"); @@ -422,13 +410,56 @@ rde_dispatch_imsg_session(struct imsgbuf case IMSG_SESSION_DOWN: peer_down(imsg.hdr.peerid); break; + case IMSG_SESSION_STALE: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_STALE: bad AID"); + peer_stale(imsg.hdr.peerid, aid); + break; + case IMSG_SESSION_FLUSH: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_FLUSH: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + peer_flush(peer, aid); + break; + case IMSG_SESSION_RESTARTED: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_SESSION_RESTARTED: bad AID"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) { + log_warnx("rde_dispatch: unknown peer id %d", + imsg.hdr.peerid); + break; + } + if (peer->staletime[aid]) + peer_flush(peer, aid); + break; case IMSG_REFRESH: - if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(r)) { + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) { log_warnx("rde_dispatch: wrong imsg len"); break; } - memcpy(&r, imsg.data, sizeof(r)); - peer_dump(imsg.hdr.peerid, r.afi, r.safi); + memcpy(&aid, imsg.data, sizeof(aid)); + if (aid >= AID_MAX) + fatalx("IMSG_REFRESH: bad AID"); + peer_dump(imsg.hdr.peerid, aid); break; case IMSG_NETWORK_ADD: if (imsg.hdr.len - IMSG_HEADER_SIZE != @@ -440,23 +471,68 @@ rde_dispatch_imsg_session(struct imsgbuf TAILQ_INIT(&netconf_s.attrset); session_set = &netconf_s.attrset; break; + case IMSG_NETWORK_ASPATH: + if (imsg.hdr.len - IMSG_HEADER_SIZE < + sizeof(struct ctl_show_rib)) { + log_warnx("rde_dispatch: wrong imsg len"); + bzero(&netconf_s, sizeof(netconf_s)); + break; + } + asdata = imsg.data; + asdata += sizeof(struct ctl_show_rib); + memcpy(&csr, imsg.data, sizeof(csr)); + if (csr.aspath_len + sizeof(csr) > imsg.hdr.len - + IMSG_HEADER_SIZE) { + log_warnx("rde_dispatch: wrong aspath len"); + bzero(&netconf_s, sizeof(netconf_s)); + break; + } + asp = path_get(); + asp->lpref = csr.local_pref; + asp->med = csr.med; + asp->weight = csr.weight; + asp->flags = csr.flags; + asp->origin = csr.origin; + asp->flags |= F_PREFIX_ANNOUNCED | F_ANN_DYNAMIC; + asp->aspath = aspath_get(asdata, csr.aspath_len); + netconf_s.asp = asp; + break; + case IMSG_NETWORK_ATTR: + if (imsg.hdr.len <= IMSG_HEADER_SIZE) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } + /* parse path attributes */ + len = imsg.hdr.len - IMSG_HEADER_SIZE; + asp = netconf_s.asp; + if (rde_attr_add(asp, imsg.data, len) == -1) { + log_warnx("rde_dispatch: bad network " + "attribute"); + path_put(asp); + bzero(&netconf_s, sizeof(netconf_s)); + break; + } + break; case IMSG_NETWORK_DONE: if (imsg.hdr.len != IMSG_HEADER_SIZE) { log_warnx("rde_dispatch: wrong imsg len"); break; } session_set = NULL; - switch (netconf_s.prefix.af) { - case AF_INET: + switch (netconf_s.prefix.aid) { + case AID_INET: if (netconf_s.prefixlen > 32) goto badnet; network_add(&netconf_s, 0); break; - case AF_INET6: + case AID_INET6: if (netconf_s.prefixlen > 128) goto badnet; network_add(&netconf_s, 0); break; + case 0: + /* something failed beforehands */ + break; default: badnet: log_warnx("rde_dispatch: bad network"); @@ -528,10 +604,14 @@ badnet: peer->prefix_rcvd_update; p.stats.prefix_rcvd_withdraw = peer->prefix_rcvd_withdraw; + p.stats.prefix_rcvd_eor = + peer->prefix_rcvd_eor; p.stats.prefix_sent_update = peer->prefix_sent_update; p.stats.prefix_sent_withdraw = peer->prefix_sent_withdraw; + p.stats.prefix_sent_eor = + peer->prefix_sent_eor; } imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0, imsg.hdr.pid, -1, &p, sizeof(struct peer)); @@ -544,6 +624,11 @@ badnet: imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0, imsg.hdr.pid, -1, &rdemem, sizeof(rdemem)); break; + case IMSG_CTL_LOG_VERBOSE: + /* already checked by SE */ + memcpy(&verbose, imsg.data, sizeof(verbose)); + log_verbose(verbose); + break; default: break; } @@ -554,14 +639,17 @@ badnet: void rde_dispatch_imsg_parent(struct imsgbuf *ibuf) { + static struct rdomain *rd; struct imsg imsg; struct mrt xmrt; struct rde_rib rn; struct rde_peer *peer; + struct peer_config *pconf; struct filter_rule *r; struct filter_set *s; struct nexthop *nh; - int n, fd, reconf_in = 0, reconf_out = 0; + int n, fd, reconf_in = 0, reconf_out = 0, + reconf_rib = 0; u_int16_t rid; if ((n = imsg_read(ibuf)) == -1) @@ -576,20 +664,12 @@ rde_dispatch_imsg_parent(struct imsgbuf break; switch (imsg.hdr.type) { - case IMSG_RECONF_CONF: - reloadtime = time(NULL); - newrules = calloc(1, sizeof(struct filter_head)); - if (newrules == NULL) - fatal(NULL); - TAILQ_INIT(newrules); - if ((nconf = malloc(sizeof(struct bgpd_config))) == - NULL) - fatal(NULL); - memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); - for (rid = 0; rid < rib_size; rid++) - ribs[rid].state = RIB_DELETE; - break; case IMSG_NETWORK_ADD: + if (imsg.hdr.len - IMSG_HEADER_SIZE != + sizeof(struct network_config)) { + log_warnx("rde_dispatch: wrong imsg len"); + break; + } memcpy(&netconf_p, imsg.data, sizeof(netconf_p)); TAILQ_INIT(&netconf_p.attrset); parent_set = &netconf_p.attrset; @@ -608,6 +688,26 @@ rde_dispatch_imsg_parent(struct imsgbuf TAILQ_INIT(&netconf_p.attrset); network_delete(&netconf_p, 1); break; + case IMSG_RECONF_CONF: + if (imsg.hdr.len - IMSG_HEADER_SIZE != + sizeof(struct bgpd_config)) + fatalx("IMSG_RECONF_CONF bad len"); + reloadtime = time(NULL); + newrules = calloc(1, sizeof(struct filter_head)); + if (newrules == NULL) + fatal(NULL); + TAILQ_INIT(newrules); + newdomains = calloc(1, sizeof(struct rdomain_head)); + if (newdomains == NULL) + fatal(NULL); + SIMPLEQ_INIT(newdomains); + if ((nconf = malloc(sizeof(struct bgpd_config))) == + NULL) + fatal(NULL); + memcpy(nconf, imsg.data, sizeof(struct bgpd_config)); + for (rid = 0; rid < rib_size; rid++) + ribs[rid].state = RECONF_DELETE; + break; case IMSG_RECONF_RIB: if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(struct rde_rib)) @@ -615,9 +715,26 @@ rde_dispatch_imsg_parent(struct imsgbuf memcpy(&rn, imsg.data, sizeof(rn)); rid = rib_find(rn.name); if (rid == RIB_FAILED) - rib_new(-1, rn.name, rn.flags); - else - ribs[rid].state = RIB_ACTIVE; + rib_new(rn.name, rn.rtableid, rn.flags); + else if (ribs[rid].rtableid != rn.rtableid || + (ribs[rid].flags & F_RIB_HASNOFIB) != + (rn.flags & F_RIB_HASNOFIB)) { + /* Big hammer in the F_RIB_NOFIB case but + * not often enough used to optimise it more. */ + rib_free(&ribs[rid]); + rib_new(rn.name, rn.rtableid, rn.flags); + } else + ribs[rid].state = RECONF_KEEP; + break; + case IMSG_RECONF_PEER: + if (imsg.hdr.len - IMSG_HEADER_SIZE != + sizeof(struct peer_config)) + fatalx("IMSG_RECONF_PEER bad len"); + if ((peer = peer_get(imsg.hdr.peerid)) == NULL) + break; + pconf = imsg.data; + strlcpy(peer->conf.rib, pconf->rib, + sizeof(peer->conf.rib)); break; case IMSG_RECONF_FILTER: if (imsg.hdr.len - IMSG_HEADER_SIZE != @@ -631,12 +748,42 @@ rde_dispatch_imsg_parent(struct imsgbuf parent_set = &r->set; TAILQ_INSERT_TAIL(newrules, r, entry); break; + case IMSG_RECONF_RDOMAIN: + if (imsg.hdr.len - IMSG_HEADER_SIZE != + sizeof(struct rdomain)) + fatalx("IMSG_RECONF_RDOMAIN bad len"); + if ((rd = malloc(sizeof(struct rdomain))) == NULL) + fatal(NULL); + memcpy(rd, imsg.data, sizeof(struct rdomain)); + TAILQ_INIT(&rd->import); + TAILQ_INIT(&rd->export); + SIMPLEQ_INSERT_TAIL(newdomains, rd, entry); + break; + case IMSG_RECONF_RDOMAIN_EXPORT: + if (rd == NULL) { + log_warnx("rde_dispatch_imsg_parent: " + "IMSG_RECONF_RDOMAIN_EXPORT unexpected"); + break; + } + parent_set = &rd->export; + break; + case IMSG_RECONF_RDOMAIN_IMPORT: + if (rd == NULL) { + log_warnx("rde_dispatch_imsg_parent: " + "IMSG_RECONF_RDOMAIN_IMPORT unexpected"); + break; + } + parent_set = &rd->import; + break; + case IMSG_RECONF_RDOMAIN_DONE: + parent_set = NULL; + break; case IMSG_RECONF_DONE: if (nconf == NULL) fatalx("got IMSG_RECONF_DONE but no config"); if ((nconf->flags & BGPD_FLAG_NO_EVALUATE) != (conf->flags & BGPD_FLAG_NO_EVALUATE)) { - log_warnx( "change to/from route-collector " + log_warnx("change to/from route-collector " "mode ignored"); if (conf->flags & BGPD_FLAG_NO_EVALUATE) nconf->flags |= BGPD_FLAG_NO_EVALUATE; @@ -644,10 +791,27 @@ rde_dispatch_imsg_parent(struct imsgbuf nconf->flags &= ~BGPD_FLAG_NO_EVALUATE; } memcpy(conf, nconf, sizeof(struct bgpd_config)); + conf->listen_addrs = NULL; + conf->csock = NULL; + conf->rcsock = NULL; free(nconf); nconf = NULL; parent_set = NULL; - prefix_network_clean(peerself, reloadtime, 0); + /* sync peerself with conf */ + peerself->remote_bgpid = ntohl(conf->bgpid); + peerself->conf.local_as = conf->as; + peerself->conf.remote_as = conf->as; + peerself->short_as = conf->short_as; + + /* apply new set of rdomain, sync will be done later */ + while ((rd = SIMPLEQ_FIRST(rdomains_l)) != NULL) { + SIMPLEQ_REMOVE_HEAD(rdomains_l, entry); + filterset_free(&rd->import); + filterset_free(&rd->export); + free(rd); + } + free(rdomains_l); + rdomains_l = newdomains; /* check if filter changed */ LIST_FOREACH(peer, &peerlist, peer_l) { @@ -655,30 +819,59 @@ rde_dispatch_imsg_parent(struct imsgbuf continue; peer->reconf_out = 0; peer->reconf_in = 0; - if (peer->conf.softreconfig_out && - !rde_filter_equal(rules_l, newrules, peer, - DIR_OUT)) { - peer->reconf_out = 1; - reconf_out = 1; - } + peer->reconf_rib = 0; if (peer->conf.softreconfig_in && !rde_filter_equal(rules_l, newrules, peer, DIR_IN)) { peer->reconf_in = 1; reconf_in = 1; } + if (peer->ribid != rib_find(peer->conf.rib)) { + rib_dump(&ribs[peer->ribid], + rde_softreconfig_unload_peer, peer, + AID_UNSPEC); + peer->ribid = rib_find(peer->conf.rib); + peer->reconf_rib = 1; + reconf_rib = 1; + continue; + } + if (peer->conf.softreconfig_out && + !rde_filter_equal(rules_l, newrules, peer, + DIR_OUT)) { + peer->reconf_out = 1; + reconf_out = 1; + } -+ } + } +- /* XXX this needs rework anyway */ +- /* sync local-RIB first */ + /* bring ribs in sync before softreconfig dance */ + for (rid = 0; rid < rib_size; rid++) { + if (ribs[rid].state == RECONF_DELETE) + rib_free(&ribs[rid]); + else if (ribs[rid].state == RECONF_REINIT) + rib_dump(&ribs[0], + rde_softreconfig_load, &ribs[rid], + AID_UNSPEC); - } -- /* XXX this needs rework anyway */ -- /* sync local-RIB first */ ++ } + /* sync local-RIBs first */ if (reconf_in) rib_dump(&ribs[0], rde_softreconfig_in, NULL, - AF_UNSPEC); + AID_UNSPEC); /* then sync peers */ if (reconf_out) { int i; - for (i = 1; i < rib_size; i++) + for (i = 1; i < rib_size; i++) { + if (ribs[i].state == RECONF_REINIT) + /* already synced by _load */ + continue; rib_dump(&ribs[i], rde_softreconfig_out, - NULL, AF_UNSPEC); + NULL, AID_UNSPEC); + } + } + if (reconf_rib) { + LIST_FOREACH(peer, &peerlist, peer_l) { + rib_dump(&ribs[peer->ribid], + rde_softreconfig_load_peer, + peer, AID_UNSPEC); + } } while ((r = TAILQ_FIRST(rules_l)) != NULL) { @@ -688,16 +881,18 @@ rde_dispatch_imsg_parent(struct imsgbuf } free(rules_l); rules_l = newrules; - for (rid = 0; rid < rib_size; rid++) { - if (ribs[rid].state == RIB_DELETE) - rib_free(&ribs[rid]); - } + log_info("RDE reconfigured"); + imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0, + -1, NULL, 0); break; case IMSG_NEXTHOP_UPDATE: nexthop_update(imsg.data); break; case IMSG_FILTER_SET: + if (imsg.hdr.len > IMSG_HEADER_SIZE + + sizeof(struct filter_set)) + fatalx("IMSG_RECONF_CONF bad len"); if (parent_set == NULL) { log_warnx("rde_dispatch_imsg_parent: " "IMSG_FILTER_SET unexpected"); @@ -725,7 +920,8 @@ rde_dispatch_imsg_parent(struct imsgbuf log_warnx("expected to receive fd for mrt dump " "but didn't receive any"); else if (xmrt.type == MRT_TABLE_DUMP || - xmrt.type == MRT_TABLE_DUMP_MP) { + xmrt.type == MRT_TABLE_DUMP_MP || + xmrt.type == MRT_TABLE_DUMP_V2) { rde_dump_mrt_new(&xmrt, imsg.hdr.pid, fd); } else close(fd); @@ -744,6 +940,8 @@ rde_dispatch_imsg_parent(struct imsgbuf int rde_update_dispatch(struct imsg *imsg) { + struct bgpd_addr prefix; + struct mpattr mpa; struct rde_peer *peer; struct rde_aspath *asp = NULL; u_char *p, *mpp = NULL; @@ -752,9 +950,8 @@ rde_update_dispatch(struct imsg *imsg) u_int16_t withdrawn_len; u_int16_t attrpath_len; u_int16_t nlri_len; - u_int8_t prefixlen, safi, subtype; - struct bgpd_addr prefix; - struct mpattr mpa; + u_int8_t aid, prefixlen, safi, subtype; + u_int32_t fas; peer = peer_get(imsg->hdr.peerid); if (peer == NULL) /* unknown peer, cannot happen */ @@ -810,26 +1007,21 @@ rde_update_dispatch(struct imsg *imsg) goto done; } - /* - * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present - * try to fixup the attributes. - * XXX do not fixup if F_ATTR_LOOP is set. - */ - if (asp->flags & F_ATTR_AS4BYTE_NEW && - !(asp->flags & F_ATTR_LOOP)) - rde_as4byte_fixup(peer, asp); + rde_as4byte_fixup(peer, asp); /* enforce remote AS if requested */ if (asp->flags & F_ATTR_ASPATH && - peer->conf.enforce_as == ENFORCE_AS_ON) - if (peer->conf.remote_as != - aspath_neighbor(asp->aspath)) { - log_peer_warnx(&peer->conf, "bad path, " - "enforce remote-as enabled"); - rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, + peer->conf.enforce_as == ENFORCE_AS_ON) { + fas = aspath_neighbor(asp->aspath); + if (peer->conf.remote_as != fas) { + log_peer_warnx(&peer->conf, "bad path, " + "starting with %s, " + "enforce neighbor-as enabled", log_as(fas)); + rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, NULL, 0); - goto done; + goto done; } + } rde_reflector(peer, asp); } @@ -860,9 +1052,9 @@ rde_update_dispatch(struct imsg *imsg) p += pos; len -= pos; - if (peer->capa_received.mp_v4 == SAFI_NONE && - peer->capa_received.mp_v6 != SAFI_NONE) { - log_peer_warnx(&peer->conf, "bad AFI, IPv4 disabled"); + if (peer->capa.mp[AID_INET] == 0) { + log_peer_warnx(&peer->conf, + "bad withdraw, %s disabled", aid2str(AID_INET)); rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, NULL, 0); goto done; @@ -879,6 +1071,10 @@ rde_update_dispatch(struct imsg *imsg) ERR_UPD_ATTRLIST, NULL, 0); return (-1); } + if (withdrawn_len == 0) { + /* EoR marker */ + peer_recv_eor(peer, AID_INET); + } return (0); } @@ -892,15 +1088,30 @@ rde_update_dispatch(struct imsg *imsg) afi = ntohs(afi); safi = *mpp++; mplen--; - switch (afi) { - case AFI_IPv6: - if (peer->capa_received.mp_v6 == SAFI_NONE) { - log_peer_warnx(&peer->conf, "bad AFI, " - "IPv6 disabled"); - rde_update_err(peer, ERR_UPDATE, - ERR_UPD_OPTATTR, NULL, 0); - goto done; - } + + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "bad AFI/SAFI pair in withdraw"); + rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, + NULL, 0); + goto done; + } + + if (peer->capa.mp[aid] == 0) { + log_peer_warnx(&peer->conf, + "bad withdraw, %s disabled", aid2str(aid)); + rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, + NULL, 0); + goto done; + } + + if ((asp->flags & ~F_ATTR_MP_UNREACH) == 0 && mplen == 0) { + /* EoR marker */ + peer_recv_eor(peer, aid); + } + + switch (aid) { + case AID_INET6: while (mplen > 0) { if ((pos = rde_update_get_prefix6(mpp, mplen, &prefix, &prefixlen)) == -1) { @@ -926,6 +1137,32 @@ rde_update_dispatch(struct imsg *imsg) rde_update_withdraw(peer, &prefix, prefixlen); } break; + case AID_VPN_IPv4: + while (mplen > 0) { + if ((pos = rde_update_get_vpn4(mpp, mplen, + &prefix, &prefixlen)) == -1) { + log_peer_warnx(&peer->conf, + "bad VPNv4 withdraw prefix"); + rde_update_err(peer, ERR_UPDATE, + ERR_UPD_OPTATTR, + mpa.unreach, mpa.unreach_len); + goto done; + } + if (prefixlen > 32) { + log_peer_warnx(&peer->conf, + "bad VPNv4 withdraw prefix"); + rde_update_err(peer, ERR_UPDATE, + ERR_UPD_OPTATTR, + mpa.unreach, mpa.unreach_len); + goto done; + } + + mpp += pos; + mplen -= pos; + + rde_update_withdraw(peer, &prefix, prefixlen); + } + break; default: /* silently ignore unsupported multiprotocol AF */ break; @@ -963,9 +1200,9 @@ rde_update_dispatch(struct imsg *imsg) p += pos; nlri_len -= pos; - if (peer->capa_received.mp_v4 == SAFI_NONE && - peer->capa_received.mp_v6 != SAFI_NONE) { - log_peer_warnx(&peer->conf, "bad AFI, IPv4 disabled"); + if (peer->capa.mp[AID_INET] == 0) { + log_peer_warnx(&peer->conf, + "bad update, %s disabled", aid2str(AID_INET)); rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, NULL, 0); goto done; @@ -995,6 +1232,22 @@ rde_update_dispatch(struct imsg *imsg) safi = *mpp++; mplen--; + if (afi2aid(afi, safi, &aid) == -1) { + log_peer_warnx(&peer->conf, + "bad AFI/SAFI pair in update"); + rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, + NULL, 0); + goto done; + } + + if (peer->capa.mp[aid] == 0) { + log_peer_warnx(&peer->conf, + "bad update, %s disabled", aid2str(aid)); + rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, + NULL, 0); + goto done; + } + /* * this works because asp is not linked. * But first unlock the previously locked nexthop. @@ -1004,8 +1257,8 @@ rde_update_dispatch(struct imsg *imsg) (void)nexthop_delete(asp->nexthop); asp->nexthop = NULL; } - if ((pos = rde_get_mp_nexthop(mpp, mplen, afi, asp)) == -1) { - log_peer_warnx(&peer->conf, "bad IPv6 nlri prefix"); + if ((pos = rde_get_mp_nexthop(mpp, mplen, aid, asp, peer)) == -1) { + log_peer_warnx(&peer->conf, "bad nlri prefix"); rde_update_err(peer, ERR_UPDATE, ERR_UPD_OPTATTR, mpa.reach, mpa.reach_len); goto done; @@ -1013,16 +1266,8 @@ rde_update_dispatch(struct imsg *imsg) mpp += pos; mplen -= pos; - switch (afi) { - case AFI_IPv6: - if (peer->capa_received.mp_v6 == SAFI_NONE) { - log_peer_warnx(&peer->conf, "bad AFI, " - "IPv6 disabled"); - rde_update_err(peer, ERR_UPDATE, - ERR_UPD_OPTATTR, NULL, 0); - goto done; - } - + switch (aid) { + case AID_INET6: while (mplen > 0) { if ((pos = rde_update_get_prefix6(mpp, mplen, &prefix, &prefixlen)) == -1) { @@ -1058,6 +1303,42 @@ rde_update_dispatch(struct imsg *imsg) } break; + case AID_VPN_IPv4: + while (mplen > 0) { + if ((pos = rde_update_get_vpn4(mpp, mplen, + &prefix, &prefixlen)) == -1) { + log_peer_warnx(&peer->conf, + "bad VPNv4 nlri prefix"); + rde_update_err(peer, ERR_UPDATE, + ERR_UPD_OPTATTR, + mpa.reach, mpa.reach_len); + goto done; + } + if (prefixlen > 32) { + rde_update_err(peer, ERR_UPDATE, + ERR_UPD_OPTATTR, + mpa.reach, mpa.reach_len); + goto done; + } + + mpp += pos; + mplen -= pos; + + rde_update_update(peer, asp, &prefix, + prefixlen); + + /* max prefix checker */ + if (peer->conf.max_prefix && + peer->prefix_cnt >= peer->conf.max_prefix) { + log_peer_warnx(&peer->conf, + "prefix limit reached"); + rde_update_err(peer, ERR_CEASE, + ERR_CEASE_MAX_PREFIX, NULL, 0); + goto done; + } + + } + break; default: /* silently ignore unsupported multiprotocol AF */ break; @@ -1085,7 +1366,8 @@ rde_update_update(struct rde_peer *peer, struct bgpd_addr *prefix, u_int8_t prefixlen) { struct rde_aspath *fasp; - int r = 0; + enum filter_actions action; + int r = 0, f = 0; u_int16_t i; peer->prefix_rcvd_update++; @@ -1095,18 +1377,24 @@ rde_update_update(struct rde_peer *peer, for (i = 1; i < rib_size; i++) { /* input filter */ - if (rde_filter(i, &fasp, rules_l, peer, asp, prefix, prefixlen, - peer, DIR_IN) == ACTION_DENY) - goto done; + action = rde_filter(i, &fasp, rules_l, peer, asp, prefix, + prefixlen, peer, DIR_IN); if (fasp == NULL) fasp = asp; - rde_update_log("update", i, peer, &fasp->nexthop->exit_nexthop, - prefix, prefixlen); - r += path_update(&ribs[i], peer, fasp, prefix, prefixlen); + if (action == ACTION_ALLOW) { + rde_update_log("update", i, peer, + &fasp->nexthop->exit_nexthop, prefix, prefixlen); + r += path_update(&ribs[i], peer, fasp, prefix, + prefixlen); + } else if (prefix_remove(&ribs[i], peer, prefix, prefixlen, + 0)) { + rde_update_log("filtered withdraw", i, peer, + NULL, prefix, prefixlen); + f++; + } -done: /* free modified aspath */ if (fasp != asp) path_put(fasp); @@ -1114,6 +1402,8 @@ done: if (r) peer->prefix_cnt++; + else if (f) + peer->prefix_cnt--; } void @@ -1152,7 +1442,7 @@ rde_update_withdraw(struct rde_peer *pee } while (0) #define CHECK_FLAGS(s, t, m) \ - (((s) & ~(ATTR_EXTLEN | (m))) == (t)) + (((s) & ~(ATTR_DEFMASK | (m))) == (t)) int rde_attr_parse(u_char *p, u_int16_t len, struct rde_peer *peer, @@ -1161,6 +1451,7 @@ rde_attr_parse(u_char *p, u_int16_t len, struct bgpd_addr nexthop; u_char *op = p, *npath; u_int32_t tmp32; + int err; u_int16_t attr_len, nlen; u_int16_t plen = 0; u_int8_t flags; @@ -1195,6 +1486,7 @@ bad_len: switch (type) { case ATTR_UNDEF: /* ignore and drop path attributes with a type code of 0 */ + plen += attr_len; break; case ATTR_ORIGIN: if (attr_len != 1) @@ -1220,7 +1512,17 @@ bad_flags: case ATTR_ASPATH: if (!CHECK_FLAGS(flags, ATTR_WELL_KNOWN, 0)) goto bad_flags; - if (aspath_verify(p, attr_len, rde_as4byte(peer)) != 0) { + err = aspath_verify(p, attr_len, rde_as4byte(peer)); + if (err == AS_ERR_SOFT) { + /* + * soft errors like unexpected segment types are + * not considered fatal and the path is just + * marked invalid. + */ + a->flags |= F_ATTR_PARSE_ERR; + log_peer_warnx(&peer->conf, "bad ASPATH, " + "path invalidated and prefix withdrawn"); + } else if (err != 0) { rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, NULL, 0); return (-1); @@ -1248,7 +1550,7 @@ bad_flags: a->flags |= F_ATTR_NEXTHOP; bzero(&nexthop, sizeof(nexthop)); - nexthop.af = AF_INET; + nexthop.aid = AID_INET; UPD_READ(&nexthop.v4.s_addr, p, plen, 4); /* * Check if the nexthop is a valid IP address. We consider @@ -1305,9 +1607,21 @@ bad_flags: goto optattr; case ATTR_AGGREGATOR: if ((!rde_as4byte(peer) && attr_len != 6) || - (rde_as4byte(peer) && attr_len != 8)) - goto bad_len; - if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, 0)) + (rde_as4byte(peer) && attr_len != 8)) { + /* + * ignore attribute in case of error as per + * draft-ietf-idr-optional-transitive-00.txt + * but only if partial bit is set + */ + if ((flags & ATTR_PARTIAL) == 0) + goto bad_len; + log_peer_warnx(&peer->conf, "bad AGGREGATOR, " + "partial attribute ignored"); + plen += attr_len; + break; + } + if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, + ATTR_PARTIAL)) goto bad_flags; if (!rde_as4byte(peer)) { /* need to inflate aggregator AS to 4-byte */ @@ -1323,8 +1637,35 @@ bad_flags: /* 4-byte ready server take the default route */ goto optattr; case ATTR_COMMUNITIES: - if ((attr_len & 0x3) != 0) - goto bad_len; + if (attr_len % 4 != 0) { + /* + * mark update as bad and withdraw all routes as per + * draft-ietf-idr-optional-transitive-00.txt + * but only if partial bit is set + */ + if ((flags & ATTR_PARTIAL) == 0) + goto bad_len; + a->flags |= F_ATTR_PARSE_ERR; + log_peer_warnx(&peer->conf, "bad COMMUNITIES, " + "path invalidated and prefix withdrawn"); + } + if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, + ATTR_PARTIAL)) + goto bad_flags; + goto optattr; + case ATTR_EXT_COMMUNITIES: + if (attr_len % 8 != 0) { + /* + * mark update as bad and withdraw all routes as per + * draft-ietf-idr-optional-transitive-00.txt + * but only if partial bit is set + */ + if ((flags & ATTR_PARTIAL) == 0) + goto bad_len; + a->flags |= F_ATTR_PARSE_ERR; + log_peer_warnx(&peer->conf, "bad EXT_COMMUNITIES, " + "path invalidated and prefix withdrawn"); + } if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_PARTIAL)) goto bad_flags; @@ -1336,7 +1677,7 @@ bad_flags: goto bad_flags; goto optattr; case ATTR_CLUSTER_LIST: - if ((attr_len & 0x3) != 0) + if (attr_len % 4 != 0) goto bad_len; if (!CHECK_FLAGS(flags, ATTR_OPTIONAL, 0)) goto bad_flags; @@ -1370,8 +1711,15 @@ bad_flags: plen += attr_len; break; case ATTR_AS4_AGGREGATOR: - if (attr_len != 8) - goto bad_len; + if (attr_len != 8) { + /* see ATTR_AGGREGATOR ... */ + if ((flags & ATTR_PARTIAL) == 0) + goto bad_len; + log_peer_warnx(&peer->conf, "bad AS4_AGGREGATOR, " + "partial attribute ignored"); + plen += attr_len; + break; + } if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_PARTIAL)) goto bad_flags; -@@ -1381,19 +1729,30 @@ bad_flags: +@@ -1381,20 +1729,31 @@ bad_flags: if (!CHECK_FLAGS(flags, ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_PARTIAL)) goto bad_flags; - if (aspath_verify(p, attr_len, 1) != 0) { + if ((err = aspath_verify(p, attr_len, 1)) != 0) { /* * XXX RFC does not specify how to handle errors. * XXX Instead of dropping the session because of a - * XXX bad path just mark the full update as not - * XXX loop-free the update is no longer eligible and - * XXX will not be considered for routing or - * XXX redistribution. Something better is needed. + * XXX bad path just mark the full update as having + * XXX a parse error which makes the update no longer + * XXX eligible and will not be considered for routing + * XXX or redistribution. + * XXX We follow draft-ietf-idr-optional-transitive + * XXX by looking at the partial bit. + * XXX Consider soft errors similar to a partial attr. */ - a->flags |= F_ATTR_LOOP; - goto optattr; - } - a->flags |= F_ATTR_AS4BYTE_NEW; +- goto optattr; + if (flags & ATTR_PARTIAL || err == AS_ERR_SOFT) { + a->flags |= F_ATTR_PARSE_ERR; + log_peer_warnx(&peer->conf, "bad AS4_PATH, " + "path invalidated and prefix withdrawn"); + goto optattr; + } else { + rde_update_err(peer, ERR_UPDATE, ERR_UPD_ASPATH, + NULL, 0); + return (-1); + } + } + a->flags |= F_ATTR_AS4BYTE_NEW; - goto optattr; ++ goto optattr; default: if ((flags & ATTR_OPTIONAL) == 0) { + rde_update_err(peer, ERR_UPDATE, ERR_UPD_UNKNWN_WK_ATTR, @@ -1415,6 +1774,42 @@ bad_list: return (plen); } + +int +rde_attr_add(struct rde_aspath *a, u_char *p, u_int16_t len) +{ + u_int16_t attr_len; + u_int16_t plen = 0; + u_int8_t flags; + u_int8_t type; + u_int8_t tmp8; + + if (a == NULL) /* no aspath, nothing to do */ + return (0); + if (len < 3) + return (-1); + + UPD_READ(&flags, p, plen, 1); + UPD_READ(&type, p, plen, 1); + + if (flags & ATTR_EXTLEN) { + if (len - plen < 2) + return (-1); + UPD_READ(&attr_len, p, plen, 2); + attr_len = ntohs(attr_len); + } else { + UPD_READ(&tmp8, p, plen, 1); + attr_len = tmp8; + } + + if (len - plen < attr_len) + return (-1); + + if (attr_optadd(a, flags, type, p, attr_len) == -1) + return (-1); + return (0); +} + #undef UPD_READ #undef CHECK_FLAGS @@ -1440,8 +1835,8 @@ rde_attr_missing(struct rde_aspath *a, i } int -rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int16_t afi, - struct rde_aspath *asp) +rde_get_mp_nexthop(u_char *data, u_int16_t len, u_int8_t aid, + struct rde_aspath *asp, struct rde_peer *peer) { struct bgpd_addr nexthop; u_int8_t totlen, nhlen; @@ -1457,8 +1852,9 @@ rde_get_mp_nexthop(u_char *data, u_int16 return (-1); bzero(&nexthop, sizeof(nexthop)); - switch (afi) { - case AFI_IPv6: + nexthop.aid = aid; + switch (aid) { + case AID_INET6: /* * RFC2545 describes that there may be a link-local * address carried in nexthop. Yikes! -@@ -1471,72 +1867,143 @@ rde_get_mp_nexthop(u_char *data, u_int16 +@@ -1471,72 +1867,144 @@ rde_get_mp_nexthop(u_char *data, u_int16 log_warnx("bad multiprotocol nexthop, bad size"); return (-1); } - nexthop.af = AF_INET6; memcpy(&nexthop.v6.s6_addr, data, 16); - asp->nexthop = nexthop_get(&nexthop); +#if defined(__KAME__) && defined(IPV6_LINKLOCAL_PEER) + if (IN6_IS_ADDR_LINKLOCAL(&nexthop.v6) && + peer->conf.lliface[0]) { + int ifindex; + + ifindex = if_nametoindex(peer->conf.lliface); -+ if (ifindex != 0) ++ if (ifindex != 0) { + SET_IN6_LINKLOCAL_IFINDEX(nexthop.v6, ifindex); -+ else ++ nexthop.scope_id = ifindex; ++ } else + log_warnx("bad interface: %s", peer->conf.lliface); + } +#endif + break; + case AID_VPN_IPv4: /* - * lock the nexthop because it is not yet linked else - * withdraws may remove this nexthop which in turn would - * cause a use after free error. + * Neither RFC4364 nor RFC3107 specify the format of the + * nexthop in an explicit way. The quality of RFC went down + * the toilet the larger the number got. + * RFC4364 is very confusing about VPN-IPv4 address and the + * VPN-IPv4 prefix that carries also a MPLS label. + * So the nexthop is a 12-byte address with a 64bit RD and + * an IPv4 address following. In the nexthop case the RD can + * be ignored. + * Since the nexthop has to be in the main IPv4 table just + * create an AID_INET nexthop. So we don't need to handle + * AID_VPN_IPv4 in nexthop and kroute. */ - asp->nexthop->refcnt++; - - /* ignore reserved (old SNPA) field as per RFC 4760 */ - totlen += nhlen + 1; - data += nhlen + 1; - - return (totlen); - default: - log_warnx("bad multiprotocol nexthop, bad AF"); + if (nhlen != 12) { + log_warnx("bad multiprotocol nexthop, bad size"); + return (-1); + } + data += sizeof(u_int64_t); + nexthop.aid = AID_INET; + memcpy(&nexthop.v4, data, sizeof(nexthop.v4)); break; + default: + log_warnx("bad multiprotocol nexthop, bad AID"); + return (-1); } - return (-1); + asp->nexthop = nexthop_get(&nexthop); + /* + * lock the nexthop because it is not yet linked else + * withdraws may remove this nexthop which in turn would + * cause a use after free error. + */ + asp->nexthop->refcnt++; + + /* ignore reserved (old SNPA) field as per RFC4760 */ + totlen += nhlen + 1; + data += nhlen + 1; + + return (totlen); +} + +int +rde_update_extract_prefix(u_char *p, u_int16_t len, void *va, + u_int8_t pfxlen, u_int8_t max) +{ + static u_char addrmask[] = { + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff }; + u_char *a = va; + int i; + u_int16_t plen = 0; + + for (i = 0; pfxlen && i < max; i++) { + if (len <= plen) + return (-1); + if (pfxlen < 8) { + a[i] = *p++ & addrmask[pfxlen]; + plen++; + break; + } else { + a[i] = *p++; + plen++; + pfxlen -= 8; + } + } + return (plen); } int rde_update_get_prefix(u_char *p, u_int16_t len, struct bgpd_addr *prefix, u_int8_t *prefixlen) { - int i; - u_int8_t pfxlen; - u_int16_t plen; - union { - struct in_addr a32; - u_int8_t a8[4]; - } addr; + u_int8_t pfxlen; + int plen; if (len < 1) return (-1); - memcpy(&pfxlen, p, 1); - p += 1; - plen = 1; + pfxlen = *p++; + len--; bzero(prefix, sizeof(struct bgpd_addr)); - addr.a32.s_addr = 0; - for (i = 0; i <= 3; i++) { - if (pfxlen > i * 8) { - if (len - plen < 1) - return (-1); - memcpy(&addr.a8[i], p++, 1); - plen++; - } - } - prefix->af = AF_INET; - prefix->v4.s_addr = addr.a32.s_addr; + prefix->aid = AID_INET; *prefixlen = pfxlen; - return (plen); + if ((plen = rde_update_extract_prefix(p, len, &prefix->v4, pfxlen, + sizeof(prefix->v4))) == -1) + return (-1); + + return (plen + 1); /* pfxlen needs to be added */ } int rde_update_get_prefix6(u_char *p, u_int16_t len, struct bgpd_addr *prefix, u_int8_t *prefixlen) { - int i; + int plen; u_int8_t pfxlen; - u_int16_t plen; + + if (len < 1) + return (-1); + + pfxlen = *p++; + len--; + + bzero(prefix, sizeof(struct bgpd_addr)); + prefix->aid = AID_INET6; + *prefixlen = pfxlen; + + if ((plen = rde_update_extract_prefix(p, len, &prefix->v6, pfxlen, + sizeof(prefix->v6))) == -1) + return (-1); + + return (plen + 1); /* pfxlen needs to be added */ +} + +int +rde_update_get_vpn4(u_char *p, u_int16_t len, struct bgpd_addr *prefix, + u_int8_t *prefixlen) +{ + int rv, done = 0; + u_int8_t pfxlen; + u_int16_t plen; if (len < 1) return (-1); -@@ -1546,25 +2013,50 @@ rde_update_get_prefix6(u_char *p, u_int1 +@@ -1546,25 +2014,50 @@ rde_update_get_prefix6(u_char *p, u_int1 plen = 1; bzero(prefix, sizeof(struct bgpd_addr)); - for (i = 0; i <= 15; i++) { - if (pfxlen > i * 8) { - if (len - plen < 1) - return (-1); - memcpy(&prefix->v6.s6_addr[i], p++, 1); - plen++; - } - } - prefix->af = AF_INET6; + + /* label stack */ + do { + if (len - plen < 3 || pfxlen < 3 * 8) + return (-1); + if (prefix->vpn4.labellen + 3U > + sizeof(prefix->vpn4.labelstack)) + return (-1); + prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++; + prefix->vpn4.labelstack[prefix->vpn4.labellen++] = *p++; + prefix->vpn4.labelstack[prefix->vpn4.labellen] = *p++; + if (prefix->vpn4.labelstack[prefix->vpn4.labellen] & + BGP_MPLS_BOS) + done = 1; + prefix->vpn4.labellen++; + plen += 3; + pfxlen -= 3 * 8; + } while (!done); + + /* RD */ + if (len - plen < (int)sizeof(u_int64_t) || + pfxlen < sizeof(u_int64_t) * 8) + return (-1); + memcpy(&prefix->vpn4.rd, p, sizeof(u_int64_t)); + pfxlen -= sizeof(u_int64_t) * 8; + p += sizeof(u_int64_t); + plen += sizeof(u_int64_t); + + /* prefix */ + prefix->aid = AID_VPN_IPv4; *prefixlen = pfxlen; - return (plen); + if ((rv = rde_update_extract_prefix(p, len, &prefix->vpn4.addr, + pfxlen, sizeof(prefix->vpn4.addr))) == -1) + return (-1); + + return (plen + rv); } void rde_update_err(struct rde_peer *peer, u_int8_t error, u_int8_t suberr, void *data, u_int16_t size) { - struct buf *wbuf; + struct ibuf *wbuf; if ((wbuf = imsg_create(ibuf_se, IMSG_UPDATE_ERR, peer->conf.id, 0, size + sizeof(error) + sizeof(suberr))) == NULL) -@@ -1616,16 +2108,30 @@ rde_as4byte_fixup(struct rde_peer *peer, +@@ -1616,16 +2109,30 @@ rde_as4byte_fixup(struct rde_peer *peer, struct attr *nasp, *naggr, *oaggr; u_int32_t as; + /* + * if either ATTR_AS4_AGGREGATOR or ATTR_AS4_PATH is present + * try to fixup the attributes. + * Do not fixup if F_ATTR_PARSE_ERR is set. + */ + if (!(a->flags & F_ATTR_AS4BYTE_NEW) || a->flags & F_ATTR_PARSE_ERR) + return; + /* first get the attributes */ nasp = attr_optget(a, ATTR_AS4_PATH); naggr = attr_optget(a, ATTR_AS4_AGGREGATOR); if (rde_as4byte(peer)) { /* NEW session using 4-byte ASNs */ - if (nasp) + if (nasp) { + log_peer_warnx(&peer->conf, "uses 4-byte ASN " + "but sent AS4_PATH attribute."); attr_free(a, nasp); - if (naggr) + } + if (naggr) { + log_peer_warnx(&peer->conf, "uses 4-byte ASN " + "but sent AS4_AGGREGATOR attribute."); attr_free(a, naggr); + } return; } /* OLD session using 2-byte ASNs */ -@@ -1669,6 +2175,10 @@ rde_reflector(struct rde_peer *peer, str +@@ -1669,6 +2176,10 @@ rde_reflector(struct rde_peer *peer, str u_int16_t len; u_int32_t id; + /* do not consider updates with parse errors */ + if (asp->flags & F_ATTR_PARSE_ERR) + return; + /* check for originator id if eq router_id drop */ if ((a = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) { if (memcmp(&conf->bgpid, a->data, sizeof(conf->bgpid)) == 0) { -@@ -1677,10 +2187,10 @@ rde_reflector(struct rde_peer *peer, str +@@ -1677,10 +2188,10 @@ rde_reflector(struct rde_peer *peer, str return; } } else if (conf->flags & BGPD_FLAG_REFLECTOR) { - if (peer->conf.ebgp == 0) - id = htonl(peer->remote_bgpid); - else + if (peer->conf.ebgp) id = conf->bgpid; + else + id = htonl(peer->remote_bgpid); if (attr_optadd(asp, ATTR_OPTIONAL, ATTR_ORIGINATOR_ID, &id, sizeof(u_int32_t)) == -1) fatalx("attr_optadd failed but impossible"); -@@ -1724,17 +2234,17 @@ void +@@ -1724,17 +2235,17 @@ void rde_dump_rib_as(struct prefix *p, struct rde_aspath *asp, pid_t pid, int flags) { struct ctl_show_rib rib; - struct buf *wbuf; + struct ibuf *wbuf; struct attr *a; void *bp; + time_t staletime; u_int8_t l; bzero(&rib, sizeof(rib)); rib.lastchange = p->lastchange; rib.local_pref = asp->lpref; rib.med = asp->med; - rib.prefix_cnt = asp->prefix_cnt; - rib.active_cnt = asp->active_cnt; + rib.weight = asp->weight; strlcpy(rib.descr, asp->peer->conf.descr, sizeof(rib.descr)); memcpy(&rib.remote_addr, &asp->peer->remote_addr, sizeof(rib.remote_addr)); -@@ -1748,23 +2258,26 @@ rde_dump_rib_as(struct prefix *p, struct +@@ -1748,23 +2259,26 @@ rde_dump_rib_as(struct prefix *p, struct /* announced network may have a NULL nexthop */ bzero(&rib.true_nexthop, sizeof(rib.true_nexthop)); bzero(&rib.exit_nexthop, sizeof(rib.exit_nexthop)); - rib.true_nexthop.af = p->prefix->af; - rib.exit_nexthop.af = p->prefix->af; + rib.true_nexthop.aid = p->prefix->aid; + rib.exit_nexthop.aid = p->prefix->aid; } pt_getaddr(p->prefix, &rib.prefix); rib.prefixlen = p->prefix->prefixlen; rib.origin = asp->origin; rib.flags = 0; if (p->rib->active == p) - rib.flags |= F_RIB_ACTIVE; - if (asp->peer->conf.ebgp == 0) - rib.flags |= F_RIB_INTERNAL; + rib.flags |= F_PREF_ACTIVE; + if (!asp->peer->conf.ebgp) + rib.flags |= F_PREF_INTERNAL; if (asp->flags & F_PREFIX_ANNOUNCED) - rib.flags |= F_RIB_ANNOUNCE; + rib.flags |= F_PREF_ANNOUNCE; if (asp->nexthop == NULL || asp->nexthop->state == NEXTHOP_REACH) - rib.flags |= F_RIB_ELIGIBLE; + rib.flags |= F_PREF_ELIGIBLE; if (asp->flags & F_ATTR_LOOP) - rib.flags &= ~F_RIB_ELIGIBLE; + rib.flags &= ~F_PREF_ELIGIBLE; + staletime = asp->peer->staletime[p->prefix->aid]; + if (staletime && p->lastchange <= staletime) + rib.flags |= F_PREF_STALE; rib.aspath_len = aspath_length(asp->aspath); if ((wbuf = imsg_create(ibuf_se_ctl, IMSG_CTL_SHOW_RIB, 0, pid, -@@ -1784,13 +2297,13 @@ rde_dump_rib_as(struct prefix *p, struct +@@ -1784,13 +2298,13 @@ rde_dump_rib_as(struct prefix *p, struct IMSG_CTL_SHOW_RIB_ATTR, 0, pid, attr_optlen(a))) == NULL) return; - if ((bp = buf_reserve(wbuf, attr_optlen(a))) == NULL) { - buf_free(wbuf); + if ((bp = ibuf_reserve(wbuf, attr_optlen(a))) == NULL) { + ibuf_free(wbuf); return; } if (attr_write(bp, attr_optlen(a), a->flags, a->type, a->data, a->len) == -1) { - buf_free(wbuf); + ibuf_free(wbuf); return; } imsg_close(ibuf_se_ctl, wbuf); -@@ -1828,17 +2341,20 @@ rde_dump_filter(struct prefix *p, struct +@@ -1828,17 +2342,20 @@ rde_dump_filter(struct prefix *p, struct { struct rde_peer *peer; - if (req->flags & F_CTL_ADJ_IN || + if (req->flags & F_CTL_ADJ_IN || !(req->flags & (F_CTL_ADJ_IN|F_CTL_ADJ_OUT))) { if (req->peerid && req->peerid != p->aspath->peer->conf.id) return; - if (req->type == IMSG_CTL_SHOW_RIB_AS && - !aspath_match(p->aspath->aspath, req->as.type, req->as.as)) + if (req->type == IMSG_CTL_SHOW_RIB_AS && + !aspath_match(p->aspath->aspath->data, + p->aspath->aspath->len, req->as.type, req->as.as)) return; if (req->type == IMSG_CTL_SHOW_RIB_COMMUNITY && - !rde_filter_community(p->aspath, req->community.as, + !community_match(p->aspath, req->community.as, req->community.type)) return; + if ((req->flags & F_CTL_ACTIVE) && p->rib->active != p) + return; rde_dump_rib_as(p, p->aspath, req->pid, req->flags); } else if (req->flags & F_CTL_ADJ_OUT) { if (p->rib->active != p) -@@ -1872,7 +2388,7 @@ rde_dump_prefix_upcall(struct rib_entry +@@ -1872,7 +2389,7 @@ rde_dump_prefix_upcall(struct rib_entry pt = re->prefix; pt_getaddr(pt, &addr); - if (addr.af != ctx->req.prefix.af) + if (addr.aid != ctx->req.prefix.aid) return; if (ctx->req.prefixlen > pt->prefixlen) return; -@@ -1889,6 +2405,7 @@ rde_dump_ctx_new(struct ctl_show_rib_req +@@ -1889,6 +2406,7 @@ rde_dump_ctx_new(struct ctl_show_rib_req struct rib_entry *re; u_int error; u_int16_t id; + u_int8_t hostplen = 0; if ((ctx = calloc(1, sizeof(*ctx))) == NULL) { log_warn("rde_dump_ctx_new"); -@@ -1902,6 +2419,7 @@ rde_dump_ctx_new(struct ctl_show_rib_req +@@ -1902,6 +2420,7 @@ rde_dump_ctx_new(struct ctl_show_rib_req error = CTL_RES_NOSUCHPEER; imsg_compose(ibuf_se_ctl, IMSG_CTL_RESULT, 0, pid, -1, &error, sizeof(error)); + free(ctx); return; } -@@ -1924,7 +2442,18 @@ rde_dump_ctx_new(struct ctl_show_rib_req +@@ -1924,7 +2443,18 @@ rde_dump_ctx_new(struct ctl_show_rib_req ctx->ribctx.ctx_upcall = rde_dump_prefix_upcall; break; } - if (req->prefixlen == 32) + switch (req->prefix.aid) { + case AID_INET: + case AID_VPN_IPv4: + hostplen = 32; + break; + case AID_INET6: + hostplen = 128; + break; + default: + fatalx("rde_dump_ctx_new: unknown af"); + } + if (req->prefixlen == hostplen) re = rib_lookup(&ribs[id], &req->prefix); else re = rib_get(&ribs[id], &req->prefix, req->prefixlen); -@@ -1937,7 +2466,7 @@ rde_dump_ctx_new(struct ctl_show_rib_req +@@ -1937,7 +2467,7 @@ rde_dump_ctx_new(struct ctl_show_rib_req } ctx->ribctx.ctx_done = rde_dump_done; ctx->ribctx.ctx_arg = ctx; - ctx->ribctx.ctx_af = ctx->req.af; + ctx->ribctx.ctx_aid = ctx->req.aid; rib_dump_r(&ctx->ribctx); } -@@ -1971,13 +2500,17 @@ rde_dump_mrt_new(struct mrt *mrt, pid_t +@@ -1971,13 +2501,17 @@ rde_dump_mrt_new(struct mrt *mrt, pid_t free(ctx); return; } + + if (ctx->mrt.type == MRT_TABLE_DUMP_V2) + mrt_dump_v2_hdr(&ctx->mrt, conf, &peerlist); + ctx->ribctx.ctx_count = RDE_RUNNER_ROUNDS; ctx->ribctx.ctx_rib = &ribs[id]; ctx->ribctx.ctx_upcall = mrt_dump_upcall; - ctx->ribctx.ctx_done = mrt_dump_done; + ctx->ribctx.ctx_done = mrt_done; ctx->ribctx.ctx_arg = &ctx->mrt; - ctx->ribctx.ctx_af = AF_UNSPEC; - LIST_INSERT_HEAD(&rde_mrts, &ctx->mrt, entry); + ctx->ribctx.ctx_aid = AID_UNSPEC; + LIST_INSERT_HEAD(&rde_mrts, ctx, entry); rde_mrt_cnt++; rib_dump_r(&ctx->ribctx); } -@@ -1985,13 +2518,25 @@ rde_dump_mrt_new(struct mrt *mrt, pid_t +@@ -1985,13 +2519,25 @@ rde_dump_mrt_new(struct mrt *mrt, pid_t /* * kroute specific functions */ +int +rde_rdomain_import(struct rde_aspath *asp, struct rdomain *rd) +{ + struct filter_set *s; + + TAILQ_FOREACH(s, &rd->import, entry) { + if (community_ext_match(asp, &s->action.ext_community, 0)) + return (1); + } + return (0); +} + void -rde_send_kroute(struct prefix *new, struct prefix *old) +rde_send_kroute(struct prefix *new, struct prefix *old, u_int16_t ribid) { - struct kroute_label kl; - struct kroute6_label kl6; + struct kroute_full kr; struct bgpd_addr addr; struct prefix *p; + struct rdomain *rd; enum imsg_type type; /* -@@ -2011,43 +2556,43 @@ rde_send_kroute(struct prefix *new, stru +@@ -2011,43 +2557,43 @@ rde_send_kroute(struct prefix *new, stru } pt_getaddr(p->prefix, &addr); - switch (addr.af) { - case AF_INET: - bzero(&kl, sizeof(kl)); - kl.kr.prefix.s_addr = addr.v4.s_addr; - kl.kr.prefixlen = p->prefix->prefixlen; - if (p->aspath->flags & F_NEXTHOP_REJECT) - kl.kr.flags |= F_REJECT; - if (p->aspath->flags & F_NEXTHOP_BLACKHOLE) - kl.kr.flags |= F_BLACKHOLE; - if (type == IMSG_KROUTE_CHANGE) - kl.kr.nexthop.s_addr = - p->aspath->nexthop->true_nexthop.v4.s_addr; - strlcpy(kl.label, rtlabel_id2name(p->aspath->rtlabelid), - sizeof(kl.label)); - if (imsg_compose(ibuf_main, type, 0, 0, -1, &kl, - sizeof(kl)) == -1) - fatal("imsg_compose error"); + bzero(&kr, sizeof(kr)); + memcpy(&kr.prefix, &addr, sizeof(kr.prefix)); + kr.prefixlen = p->prefix->prefixlen; + if (p->aspath->flags & F_NEXTHOP_REJECT) + kr.flags |= F_REJECT; + if (p->aspath->flags & F_NEXTHOP_BLACKHOLE) + kr.flags |= F_BLACKHOLE; + if (type == IMSG_KROUTE_CHANGE) + memcpy(&kr.nexthop, &p->aspath->nexthop->true_nexthop, + sizeof(kr.nexthop)); + strlcpy(kr.label, rtlabel_id2name(p->aspath->rtlabelid), + sizeof(kr.label)); + + switch (addr.aid) { + case AID_VPN_IPv4: + if (ribid != 1) + /* not Loc-RIB, no update for VPNs */ + break; + + SIMPLEQ_FOREACH(rd, rdomains_l, entry) { + if (!rde_rdomain_import(p->aspath, rd)) + continue; + /* must send exit_nexthop so that correct MPLS tunnel + * is chosen + */ + if (type == IMSG_KROUTE_CHANGE) + memcpy(&kr.nexthop, + &p->aspath->nexthop->exit_nexthop, + sizeof(kr.nexthop)); + if (imsg_compose(ibuf_main, type, rd->rtableid, 0, -1, + &kr, sizeof(kr)) == -1) + fatal("imsg_compose error"); + } break; - case AF_INET6: - bzero(&kl6, sizeof(kl6)); - memcpy(&kl6.kr.prefix, &addr.v6, sizeof(struct in6_addr)); - kl6.kr.prefixlen = p->prefix->prefixlen; - if (p->aspath->flags & F_NEXTHOP_REJECT) - kl6.kr.flags |= F_REJECT; - if (p->aspath->flags & F_NEXTHOP_BLACKHOLE) - kl6.kr.flags |= F_BLACKHOLE; - if (type == IMSG_KROUTE_CHANGE) { - type = IMSG_KROUTE6_CHANGE; - memcpy(&kl6.kr.nexthop, - &p->aspath->nexthop->true_nexthop.v6, - sizeof(struct in6_addr)); - } else - type = IMSG_KROUTE6_DELETE; - strlcpy(kl6.label, rtlabel_id2name(p->aspath->rtlabelid), - sizeof(kl6.label)); - if (imsg_compose(ibuf_main, type, 0, 0, -1, &kl6, - sizeof(kl6)) == -1) + default: + if (imsg_compose(ibuf_main, type, ribs[ribid].rtableid, 0, -1, + &kr, sizeof(kr)) == -1) fatal("imsg_compose error"); break; } -@@ -2098,7 +2643,6 @@ rde_send_pftable_commit(void) +@@ -2098,7 +2644,6 @@ rde_send_pftable_commit(void) void rde_send_nexthop(struct bgpd_addr *next, int valid) { - size_t size; int type; if (valid) -@@ -2106,8 +2650,6 @@ rde_send_nexthop(struct bgpd_addr *next, +@@ -2106,8 +2651,6 @@ rde_send_nexthop(struct bgpd_addr *next, else type = IMSG_NEXTHOP_REMOVE; - size = sizeof(struct bgpd_addr); - if (imsg_compose(ibuf_main, type, 0, 0, -1, next, sizeof(struct bgpd_addr)) == -1) fatal("imsg_compose error"); -@@ -2201,6 +2743,10 @@ rde_softreconfig_in(struct rib_entry *re +@@ -2201,6 +2744,10 @@ rde_softreconfig_in(struct rib_entry *re continue; for (i = 1; i < rib_size; i++) { + /* only active ribs need a softreconfig rerun */ + if (ribs[i].state != RECONF_KEEP) + continue; + /* check if prefix changed */ oa = rde_filter(i, &oasp, rules_l, peer, asp, &addr, pt->prefixlen, peer, DIR_IN); -@@ -2228,7 +2774,7 @@ rde_softreconfig_in(struct rib_entry *re +@@ -2228,7 +2775,7 @@ rde_softreconfig_in(struct rib_entry *re if (path_compare(nasp, oasp) == 0) goto done; /* send update */ - path_update(&ribs[1], peer, nasp, &addr, + path_update(&ribs[i], peer, nasp, &addr, pt->prefixlen); } -@@ -2241,6 +2787,104 @@ done: +@@ -2241,6 +2788,104 @@ done: } } +void +rde_softreconfig_load(struct rib_entry *re, void *ptr) +{ + struct rib *rib = ptr; + struct prefix *p, *np; + struct pt_entry *pt; + struct rde_peer *peer; + struct rde_aspath *asp, *nasp; + enum filter_actions action; + struct bgpd_addr addr; + + pt = re->prefix; + pt_getaddr(pt, &addr); + for (p = LIST_FIRST(&re->prefix_h); p != NULL; p = np) { + np = LIST_NEXT(p, rib_l); + + /* store aspath as prefix may change till we're done */ + asp = p->aspath; + peer = asp->peer; + + action = rde_filter(rib->id, &nasp, newrules, peer, asp, &addr, + pt->prefixlen, peer, DIR_IN); + nasp = nasp != NULL ? nasp : asp; + + if (action == ACTION_ALLOW) { + /* update Local-RIB */ + path_update(rib, peer, nasp, &addr, pt->prefixlen); + } + + if (nasp != asp) + path_put(nasp); + } +} + +void +rde_softreconfig_load_peer(struct rib_entry *re, void *ptr) +{ + struct rde_peer *peer = ptr; + struct prefix *p = re->active; + struct pt_entry *pt; + struct rde_aspath *nasp; + enum filter_actions na; + struct bgpd_addr addr; + + pt = re->prefix; + pt_getaddr(pt, &addr); + + /* check if prefix was announced */ + if (up_test_update(peer, p) != 1) + return; + + na = rde_filter(re->ribid, &nasp, newrules, peer, p->aspath, + &addr, pt->prefixlen, p->aspath->peer, DIR_OUT); + nasp = nasp != NULL ? nasp : p->aspath; + + if (na == ACTION_DENY) + /* nothing todo */ + goto done; + + /* send update */ + up_generate(peer, nasp, &addr, pt->prefixlen); +done: + if (nasp != p->aspath) + path_put(nasp); +} + +void +rde_softreconfig_unload_peer(struct rib_entry *re, void *ptr) +{ + struct rde_peer *peer = ptr; + struct prefix *p = re->active; + struct pt_entry *pt; + struct rde_aspath *oasp; + enum filter_actions oa; + struct bgpd_addr addr; + + pt = re->prefix; + pt_getaddr(pt, &addr); + + /* check if prefix was announced */ + if (up_test_update(peer, p) != 1) + return; + + oa = rde_filter(re->ribid, &oasp, rules_l, peer, p->aspath, + &addr, pt->prefixlen, p->aspath->peer, DIR_OUT); + oasp = oasp != NULL ? oasp : p->aspath; + + if (oa == ACTION_DENY) + /* nothing todo */ + goto done; + + /* send withdraw */ + up_generate(peer, NULL, &addr, pt->prefixlen); +done: + if (oasp != p->aspath) + path_put(oasp); +} + /* * update specific functions */ -@@ -2252,7 +2896,7 @@ rde_up_dump_upcall(struct rib_entry *re, +@@ -2252,7 +2897,7 @@ rde_up_dump_upcall(struct rib_entry *re, struct rde_peer *peer = ptr; if (re->ribid != peer->ribid) - fatalx("King Bula: monsterous evil horror."); + fatalx("King Bula: monstrous evil horror."); if (re->active == NULL) return; up_generate_updates(rules_l, peer, re->active, NULL); -@@ -2265,7 +2909,7 @@ rde_generate_updates(u_int16_t ribid, st +@@ -2265,7 +2910,7 @@ rde_generate_updates(u_int16_t ribid, st /* * If old is != NULL we know it was active and should be removed. - * If new is != NULL we know it is reachable and then we should + * If new is != NULL we know it is reachable and then we should * generate an update. */ if (old == NULL && new == NULL) -@@ -2286,7 +2930,7 @@ void +@@ -2286,7 +2931,7 @@ void rde_update_queue_runner(void) { struct rde_peer *peer; - int r, sent, max = RDE_RUNNER_ROUNDS; + int r, sent, max = RDE_RUNNER_ROUNDS, eor = 0; u_int16_t len, wd_len, wpos; len = sizeof(queue_buf) - MSGSIZE_HEADER; -@@ -2300,7 +2944,7 @@ rde_update_queue_runner(void) +@@ -2300,7 +2945,7 @@ rde_update_queue_runner(void) /* first withdraws */ wpos = 2; /* reserve space for the length field */ r = up_dump_prefix(queue_buf + wpos, len - wpos - 2, - &peer->withdraws, peer); + &peer->withdraws[AID_INET], peer); wd_len = r; /* write withdraws length filed */ wd_len = htons(wd_len); -@@ -2310,31 +2954,49 @@ rde_update_queue_runner(void) +@@ -2310,31 +2955,49 @@ rde_update_queue_runner(void) /* now bgp path attributes */ r = up_dump_attrnlri(queue_buf + wpos, len - wpos, peer); - wpos += r; - - if (wpos == 4) - /* - * No packet to send. The 4 bytes are the - * needed withdraw and path attribute length. - */ - continue; + switch (r) { + case -1: + eor = 1; + if (wd_len == 0) { + /* no withdraws queued just send EoR */ + peer_send_eor(peer, AID_INET); + continue; + } + break; + case 2: + if (wd_len == 0) { + /* + * No packet to send. No withdraws and + * no path attributes. Skip. + */ + continue; + } + /* FALLTHROUGH */ + default: + wpos += r; + break; + } /* finally send message to SE */ if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 0, -1, queue_buf, wpos) == -1) fatal("imsg_compose error"); sent++; + if (eor) { + eor = 0; + peer_send_eor(peer, AID_INET); + } } max -= sent; } while (sent != 0 && max > 0); } void -rde_update6_queue_runner(void) +rde_update6_queue_runner(u_int8_t aid) { struct rde_peer *peer; u_char *b; - int sent, max = RDE_RUNNER_ROUNDS / 2; + int r, sent, max = RDE_RUNNER_ROUNDS / 2; u_int16_t len; /* first withdraws ... */ -@@ -2346,7 +3008,7 @@ rde_update6_queue_runner(void) +@@ -2346,7 +3009,7 @@ rde_update6_queue_runner(void) if (peer->state != PEER_UP) continue; len = sizeof(queue_buf) - MSGSIZE_HEADER; - b = up_dump_mp_unreach(queue_buf, &len, peer); + b = up_dump_mp_unreach(queue_buf, &len, peer, aid); if (b == NULL) continue; -@@ -2369,10 +3031,18 @@ rde_update6_queue_runner(void) +@@ -2369,10 +3032,18 @@ rde_update6_queue_runner(void) if (peer->state != PEER_UP) continue; len = sizeof(queue_buf) - MSGSIZE_HEADER; - b = up_dump_mp_reach(queue_buf, &len, peer); - - if (b == NULL) + r = up_dump_mp_reach(queue_buf, &len, peer, aid); + switch (r) { + case -2: - continue; ++ continue; + case -1: + peer_send_eor(peer, aid); -+ continue; + continue; + default: + b = queue_buf + r; + break; + } + /* finally send message to SE */ if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, 0, -1, b, len) == -1) -@@ -2411,7 +3081,7 @@ rde_decisionflags(void) +@@ -2411,7 +3082,7 @@ rde_decisionflags(void) int rde_as4byte(struct rde_peer *peer) { - return (peer->capa_announced.as4byte && peer->capa_received.as4byte); + return (peer->capa.as4byte); } /* -@@ -2429,7 +3099,6 @@ void +@@ -2429,7 +3100,6 @@ void peer_init(u_int32_t hashsize) { struct peer_config pc; - struct in_addr id; u_int32_t hs, i; for (hs = 1; hs < hashsize; hs <<= 1) -@@ -2445,17 +3114,13 @@ peer_init(u_int32_t hashsize) +@@ -2445,17 +3115,13 @@ peer_init(u_int32_t hashsize) peertable.peer_hashmask = hs - 1; bzero(&pc, sizeof(pc)); - pc.remote_as = conf->as; - id.s_addr = conf->bgpid; - snprintf(pc.descr, sizeof(pc.descr), "LOCAL: ID %s", inet_ntoa(id)); + snprintf(pc.descr, sizeof(pc.descr), "LOCAL"); peerself = peer_add(0, &pc); if (peerself == NULL) fatalx("peer_init add self"); peerself->state = PEER_UP; - peerself->remote_bgpid = ntohl(conf->bgpid); - peerself->short_as = conf->short_as; } void -@@ -2534,14 +3199,10 @@ peer_localaddrs(struct rde_peer *peer, s +@@ -2534,14 +3200,10 @@ peer_localaddrs(struct rde_peer *peer, s if (ifa->ifa_addr->sa_family == match->ifa_addr->sa_family) ifa = match; - peer->local_v4_addr.af = AF_INET; - peer->local_v4_addr.v4.s_addr = - ((struct sockaddr_in *)ifa->ifa_addr)-> - sin_addr.s_addr; + sa2addr(ifa->ifa_addr, &peer->local_v4_addr); break; } } - for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) { if (ifa->ifa_addr->sa_family == AF_INET6 && strcmp(ifa->ifa_name, match->ifa_name) == 0) { -@@ -2559,13 +3220,7 @@ peer_localaddrs(struct rde_peer *peer, s +@@ -2559,13 +3221,7 @@ peer_localaddrs(struct rde_peer *peer, s &((struct sockaddr_in6 *)ifa-> ifa_addr)->sin6_addr)) continue; - peer->local_v6_addr.af = AF_INET6; - memcpy(&peer->local_v6_addr.v6, - &((struct sockaddr_in6 *)ifa->ifa_addr)-> - sin6_addr, sizeof(struct in6_addr)); - peer->local_v6_addr.scope_id = - ((struct sockaddr_in6 *)ifa->ifa_addr)-> - sin6_scope_id; + sa2addr(ifa->ifa_addr, &peer->local_v6_addr); break; } } -@@ -2577,23 +3232,22 @@ void +@@ -2577,23 +3233,22 @@ void peer_up(u_int32_t id, struct session_up *sup) { struct rde_peer *peer; + u_int8_t i; peer = peer_get(id); if (peer == NULL) { - log_warnx("peer_up: peer id %d already exists", id); + log_warnx("peer_up: unknown peer id %d", id); return; } - if (peer->state != PEER_DOWN && peer->state != PEER_NONE) + if (peer->state != PEER_DOWN && peer->state != PEER_NONE && + peer->state != PEER_UP) fatalx("peer_up: bad state"); peer->remote_bgpid = ntohl(sup->remote_bgpid); peer->short_as = sup->short_as; memcpy(&peer->remote_addr, &sup->remote_addr, sizeof(peer->remote_addr)); - memcpy(&peer->capa_announced, &sup->capa_announced, - sizeof(peer->capa_announced)); - memcpy(&peer->capa_received, &sup->capa_received, - sizeof(peer->capa_received)); + memcpy(&peer->capa, &sup->capa, sizeof(peer->capa)); peer_localaddrs(peer, &sup->local_addr); -@@ -2607,7 +3261,10 @@ peer_up(u_int32_t id, struct session_up +@@ -2607,7 +3262,10 @@ peer_up(u_int32_t id, struct session_up */ return; - peer_dump(id, AFI_ALL, SAFI_ALL); + for (i = 0; i < AID_MAX; i++) { + if (peer->capa.mp[i] == 1) + peer_dump(id, i); + } } void -@@ -2641,43 +3298,90 @@ peer_down(u_int32_t id) +@@ -2641,43 +3299,90 @@ peer_down(u_int32_t id) free(peer); } +/* + * Flush all routes older then staletime. If staletime is 0 all routes will + * be flushed. + */ +void +peer_flush(struct rde_peer *peer, u_int8_t aid) +{ + struct rde_aspath *asp, *nasp; + + /* walk through per peer RIB list and remove all stale prefixes. */ + for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = nasp) { + nasp = LIST_NEXT(asp, peer_l); + path_remove_stale(asp, aid); + } + + /* Deletions are performed in path_remove() */ + rde_send_pftable_commit(); + + /* flushed no need to keep staletime */ + peer->staletime[aid] = 0; +} + void -peer_dump(u_int32_t id, u_int16_t afi, u_int8_t safi) +peer_stale(u_int32_t id, u_int8_t aid) { struct rde_peer *peer; + time_t now; peer = peer_get(id); if (peer == NULL) { - log_warnx("peer_down: unknown peer id %d", id); + log_warnx("peer_stale: unknown peer id %d", id); return; } - if (afi == AFI_ALL || afi == AFI_IPv4) - if (safi == SAFI_ALL || safi == SAFI_UNICAST) { - if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) - up_generate_default(rules_l, peer, AF_INET); - else - rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, - peer, AF_INET); - } - if (afi == AFI_ALL || afi == AFI_IPv6) - if (safi == SAFI_ALL || safi == SAFI_UNICAST) { - if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) - up_generate_default(rules_l, peer, AF_INET6); - else - rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, - peer, AF_INET6); - } + if (peer->staletime[aid]) + peer_flush(peer, aid); + peer->staletime[aid] = now = time(NULL); - if (peer->capa_received.restart && peer->capa_announced.restart) - peer_send_eor(peer, afi, safi); + /* make sure new prefixes start on a higher timestamp */ + do { + sleep(1); + } while (now >= time(NULL)); } -/* End-of-RIB marker, draft-ietf-idr-restart-13.txt */ void -peer_send_eor(struct rde_peer *peer, u_int16_t afi, u_int16_t safi) +peer_dump(u_int32_t id, u_int8_t aid) { - if (afi == AFI_IPv4 && safi == SAFI_UNICAST) { + struct rde_peer *peer; + + peer = peer_get(id); + if (peer == NULL) { + log_warnx("peer_dump: unknown peer id %d", id); + return; + } + + if (peer->conf.announce_type == ANNOUNCE_DEFAULT_ROUTE) + up_generate_default(rules_l, peer, aid); + else + rib_dump(&ribs[peer->ribid], rde_up_dump_upcall, peer, aid); + if (peer->capa.grestart.restart) + up_generate_marker(peer, aid); +} + +/* End-of-RIB marker, RFC 4724 */ +void +peer_recv_eor(struct rde_peer *peer, u_int8_t aid) +{ + peer->prefix_rcvd_eor++; + + /* First notify SE to remove possible race with the timeout. */ + if (imsg_compose(ibuf_se, IMSG_SESSION_RESTARTED, peer->conf.id, + 0, -1, &aid, sizeof(aid)) == -1) + fatal("imsg_compose error"); +} + +void +peer_send_eor(struct rde_peer *peer, u_int8_t aid) +{ + u_int16_t afi; + u_int8_t safi; + + peer->prefix_sent_eor++; + + if (aid == AID_INET) { u_char null[4]; bzero(&null, 4); -@@ -2688,6 +3392,9 @@ peer_send_eor(struct rde_peer *peer, u_i +@@ -2688,6 +3393,9 @@ peer_send_eor(struct rde_peer *peer, u_i u_int16_t i; u_char buf[10]; + if (aid2afi(aid, &afi, &safi) == -1) + fatalx("peer_send_eor: bad AID"); + i = 0; /* v4 withdrawn len */ bcopy(&i, &buf[0], sizeof(i)); i = htons(6); /* path attr len */ -@@ -2709,39 +3416,61 @@ peer_send_eor(struct rde_peer *peer, u_i +@@ -2709,39 +3417,61 @@ peer_send_eor(struct rde_peer *peer, u_i * network announcement stuff */ void -network_init(struct network_head *net_l) -{ - struct network *n; - - reloadtime = time(NULL); - - while ((n = TAILQ_FIRST(net_l)) != NULL) { - TAILQ_REMOVE(net_l, n, entry); - network_add(&n->net, 1); - free(n); - } -} - -void network_add(struct network_config *nc, int flagstatic) { + struct rdomain *rd; struct rde_aspath *asp; + struct filter_set_head *vpnset = NULL; + in_addr_t prefix4; u_int16_t i; - asp = path_get(); - asp->aspath = aspath_get(NULL, 0); - asp->origin = ORIGIN_IGP; - asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH | - F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED; - /* the nexthop is unset unless a default set overrides it */ + if (nc->rtableid) { + SIMPLEQ_FOREACH(rd, rdomains_l, entry) { + if (rd->rtableid != nc->rtableid) + continue; + switch (nc->prefix.aid) { + case AID_INET: + prefix4 = nc->prefix.v4.s_addr; + bzero(&nc->prefix, sizeof(nc->prefix)); + nc->prefix.aid = AID_VPN_IPv4; + nc->prefix.vpn4.rd = rd->rd; + nc->prefix.vpn4.addr.s_addr = prefix4; + nc->prefix.vpn4.labellen = 3; + nc->prefix.vpn4.labelstack[0] = + (rd->label >> 12) & 0xff; + nc->prefix.vpn4.labelstack[1] = + (rd->label >> 4) & 0xff; + nc->prefix.vpn4.labelstack[2] = + (rd->label << 4) & 0xf0; + nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS; + vpnset = &rd->export; + break; + default: + log_warnx("unable to VPNize prefix"); + filterset_free(&nc->attrset); + return; + } + } + } + + if (nc->type == NETWORK_MRTCLONE) { + asp = nc->asp; + } else { + asp = path_get(); + asp->aspath = aspath_get(NULL, 0); + asp->origin = ORIGIN_IGP; + asp->flags = F_ATTR_ORIGIN | F_ATTR_ASPATH | + F_ATTR_LOCALPREF | F_PREFIX_ANNOUNCED; + /* the nexthop is unset unless a default set overrides it */ + } if (!flagstatic) asp->flags |= F_ANN_DYNAMIC; - - rde_apply_set(asp, &nc->attrset, nc->prefix.af, peerself, peerself); + rde_apply_set(asp, &nc->attrset, nc->prefix.aid, peerself, peerself); + if (vpnset) + rde_apply_set(asp, vpnset, nc->prefix.aid, peerself, peerself); for (i = 1; i < rib_size; i++) path_update(&ribs[i], peerself, asp, &nc->prefix, nc->prefixlen); - path_put(asp); filterset_free(&nc->attrset); } -@@ -2749,12 +3478,41 @@ network_add(struct network_config *nc, i +@@ -2749,12 +3479,41 @@ network_add(struct network_config *nc, i void network_delete(struct network_config *nc, int flagstatic) { - u_int32_t flags = F_PREFIX_ANNOUNCED; - u_int32_t i; + struct rdomain *rd; + in_addr_t prefix4; + u_int32_t flags = F_PREFIX_ANNOUNCED; + u_int32_t i; if (!flagstatic) flags |= F_ANN_DYNAMIC; + if (nc->rtableid) { + SIMPLEQ_FOREACH(rd, rdomains_l, entry) { + if (rd->rtableid != nc->rtableid) + continue; + switch (nc->prefix.aid) { + case AID_INET: + prefix4 = nc->prefix.v4.s_addr; + bzero(&nc->prefix, sizeof(nc->prefix)); + nc->prefix.aid = AID_VPN_IPv4; + nc->prefix.vpn4.rd = rd->rd; + nc->prefix.vpn4.addr.s_addr = prefix4; + nc->prefix.vpn4.labellen = 3; + nc->prefix.vpn4.labelstack[0] = + (rd->label >> 12) & 0xff; + nc->prefix.vpn4.labelstack[1] = + (rd->label >> 4) & 0xff; + nc->prefix.vpn4.labelstack[2] = + (rd->label << 4) & 0xf0; + nc->prefix.vpn4.labelstack[2] |= BGP_MPLS_BOS; + break; + default: + log_warnx("unable to VPNize prefix"); + return; + } + } + } + for (i = rib_size - 1; i > 0; i--) prefix_remove(&ribs[i], peerself, &nc->prefix, nc->prefixlen, flags); -@@ -2764,38 +3522,31 @@ void +@@ -2764,38 +3523,31 @@ void network_dump_upcall(struct rib_entry *re, void *ptr) { struct prefix *p; - struct kroute k; - struct kroute6 k6; + struct kroute_full k; struct bgpd_addr addr; struct rde_dump_ctx *ctx = ptr; LIST_FOREACH(p, &re->prefix_h, rib_l) { if (!(p->aspath->flags & F_PREFIX_ANNOUNCED)) continue; - if (p->prefix->af == AF_INET) { - bzero(&k, sizeof(k)); - pt_getaddr(p->prefix, &addr); - k.prefix.s_addr = addr.v4.s_addr; - k.prefixlen = p->prefix->prefixlen; - if (p->aspath->peer == peerself) - k.flags = F_KERNEL; - if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0, - ctx->req.pid, -1, &k, sizeof(k)) == -1) - log_warnx("network_dump_upcall: " - "imsg_compose error"); - } - if (p->prefix->af == AF_INET6) { - bzero(&k6, sizeof(k6)); - pt_getaddr(p->prefix, &addr); - memcpy(&k6.prefix, &addr.v6, sizeof(k6.prefix)); - k6.prefixlen = p->prefix->prefixlen; - if (p->aspath->peer == peerself) - k6.flags = F_KERNEL; - if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK6, 0, - ctx->req.pid, -1, &k6, sizeof(k6)) == -1) - log_warnx("network_dump_upcall: " - "imsg_compose error"); - } + pt_getaddr(p->prefix, &addr); + + bzero(&k, sizeof(k)); + memcpy(&k.prefix, &addr, sizeof(k.prefix)); + if (p->aspath->nexthop == NULL || + p->aspath->nexthop->state != NEXTHOP_REACH) + k.nexthop.aid = k.prefix.aid; + else + memcpy(&k.nexthop, &p->aspath->nexthop->true_nexthop, + sizeof(k.nexthop)); + k.prefixlen = p->prefix->prefixlen; + k.flags = F_KERNEL; + if ((p->aspath->flags & F_ANN_DYNAMIC) == 0) + k.flags = F_STATIC; + if (imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NETWORK, 0, + ctx->req.pid, -1, &k, sizeof(k)) == -1) + log_warnx("network_dump_upcall: " + "imsg_compose error"); } } -@@ -2841,10 +3592,10 @@ sa_cmp(struct bgpd_addr *a, struct socka +@@ -2841,10 +3593,10 @@ sa_cmp(struct bgpd_addr *a, struct socka struct sockaddr_in *in_b; struct sockaddr_in6 *in6_b; - if (a->af != b->sa_family) + if (aid2af(a->aid) != b->sa_family) return (1); - switch (a->af) { + switch (b->sa_family) { case AF_INET: in_b = (struct sockaddr_in *)b; if (a->v4.s_addr != in_b->sin_addr.s_addr) +@@ -2855,10 +3607,11 @@ sa_cmp(struct bgpd_addr *a, struct socka + #ifdef __KAME__ + /* directly stolen from sbin/ifconfig/ifconfig.c */ + if (IN6_IS_ADDR_LINKLOCAL(&in6_b->sin6_addr)) { +- in6_b->sin6_scope_id = +- ntohs(*(u_int16_t *)&in6_b->sin6_addr.s6_addr[2]); +- in6_b->sin6_addr.s6_addr[2] = +- in6_b->sin6_addr.s6_addr[3] = 0; ++ if (in6_b->sin6_scope_id == 0) { ++ in6_b->sin6_scope_id = ++ IN6_LINKLOCAL_IFINDEX(in6_b->sin6_addr); ++ } ++ SET_IN6_LINKLOCAL_IFINDEX(in6_b->sin6_addr, 0); + } + #endif + if (bcmp(&a->v6, &in6_b->sin6_addr,