Page MenuHomeFreeBSD

netlink/route: extend pre-2.6.19 Linux compat shim to del/getroute
ClosedPublic

Authored by olivier on Fri, May 29, 5:30 PM.
Tags
None
Referenced Files
F159349554: D57338.id178961.diff
Sat, Jun 13, 3:55 AM
F159340660: D57338.id178961.diff
Sat, Jun 13, 1:20 AM
Unknown Object (File)
Mon, Jun 8, 11:14 PM
Unknown Object (File)
Mon, Jun 8, 12:14 PM
Unknown Object (File)
Mon, Jun 8, 12:09 PM
Unknown Object (File)
Sat, Jun 6, 11:28 PM
Unknown Object (File)
Sat, Jun 6, 11:24 PM
Unknown Object (File)
Sat, Jun 6, 10:00 AM

Details

Summary

Commit f34aca55adef ("netlink/route: provide pre-2.6.19 Linux compat shim", 2024-06) fixed the partial fix for net/bird2 on the netlink path by mapping the legacy 8-bit struct rtmsg::rtm_table field onto the modern 32-bit RTA_TABLE attribute when the latter is absent.

That fix, however, was only applied to rtnl_handle_newroute. The two sibling handlers: rtnl_handle_delroute and rtnl_handle_getroute were left looking at attrs.rta_table directly. They are reachable from exactly the same client (bird, in its netlink scan path), so any FIB number that fits in 8 bits silently maps to RT_TABLE_UNSPEC in those
handlers.

Test Plan

Example of regression test script that fails with bird@netlink and works with bird@rtsock.
Fails before this patch, works after.

#!/bin/sh
# Usage: sh bird_fib_test.sh start|check|stop
set -eu

SUDO=${SUDO:-sudo}
RUNDIR=/var/run/bird-fibtest
CONF=${RUNDIR}/bird.conf
CTL=${RUNDIR}/bird.ctl
LOG=/var/log/bird-fibtest.log
FIB=1
LO_LOCAL=lo901          # carries 10.99.1.1/24 (router) - in FIB ${FIB}
KERNEL_IN_FIB1=10.55.0.0/24      # manually installed in FIB 1 via `route add -fib 1`
STATIC_TO_FIB1=10.123.0.0/24     # bird's static route, should land in FIB 1
NEXTHOP=10.99.1.2       # bogus host on the lo901 subnet so the static is resolvable

die() { echo "EXIT: $*" >&2; exit 1; }

usage () {
	echo "Usage: $0 start|check|stop"
}

check_req () {
	which bird   >/dev/null 2>&1 || die "net/bird2 not installed: bird not found"
	which birdc  >/dev/null 2>&1 || die "net/bird2 not installed: birdc not found"
	fibs=$(sysctl -n net.fibs 2>/dev/null || echo 1)
	[ "${fibs}" -ge 2 ] || die "kernel built with net.fibs=${fibs}, need >=2 (set net.fibs=2 in /boot/loader.conf and reboot)"
	# Identify which flavor is installed; useful for the report.
	if pkg query %n bird2 >/dev/null 2>&1; then
		flavor=$(pkg query %n bird2)
		echo "Installed package: ${flavor}"
	fi
}

write_conf () {
	${SUDO} mkdir -p "${RUNDIR}"
	cat <<EOF | ${SUDO} tee "${CONF}" >/dev/null
# bird config for multi-FIB regression test
log "${LOG}" all;
log stderr all;

router id 10.99.1.1;

protocol device {
	scan time 10;
}

# Import the directly-connected network from lo901.
protocol direct {
	ipv4;
	interface "${LO_LOCAL}";
}

# Single kernel protocol bound to FIB ${FIB}. With a buggy netlink build
# this is the code path that misbehaves: routes living in kernel FIB ${FIB}
# should be imported into bird (learn), and bird's static route should be
# installed into kernel FIB ${FIB} (export). bird 2.x refuses two kernel
# protocols on the same bird table, so we only declare one here.
protocol kernel kernel${FIB} {
	learn;
	kernel table ${FIB};
	ipv4 {
		import all;
		export all;
	};
}

# A static route that bird should push down into kernel FIB ${FIB}.
protocol static static_to_fib${FIB} {
	ipv4;
	route ${STATIC_TO_FIB1} via ${NEXTHOP};
}
EOF
}

start () {
	check_req
	# Refuse to start if anything is already running.
	if [ -S "${CTL}" ]; then
		die "control socket ${CTL} already exists - run '$0 stop' first"
	fi

	# Create a loopback clone bound to FIB ${FIB} to carry the router IP.
	${SUDO} ifconfig "${LO_LOCAL}" create  || die "cannot create ${LO_LOCAL}"
	${SUDO} ifconfig "${LO_LOCAL}" fib "${FIB}"
	${SUDO} ifconfig "${LO_LOCAL}" inet 10.99.1.1/24 up

	# Manually inject a static route into kernel FIB ${FIB} so that bird's
	# kernel protocol (with `learn`) has something non-trivial to import.
	${SUDO} route -4 add -fib "${FIB}" "${KERNEL_IN_FIB1}" -iface "${LO_LOCAL}" >/dev/null

	echo "---- FIB ${FIB} routes ----"
	netstat -4 -rn -F "${FIB}"

	write_conf
	${SUDO} bird -c "${CONF}" -s "${CTL}"
	# Give bird a beat to converge.
	sleep 2
	echo "bird started; logs in ${LOG}; run '$0 check' to verify."
}

check () {
	[ -S "${CTL}" ] || die "bird control socket ${CTL} missing - did you run start?"

	pass=0
	fail=0

	echo "==== bird: routes learned from kernel ===="
	# Routes that bird imported via kernel protocols. If the netlink bug
	# bites, the 10.99.2.0/24 route (only in FIB ${FIB}) will be absent.
	${SUDO} birdc -s "${CTL}" "show route protocol kernel${FIB}" || true

	if ${SUDO} birdc -s "${CTL}" "show route protocol kernel${FIB}" 2>/dev/null \
	    | grep -q "${KERNEL_IN_FIB1%/*}"; then
		echo "PASS: bird learned ${KERNEL_IN_FIB1} from kernel FIB ${FIB}"
		pass=$((pass+1))
	else
		echo "FAIL: bird did NOT learn ${KERNEL_IN_FIB1} from kernel FIB ${FIB}"
		fail=$((fail+1))
	fi

	echo "==== kernel FIB ${FIB}: routes pushed by bird ===="
	netstat -4 -rn -F "${FIB}" || true

	if netstat -4 -rn -F "${FIB}" | awk '{print $1}' | grep -q "^${STATIC_TO_FIB1%/*}"; then
		echo "PASS: kernel FIB ${FIB} contains ${STATIC_TO_FIB1} (pushed by bird)"
		pass=$((pass+1))
	else
		echo "FAIL: kernel FIB ${FIB} does NOT contain ${STATIC_TO_FIB1}"
		fail=$((fail+1))
	fi

	echo "==== summary: ${pass} pass / ${fail} fail ===="
	if [ "${fail}" -gt 0 ]; then
		echo "---- tail of ${LOG} ----"
		${SUDO} tail -n 40 "${LOG}" 2>/dev/null || true
		exit 1
	fi
}

stop () {
	if [ -S "${CTL}" ]; then
		${SUDO} birdc -s "${CTL}" down 2>/dev/null || true
		sleep 1
	fi
	${SUDO} pkill -f "bird -c ${CONF}" 2>/dev/null || true
	# Best-effort cleanup; destroying lo901 also removes routes that
	# referenced it, so order doesn't matter much.
	${SUDO} route -4 delete -fib "${FIB}" "${KERNEL_IN_FIB1}" 2>/dev/null || true
	${SUDO} route -4 delete -fib "${FIB}" "${STATIC_TO_FIB1}" 2>/dev/null || true
	${SUDO} ifconfig "${LO_LOCAL}" destroy 2>/dev/null || true
	${SUDO} rm -f "${CONF}" "${CTL}"
	${SUDO} rm -rf "${RUNDIR}"
	echo "cleaned up."
}

if [ $# -eq 0 ]; then
	usage
	exit 2
fi
case "$1" in
	start|check|stop) "$1" ;;
	*) usage; exit 2 ;;
esac

Diff Detail

Lint
Lint Skipped
Unit
Tests Skipped

Event Timeline

Perhaps worth a static inline old_linux_compat() to avoid repeating it 3 times? LGTM either way

This revision is now accepted and ready to land.Fri, May 29, 10:00 PM
This revision now requires review to proceed.Fri, May 29, 10:19 PM

Perhaps worth a static inline old_linux_compat() to avoid repeating it 3 times? LGTM either way

good idea! Done

This revision is now accepted and ready to land.Fri, May 29, 10:19 PM