diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c index 0abb5352bed5..ead7d0e506c0 100644 --- a/sys/kern/uipc_debug.c +++ b/sys/kern/uipc_debug.c @@ -1,526 +1,525 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Debugger routines relating to sockets, protocols, etc, for use in DDB. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #ifdef DDB #include static void db_print_sotype(short so_type) { switch (so_type) { case SOCK_STREAM: db_printf("SOCK_STREAM"); break; case SOCK_DGRAM: db_printf("SOCK_DGRAM"); break; case SOCK_RAW: db_printf("SOCK_RAW"); break; case SOCK_RDM: db_printf("SOCK_RDM"); break; case SOCK_SEQPACKET: db_printf("SOCK_SEQPACKET"); break; default: db_printf("unknown"); break; } } static void db_print_sooptions(int so_options) { int comma; comma = 0; if (so_options & SO_DEBUG) { db_printf("%sSO_DEBUG", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTCONN) { db_printf("%sSO_ACCEPTCONN", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEADDR) { db_printf("%sSO_REUSEADDR", comma ? ", " : ""); comma = 1; } if (so_options & SO_KEEPALIVE) { db_printf("%sSO_KEEPALIVE", comma ? ", " : ""); comma = 1; } if (so_options & SO_DONTROUTE) { db_printf("%sSO_DONTROUTE", comma ? ", " : ""); comma = 1; } if (so_options & SO_BROADCAST) { db_printf("%sSO_BROADCAST", comma ? ", " : ""); comma = 1; } if (so_options & SO_USELOOPBACK) { db_printf("%sSO_USELOOPBACK", comma ? ", " : ""); comma = 1; } if (so_options & SO_LINGER) { db_printf("%sSO_LINGER", comma ? ", " : ""); comma = 1; } if (so_options & SO_OOBINLINE) { db_printf("%sSO_OOBINLINE", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT) { db_printf("%sSO_REUSEPORT", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT_LB) { db_printf("%sSO_REUSEPORT_LB", comma ? ", " : ""); comma = 1; } if (so_options & SO_TIMESTAMP) { db_printf("%sSO_TIMESTAMP", comma ? ", " : ""); comma = 1; } if (so_options & SO_NOSIGPIPE) { db_printf("%sSO_NOSIGPIPE", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTFILTER) { db_printf("%sSO_ACCEPTFILTER", comma ? 
", " : ""); comma = 1; } if (so_options & SO_BINTIME) { db_printf("%sSO_BINTIME", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_OFFLOAD) { db_printf("%sSO_NO_OFFLOAD", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_DDP) { db_printf("%sSO_NO_DDP", comma ? ", " : ""); comma = 1; } } static void db_print_sostate(short so_state) { int comma; comma = 0; if (so_state & SS_ISCONNECTED) { db_printf("%sSS_ISCONNECTED", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONNECTING) { db_printf("%sSS_ISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISDISCONNECTING) { db_printf("%sSS_ISDISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_NBIO) { db_printf("%sSS_NBIO", comma ? ", " : ""); comma = 1; } if (so_state & SS_ASYNC) { db_printf("%sSS_ASYNC", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONFIRMING) { db_printf("%sSS_ISCONFIRMING", comma ? ", " : ""); comma = 1; } } static void db_print_soqstate(int so_qstate) { int comma; comma = 0; if (so_qstate & SQ_INCOMP) { db_printf("%sSQ_INCOMP", comma ? ", " : ""); comma = 1; } if (so_qstate & SQ_COMP) { db_printf("%sSQ_COMP", comma ? ", " : ""); comma = 1; } } static void db_print_sbstate(short sb_state) { int comma; comma = 0; if (sb_state & SBS_CANTSENDMORE) { db_printf("%sSBS_CANTSENDMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_CANTRCVMORE) { db_printf("%sSBS_CANTRCVMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_RCVATMARK) { db_printf("%sSBS_RCVATMARK", comma ? ", " : ""); comma = 1; } } static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_domain(struct domain *d, const char *domain_name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", domain_name, d); indent += 2; db_print_indent(indent); db_printf("dom_family: %d ", d->dom_family); db_printf("dom_name: %s\n", d->dom_name); db_print_indent(indent); db_printf("dom_externalize: %p ", d->dom_externalize); db_printf("dom_dispose: %p\n", d->dom_dispose); db_print_indent(indent); db_printf("dom_protosw: %p ", d->dom_protosw); db_printf("dom_next: %p\n", d->dom_next); db_print_indent(indent); db_printf("dom_rtattach: %p ", d->dom_rtattach); db_print_indent(indent); db_printf("dom_ifattach: %p ", d->dom_ifattach); db_printf("dom_ifdetach: %p\n", d->dom_ifdetach); } static void db_print_prflags(short pr_flags) { int comma; comma = 0; if (pr_flags & PR_ATOMIC) { db_printf("%sPR_ATOMIC", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_ADDR) { db_printf("%sPR_ADDR", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_CONNREQUIRED) { db_printf("%sPR_CONNREQUIRED", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_WANTRCVD) { db_printf("%sPR_WANTRCVD", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_RIGHTS) { db_printf("%sPR_RIGHTS", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_IMPLOPCL) { db_printf("%sPR_IMPLOPCL", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_LASTHDR) { db_printf("%sPR_LASTHDR", comma ? 
", " : ""); comma = 1; } } static void db_print_protosw(struct protosw *pr, const char *prname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", prname, pr); indent += 2; db_print_indent(indent); db_printf("pr_type: %d ", pr->pr_type); db_printf("pr_domain: %p\n", pr->pr_domain); if (pr->pr_domain != NULL) db_print_domain(pr->pr_domain, "pr_domain", indent); db_print_indent(indent); db_printf("pr_protocol: %d\n", pr->pr_protocol); db_print_indent(indent); db_printf("pr_flags: %d (", pr->pr_flags); db_print_prflags(pr->pr_flags); db_printf(")\n"); db_print_indent(indent); db_printf("pr_input: %p ", pr->pr_input); - db_printf("pr_output: %p ", pr->pr_output); db_printf("pr_ctlinput: %p\n", pr->pr_ctlinput); db_printf("pr_ctloutput: %p ", pr->pr_ctloutput); db_print_indent(indent); db_printf("pr_fasttimo: %p ", pr->pr_fasttimo); db_printf("pr_slowtimo: %p ", pr->pr_slowtimo); db_printf("pr_drain: %p\n", pr->pr_drain); } static void db_print_sbflags(short sb_flags) { int comma; comma = 0; if (sb_flags & SB_WAIT) { db_printf("%sSB_WAIT", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_SEL) { db_printf("%sSB_SEL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_ASYNC) { db_printf("%sSB_ASYNC", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_UPCALL) { db_printf("%sSB_UPCALL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_NOINTR) { db_printf("%sSB_NOINTR", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AIO) { db_printf("%sSB_AIO", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_KNOTE) { db_printf("%sSB_KNOTE", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AUTOSIZE) { db_printf("%sSB_AUTOSIZE", comma ? ", " : ""); comma = 1; } } static void db_print_sockbuf(struct sockbuf *sb, const char *sockbufname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", sockbufname, sb); indent += 2; db_print_indent(indent); db_printf("sb_state: 0x%x (", sb->sb_state); db_print_sbstate(sb->sb_state); db_printf(")\n"); db_print_indent(indent); db_printf("sb_mb: %p ", sb->sb_mb); db_printf("sb_mbtail: %p ", sb->sb_mbtail); db_printf("sb_lastrecord: %p\n", sb->sb_lastrecord); db_print_indent(indent); db_printf("sb_sndptr: %p ", sb->sb_sndptr); db_printf("sb_sndptroff: %u\n", sb->sb_sndptroff); db_print_indent(indent); db_printf("sb_acc: %u ", sb->sb_acc); db_printf("sb_ccc: %u ", sb->sb_ccc); db_printf("sb_hiwat: %u ", sb->sb_hiwat); db_printf("sb_mbcnt: %u ", sb->sb_mbcnt); db_printf("sb_mbmax: %u\n", sb->sb_mbmax); db_print_indent(indent); db_printf("sb_ctl: %u ", sb->sb_ctl); db_printf("sb_lowat: %d ", sb->sb_lowat); db_printf("sb_timeo: %jd\n", sb->sb_timeo); db_print_indent(indent); db_printf("sb_flags: 0x%x (", sb->sb_flags); db_print_sbflags(sb->sb_flags); db_printf(")\n"); db_print_indent(indent); db_printf("sb_aiojobq first: %p\n", TAILQ_FIRST(&sb->sb_aiojobq)); } static void db_print_socket(struct socket *so, const char *socketname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", socketname, so); indent += 2; db_print_indent(indent); db_printf("so_count: %d ", so->so_count); db_printf("so_type: %d (", so->so_type); db_print_sotype(so->so_type); db_printf(")\n"); db_print_indent(indent); db_printf("so_options: 0x%x (", so->so_options); db_print_sooptions(so->so_options); db_printf(")\n"); db_print_indent(indent); db_printf("so_linger: %d ", so->so_linger); db_printf("so_state: 0x%x (", so->so_state); db_print_sostate(so->so_state); db_printf(")\n"); db_print_indent(indent); db_printf("so_pcb: %p ", so->so_pcb); db_printf("so_proto: %p\n", 
so->so_proto); if (so->so_proto != NULL) db_print_protosw(so->so_proto, "so_proto", indent); db_print_indent(indent); if (so->so_options & SO_ACCEPTCONN) { db_printf("sol_incomp first: %p ", TAILQ_FIRST(&so->sol_incomp)); db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp)); db_printf("sol_qlen: %d ", so->sol_qlen); db_printf("sol_incqlen: %d ", so->sol_incqlen); db_printf("sol_qlimit: %d ", so->sol_qlimit); } else { db_printf("so_qstate: 0x%x (", so->so_qstate); db_print_soqstate(so->so_qstate); db_printf(") "); db_printf("so_listen: %p ", so->so_listen); /* so_list skipped */ db_printf("so_timeo: %d ", so->so_timeo); db_printf("so_error: %d\n", so->so_error); db_print_indent(indent); db_printf("so_sigio: %p ", so->so_sigio); db_printf("so_oobmark: %lu\n", so->so_oobmark); db_print_sockbuf(&so->so_rcv, "so_rcv", indent); db_print_sockbuf(&so->so_snd, "so_snd", indent); } } DB_SHOW_COMMAND(socket, db_show_socket) { struct socket *so; if (!have_addr) { db_printf("usage: show socket <addr>\n"); return; } so = (struct socket *)addr; db_print_socket(so, "socket", 0); } DB_SHOW_COMMAND(sockbuf, db_show_sockbuf) { struct sockbuf *sb; if (!have_addr) { db_printf("usage: show sockbuf <addr>\n"); return; } sb = (struct sockbuf *)addr; db_print_sockbuf(sb, "sockbuf", 0); } DB_SHOW_COMMAND(protosw, db_show_protosw) { struct protosw *pr; if (!have_addr) { db_printf("usage: show protosw <addr>\n"); return; } pr = (struct protosw *)addr; db_print_protosw(pr, "protosw", 0); } DB_SHOW_COMMAND(domain, db_show_domain) { struct domain *d; if (!have_addr) { db_printf("usage: show domain <addr>\n"); return; } d = (struct domain *)addr; db_print_domain(d, "domain", 0); } #endif
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c index 18a682de276e..07e5f829e957 100644 --- a/sys/kern/uipc_domain.c +++ b/sys/kern/uipc_domain.c @@ -1,514 +1,513 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization * * Note: domain initialization takes place on a per domain basis * as a result of traversing a SYSINIT linker set. Most likely, * each domain would want to call DOMAIN_SET(9) itself, which * would cause the domain to be added just after domaininit() * is called during startup. * * See DOMAIN_SET(9) for details on its use. */ static void domaininit(void *); SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL); static void domainfinalize(void *); SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize, NULL); static struct callout pffast_callout; static struct callout pfslow_callout; static void pffasttimo(void *); static void pfslowtimo(void *); static struct rmlock pftimo_lock; RM_SYSINIT(pftimo_lock, &pftimo_lock, "pftimo"); static LIST_HEAD(, protosw) pffast_list = LIST_HEAD_INITIALIZER(pffast_list); static LIST_HEAD(, protosw) pfslow_list = LIST_HEAD_INITIALIZER(pfslow_list); struct domain *domains; /* registered protocol domains */ int domain_init_status = 0; static struct mtx dom_mtx; /* domain list lock */ MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF); /* * Dummy protocol specific user requests function pointer array. * All functions return EOPNOTSUPP. */ struct pr_usrreqs nousrreqs = { .pru_accept = pru_accept_notsupp, .pru_attach = pru_attach_notsupp, .pru_bind = pru_bind_notsupp, .pru_connect = pru_connect_notsupp, .pru_connect2 = pru_connect2_notsupp, .pru_control = pru_control_notsupp, .pru_disconnect = pru_disconnect_notsupp, .pru_listen = pru_listen_notsupp, .pru_peeraddr = pru_peeraddr_notsupp, .pru_rcvd = pru_rcvd_notsupp, .pru_rcvoob = pru_rcvoob_notsupp, .pru_send = pru_send_notsupp, .pru_sense = pru_sense_null, .pru_shutdown = pru_shutdown_notsupp, .pru_sockaddr = pru_sockaddr_notsupp, .pru_sosend = pru_sosend_notsupp, .pru_soreceive = pru_soreceive_notsupp, .pru_sopoll = pru_sopoll_notsupp, }; static void pr_usrreqs_init(struct protosw *pr) { struct pr_usrreqs *pu; pu = pr->pr_usrreqs; KASSERT(pu != NULL, ("%s: %ssw[%d] has no usrreqs!", __func__, pr->pr_domain->dom_name, (int)(pr - pr->pr_domain->dom_protosw))); /* * Protocol switch methods fall into three categories: mandatory, * mandatory but protosw_init() provides a default, and optional. * * For true protocols (i.e., pru_attach != NULL), KASSERT truly * mandatory methods with no defaults, and initialize defaults for * other mandatory methods if the protocol hasn't defined an * implementation (NULL function pointer). 
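 * For example, a protocol that leaves pru_accept NULL ends up with
 *
 *	DEFAULT(pu->pru_accept, pru_accept_notsupp);
 *
 * i.e., pu->pru_accept = pru_accept_notsupp, so callers may invoke any
 * method without a NULL check (an illustrative reading of the DEFAULT()
 * macro below).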
*/ #if 0 if (pu->pru_attach != NULL) { KASSERT(pu->pru_abort != NULL, ("protosw_init: %ssw[%d] pru_abort NULL", pr->pr_domain->dom_name, (int)(pr - pr->pr_domain->dom_protosw))); KASSERT(pu->pru_send != NULL, ("protosw_init: %ssw[%d] pru_send NULL", pr->pr_domain->dom_name, (int)(pr - pr->pr_domain->dom_protosw))); } #endif #define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar) DEFAULT(pu->pru_accept, pru_accept_notsupp); DEFAULT(pu->pru_aio_queue, pru_aio_queue_notsupp); DEFAULT(pu->pru_bind, pru_bind_notsupp); DEFAULT(pu->pru_bindat, pru_bindat_notsupp); DEFAULT(pu->pru_connect, pru_connect_notsupp); DEFAULT(pu->pru_connect2, pru_connect2_notsupp); DEFAULT(pu->pru_connectat, pru_connectat_notsupp); DEFAULT(pu->pru_control, pru_control_notsupp); DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp); DEFAULT(pu->pru_listen, pru_listen_notsupp); DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp); DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp); DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp); DEFAULT(pu->pru_sense, pru_sense_null); DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp); DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp); DEFAULT(pu->pru_sosend, sosend_generic); DEFAULT(pu->pru_soreceive, soreceive_generic); DEFAULT(pu->pru_sopoll, sopoll_generic); DEFAULT(pu->pru_ready, pru_ready_notsupp); #undef DEFAULT } /* * Add a new protocol domain to the list of supported domains * Note: you can't unload it again because a socket may be using it. * XXX can't fail at this time. */ void domain_init(void *arg) { struct domain *dp = arg; struct protosw *pr; int flags; MPASS(IS_DEFAULT_VNET(curvnet)); flags = atomic_load_acq_int(&dp->dom_flags); if ((flags & DOMF_SUPPORTED) == 0) return; MPASS((flags & DOMF_INITED) == 0); for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { pr_usrreqs_init(pr); rm_wlock(&pftimo_lock); if (pr->pr_fasttimo != NULL) LIST_INSERT_HEAD(&pffast_list, pr, pr_fasttimos); if (pr->pr_slowtimo != NULL) LIST_INSERT_HEAD(&pfslow_list, pr, pr_slowtimos); rm_wunlock(&pftimo_lock); } /* * update global information about maximums */ max_hdr = max_linkhdr + max_protohdr; max_datalen = MHLEN - max_hdr; if (max_datalen < 1) panic("%s: max_datalen < 1", __func__); atomic_set_rel_int(&dp->dom_flags, DOMF_INITED); } /* * Add a new protocol domain to the list of supported domains * Note: you can't unload it again because a socket may be using it. * XXX can't fail at this time.
*/ void domain_add(void *data) { struct domain *dp; dp = (struct domain *)data; if (dp->dom_probe != NULL && (*dp->dom_probe)() != 0) return; atomic_set_rel_int(&dp->dom_flags, DOMF_SUPPORTED); mtx_lock(&dom_mtx); dp->dom_next = domains; domains = dp; KASSERT(domain_init_status >= 1, ("attempt to domain_add(%s) before domaininit()", dp->dom_name)); #ifndef INVARIANTS if (domain_init_status < 1) printf("WARNING: attempt to domain_add(%s) before " "domaininit()\n", dp->dom_name); #endif mtx_unlock(&dom_mtx); } /* ARGSUSED*/ static void domaininit(void *dummy) { if (max_linkhdr < 16) /* XXX */ max_linkhdr = 16; callout_init(&pffast_callout, 1); callout_init(&pfslow_callout, 1); mtx_lock(&dom_mtx); KASSERT(domain_init_status == 0, ("domaininit called too late!")); domain_init_status = 1; mtx_unlock(&dom_mtx); } /* ARGSUSED*/ static void domainfinalize(void *dummy) { mtx_lock(&dom_mtx); KASSERT(domain_init_status == 1, ("domainfinalize called too late!")); domain_init_status = 2; mtx_unlock(&dom_mtx); callout_reset(&pffast_callout, 1, pffasttimo, NULL); callout_reset(&pfslow_callout, 1, pfslowtimo, NULL); } struct domain * pffinddomain(int family) { struct domain *dp; for (dp = domains; dp != NULL; dp = dp->dom_next) if (dp->dom_family == family) return (dp); return (NULL); } struct protosw * pffindtype(int family, int type) { struct domain *dp; struct protosw *pr; dp = pffinddomain(family); if (dp == NULL) return (NULL); for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_type && pr->pr_type == type) return (pr); return (NULL); } struct protosw * pffindproto(int family, int protocol, int type) { struct domain *dp; struct protosw *pr; struct protosw *maybe; maybe = NULL; if (family == 0) return (NULL); dp = pffinddomain(family); if (dp == NULL) return (NULL); for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if ((pr->pr_protocol == protocol) && (pr->pr_type == type)) return (pr); if (type == SOCK_RAW && pr->pr_type == SOCK_RAW && pr->pr_protocol == 0 && maybe == NULL) maybe = pr; } return (maybe); } /* * The caller must make sure that the new protocol is fully set up and ready to * accept requests before it is registered. */ int pf_proto_register(int family, struct protosw *npr) { struct domain *dp; struct protosw *pr, *fpr; /* Sanity checks. */ if (family == 0) return (EPFNOSUPPORT); if (npr->pr_type == 0) return (EPROTOTYPE); if (npr->pr_protocol == 0) return (EPROTONOSUPPORT); if (npr->pr_usrreqs == NULL) return (ENXIO); /* Try to find the specified domain based on the family. */ dp = pffinddomain(family); if (dp == NULL) return (EPFNOSUPPORT); /* Initialize backpointer to struct domain. */ npr->pr_domain = dp; fpr = NULL; /* * Protect us against races when two protocol registrations for * the same protocol happen at the same time. */ mtx_lock(&dom_mtx); /* The new protocol must not yet exist. */ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if ((pr->pr_type == npr->pr_type) && (pr->pr_protocol == npr->pr_protocol)) { mtx_unlock(&dom_mtx); return (EEXIST); /* XXX: Check only protocol? */ } /* While here, remember the first free spacer. */ if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER)) fpr = pr; } /* If no free spacer is found we can't add the new protocol. */ if (fpr == NULL) { mtx_unlock(&dom_mtx); return (ENOMEM); } /* Copy the new struct protosw over the spacer. 
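 * (A sketch of the slot lifecycle, assuming a domain declares spacers
 * roughly as { .pr_protocol = PROTO_SPACER }: pf_proto_register()
 * claims the first such slot it finds, and pf_proto_unregister() below
 * returns the slot to that state.)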
*/ bcopy(npr, fpr, sizeof(*fpr)); pr_usrreqs_init(fpr); rm_wlock(&pftimo_lock); if (fpr->pr_fasttimo != NULL) LIST_INSERT_HEAD(&pffast_list, fpr, pr_fasttimos); if (fpr->pr_slowtimo != NULL) LIST_INSERT_HEAD(&pfslow_list, fpr, pr_slowtimos); rm_wunlock(&pftimo_lock); /* Job is done, no more protection required. */ mtx_unlock(&dom_mtx); return (0); } /* * The caller must make sure the protocol and its functions correctly shut down * all sockets and release all locks and memory references. */ int pf_proto_unregister(int family, int protocol, int type) { struct domain *dp; struct protosw *pr, *dpr; /* Sanity checks. */ if (family == 0) return (EPFNOSUPPORT); if (protocol == 0) return (EPROTONOSUPPORT); if (type == 0) return (EPROTOTYPE); /* Try to find the specified domain based on the family. */ dp = pffinddomain(family); if (dp == NULL) return (EPFNOSUPPORT); dpr = NULL; /* Lock out everyone else while we are manipulating the protosw. */ mtx_lock(&dom_mtx); /* The protocol must exist, and only once. */ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) { if (dpr != NULL) { mtx_unlock(&dom_mtx); return (EMLINK); /* Should not happen! */ } else dpr = pr; } } /* Protocol does not exist. */ if (dpr == NULL) { mtx_unlock(&dom_mtx); return (EPROTONOSUPPORT); } rm_wlock(&pftimo_lock); if (dpr->pr_fasttimo != NULL) LIST_REMOVE(dpr, pr_fasttimos); if (dpr->pr_slowtimo != NULL) LIST_REMOVE(dpr, pr_slowtimos); rm_wunlock(&pftimo_lock); /* De-orbit the protocol and make the slot available again. */ dpr->pr_type = 0; dpr->pr_domain = dp; dpr->pr_protocol = PROTO_SPACER; dpr->pr_flags = 0; dpr->pr_input = NULL; - dpr->pr_output = NULL; dpr->pr_ctlinput = NULL; dpr->pr_ctloutput = NULL; dpr->pr_fasttimo = NULL; dpr->pr_slowtimo = NULL; dpr->pr_drain = NULL; dpr->pr_usrreqs = &nousrreqs; /* Job is done, no more protection required. */ mtx_unlock(&dom_mtx); return (0); } void pfctlinput(int cmd, struct sockaddr *sa) { struct domain *dp; struct protosw *pr; NET_EPOCH_ASSERT(); for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_ctlinput) (*pr->pr_ctlinput)(cmd, sa, (void *)0); } static void pfslowtimo(void *arg) { struct rm_priotracker tracker; struct epoch_tracker et; struct protosw *pr; rm_rlock(&pftimo_lock, &tracker); NET_EPOCH_ENTER(et); LIST_FOREACH(pr, &pfslow_list, pr_slowtimos) { (*pr->pr_slowtimo)(); } NET_EPOCH_EXIT(et); rm_runlock(&pftimo_lock, &tracker); callout_reset(&pfslow_callout, hz / PR_SLOWHZ, pfslowtimo, NULL); } static void pffasttimo(void *arg) { struct rm_priotracker tracker; struct epoch_tracker et; struct protosw *pr; rm_rlock(&pftimo_lock, &tracker); NET_EPOCH_ENTER(et); LIST_FOREACH(pr, &pffast_list, pr_fasttimos) { (*pr->pr_fasttimo)(); } NET_EPOCH_EXIT(et); rm_runlock(&pftimo_lock, &tracker); callout_reset(&pffast_callout, hz / PR_FASTHZ, pffasttimo, NULL); }
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 71558be619d8..013e4ea5c68b 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -1,2325 +1,2323 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Michael Shalayeff. * Copyright (c) 2003 Ryan McBride. * Copyright (c) 2011 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ struct sockaddr_dl sc_addr; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. */ #endif #ifdef INET6 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ #endif struct mtx sc_mtx; int sc_vhid; int sc_advskew; int sc_advbase; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; enum { INIT = 0, BACKUP, MASTER } sc_state; int sc_suppress; int sc_sendad_errors; #define CARP_SENDAD_MAX_ERRORS 3 int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 int sc_init_counter; uint64_t sc_counter; /* authentication */ #define CARP_HMAC_PAD 64 unsigned char sc_key[CARP_KEY_LEN]; unsigned char sc_pad[CARP_HMAC_PAD]; SHA1_CTX sc_sha1; TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ }; struct carp_if { #ifdef INET int cif_naddrs; #endif #ifdef INET6 int cif_naddrs6; #endif TAILQ_HEAD(, carp_softc) cif_vrs; #ifdef INET struct ip_moptions cif_imo; #endif #ifdef INET6 struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; struct mtx cif_mtx; uint32_t cif_flags; #define CIF_PROMISC 0x00000001 }; #define CARP_INET 0 #define CARP_INET6 1 static int proto_reg[] = {-1, -1}; /* * Brief design of carp(4). * * Any carp-capable ifnet may have a list of carp softcs hanging off * its ifp->if_carp pointer. Each softc represents one unique virtual * host id, or vhid. The softc has a back pointer to the ifnet. All * softcs are joined in a global list, which has quite limited use. * * Any interface address that takes part in CARP negotiation has a * pointer to the softc of its vhid, ifa->ifa_carp. That could be either * AF_INET or AF_INET6 address. 
* * Although, one can get the softc's backpointer to ifnet and traverse * through its ifp->if_addrhead queue to find all interface addresses * involved in CARP, we keep a growable array of ifaddr pointers. This * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that * do calls into the network stack, thus avoiding LORs. * * Locking: * * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), * callout-driven events and ioctl()s. * * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. * To traverse the global list we use the mutex carp_mtx. * * Known issues with locking: * * - Sending ad, we put the pointer to the softc in an mtag, and no reference * counting is done on the softc. * - On module unload we may race (?) with packet processing thread * dereferencing our function pointers. */ /* Accept incoming CARP packets. */ VNET_DEFINE_STATIC(int, carp_allow) = 1; #define V_carp_allow VNET(carp_allow) /* Set DSCP in outgoing CARP packets. */ VNET_DEFINE_STATIC(int, carp_dscp) = 56; #define V_carp_dscp VNET(carp_dscp) /* Preempt slower nodes. */ VNET_DEFINE_STATIC(int, carp_preempt) = 0; #define V_carp_preempt VNET(carp_preempt) /* Log level. */ VNET_DEFINE_STATIC(int, carp_log) = 1; #define V_carp_log VNET(carp_log) /* Global advskew demotion. */ VNET_DEFINE_STATIC(int, carp_demotion) = 0; #define V_carp_demotion VNET(carp_demotion) /* Send error demotion factor. */ VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; #define V_carp_senderr_adj VNET(carp_senderr_adj) /* Iface down demotion factor. */ VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; #define V_carp_ifdown_adj VNET(carp_ifdown_adj) static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "CARP"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_allow_sysctl, "I", "Accept incoming CARP packets"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_dscp_sysctl, "I", "DSCP value for carp packets"); SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_log), 0, "CARP log level"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_demote_adj_sysctl, "I", "Adjust demotion factor (skew of advskew)"); SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_ifdown_adj), 0, "Interface down demotion factor adjustment"); VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); VNET_PCPUSTAT_SYSINIT(carpstats); VNET_PCPUSTAT_SYSUNINIT(carpstats); #define CARPSTATS_ADD(name, val) \ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ sizeof(uint64_t)], (val)) #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ NULL, MTX_DEF) 
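/*
 * A sketch of how the CARPSTATS_*() macros above resolve: the per-VNET
 * statistics are an array of counter_u64 indexed by field offset, so
 * CARPSTATS_INC(carps_ipackets) expands to roughly
 *
 *	counter_u64_add(VNET(carpstats)[offsetof(struct carpstats,
 *	    carps_ipackets) / sizeof(uint64_t)], 1);
 */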
#define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ NULL, MTX_DEF) #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ CIF_UNLOCK(cif); \ } while (0) #define CARP_LOG(...) do { \ if (V_carp_log > 0) \ log(LOG_INFO, "carp: " __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ if (V_carp_log > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) #define IFNET_FOREACH_IFA(ifp, ifa) \ CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ if ((ifa)->ifa_carp != NULL) #define CARP_FOREACH_IFA(sc, ifa) \ CARP_LOCK_ASSERT(sc); \ for (int _i = 0; \ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ ((ifa) = sc->sc_ifas[_i]) != NULL; \ ++_i) #define IFNET_FOREACH_CARP(ifp, sc) \ KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) #define DEMOTE_ADVSKEW(sc) \ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ CARP_MAXSKEW : \ (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 0 : ((sc)->sc_advskew + V_carp_demotion))) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static struct carp_softc *carp_alloc(struct ifnet *); static void carp_destroy(struct carp_softc *); static struct carp_if *carp_alloc_if(struct ifnet *); static void carp_free_if(struct carp_if *); static void carp_set_state(struct carp_softc *, int, const char* reason); static void carp_sc_state(struct carp_softc *); static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *, const char* reason); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); static void carp_ifa_delroute(struct ifaddr *); static void carp_send_ad_all(void *, int); static void carp_demote_adj(int, char *); static LIST_HEAD(, carp_softc) carp_list; static struct mtx carp_mtx; static struct sx carp_sx; static struct task carp_sendall_task = TASK_INITIALIZER(0, carp_send_ad_all, NULL); static void carp_hmac_prepare(struct carp_softc *sc) { uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; #ifdef INET struct in_addr last, cur, in; #endif #ifdef INET6 struct in6_addr last6, cur6, in6; #endif CARP_LOCK_ASSERT(sc); /* Compute ipad from key. */ bzero(sc->sc_pad, sizeof(sc->sc_pad)); bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36; /* Precompute first part of inner hash. 
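 * The construction is standard HMAC, H((K ^ opad) || H((K ^ ipad) || m)),
 * with ipad bytes 0x36 and opad bytes 0x5c; the "convert ipad to opad"
 * step below relies on (key ^ 0x36) ^ (0x36 ^ 0x5c) == key ^ 0x5c.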
*/ SHA1Init(&sc->sc_sha1); SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); #ifdef INET cur.s_addr = 0; do { found = 0; last = cur; cur.s_addr = 0xffffffff; CARP_FOREACH_IFA(sc, ifa) { in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; if (ifa->ifa_addr->sa_family == AF_INET && ntohl(in.s_addr) > ntohl(last.s_addr) && ntohl(in.s_addr) < ntohl(cur.s_addr)) { cur.s_addr = in.s_addr; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); } while (found); #endif /* INET */ #ifdef INET6 memset(&cur6, 0, sizeof(cur6)); do { found = 0; last6 = cur6; memset(&cur6, 0xff, sizeof(cur6)); CARP_FOREACH_IFA(sc, ifa) { in6 = ifatoia6(ifa)->ia_addr.sin6_addr; if (IN6_IS_SCOPE_EMBED(&in6)) in6.s6_addr16[1] = 0; if (ifa->ifa_addr->sa_family == AF_INET6 && memcmp(&in6, &last6, sizeof(in6)) > 0 && memcmp(&in6, &cur6, sizeof(in6)) < 0) { cur6 = in6; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); } while (found); #endif /* INET6 */ /* convert ipad to opad */ for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36 ^ 0x5c; } static void carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { SHA1_CTX sha1ctx; CARP_LOCK_ASSERT(sc); /* fetch first half of inner hash */ bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); SHA1Final(md, &sha1ctx); /* outer hash */ SHA1Init(&sha1ctx); SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sha1ctx, md, 20); SHA1Final(md, &sha1ctx); } static int carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { unsigned char md2[20]; CARP_LOCK_ASSERT(sc); carp_hmac_generate(sc, counter, md2); return (bcmp(md, md2, sizeof(md2))); } /* * Process an input packet. * We have rearranged the order of checks compared to the RFC; it seems * more efficient this way, or is not possible otherwise. */ #ifdef INET int carp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct carp_header *ch; int iplen, len; iplen = *offp; *mp = NULL; CARPSTATS_INC(carps_ipackets); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255.
*/ if (ip->ip_ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ip->ip_ttl, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } iplen = ip->ip_hl << 2; if (m->m_pkthdr.len < iplen + sizeof(*ch)) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } if (iplen + sizeof(*ch) < m->m_len) { if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { CARPSTATS_INC(carps_hdrops); CARP_DEBUG("%s: pullup failed\n", __func__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ch = (struct carp_header *)((char *)ip + iplen); /* * verify that the received packet length is * equal to the CARP header */ len = iplen + sizeof(*ch); if (len > m->m_pkthdr.len) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet too short %d on %s\n", __func__, m->m_pkthdr.len, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } if ((m = m_pullup(m, len)) == NULL) { CARPSTATS_INC(carps_hdrops); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); ch = (struct carp_header *)((char *)ip + iplen); /* verify the CARP checksum */ m->m_data += iplen; if (in_cksum(m, len - iplen)) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: checksum failed on %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= iplen; carp_input_c(m, ch, AF_INET); return (IPPROTO_DONE); } #endif #ifdef INET6 int carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct carp_header *ch; u_int len; CARPSTATS_INC(carps_ipackets6); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* check if received on a valid carp interface */ if (m->m_pkthdr.rcvif->if_carp == NULL) { CARPSTATS_INC(carps_badif); CARP_DEBUG("%s: packet received on non-carp interface: %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255 */ if (ip6->ip6_hlim != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that we have a complete carp packet */ if (m->m_len < *offp + sizeof(*ch)) { len = m->m_len; m = m_pullup(m, *offp + sizeof(*ch)); if (m == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); return (IPPROTO_DONE); } } ch = (struct carp_header *)(mtod(m, char *) + *offp); /* verify the CARP checksum */ m->m_data += *offp; if (in_cksum(m, sizeof(*ch))) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: checksum failed, on %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= *offp; carp_input_c(m, ch, AF_INET6); return (IPPROTO_DONE); } #endif /* INET6 */ /* * This routine should not be necessary at all, but some switches * (VMWare ESX vswitches) can echo our own packets back at us, * and we must ignore them or they will cause us to drop out of * MASTER mode. * * We cannot catch all cases of network loops. Instead, what we * do here is catch any packet that arrives with a carp header * with a VHID of 0, that comes from an address that is our own. * These packets are by definition "from us" (even if they are from * a misconfigured host that is pretending to be us). * * The VHID test is outside this mini-function. 
*/ static int carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; struct in_addr in4; #endif #ifdef INET6 struct ip6_hdr *ip6; struct in6_addr in6; #endif switch (af) { #ifdef INET case AF_INET: ip4 = mtod(m, struct ip *); in4 = ifatoia(ifa)->ia_addr.sin_addr; return (in4.s_addr == ip4->ip_src.s_addr); #endif #ifdef INET6 case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); in6 = ifatoia6(ifa)->ia_addr.sin6_addr; return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); #endif default: break; } return (0); } static void carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; struct carp_softc *sc; uint64_t tmp_counter; struct timeval sc_tv, ch_tv; int error; NET_EPOCH_ASSERT(); /* * Verify that the VHID is valid on the receiving interface. * * There should be just one match. If there are none * the VHID is not valid and we drop the packet. If * there are multiple VHID matches, take just the first * one, for compatibility with previous code. While we're * scanning, check for obvious loops in the network topology * (these should never happen, and as noted above, we may * miss real loops; this is just a double-check). */ error = 0; match = NULL; IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && ifa->ifa_carp->sc_vhid == ch->carp_vhid) match = ifa; if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? NULL : match; if (ifa != NULL) ifa_ref(ifa); if (ifa == NULL) { if (error == ELOOP) { CARP_DEBUG("dropping looped packet on interface %s\n", ifp->if_xname); CARPSTATS_INC(carps_badif); /* ??? */ } else { CARPSTATS_INC(carps_badvhid); } m_freem(m); return; } /* verify the CARP version. */ if (ch->carp_version != CARP_VERSION) { CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, ch->carp_version); ifa_free(ifa); m_freem(m); return; } sc = ifa->ifa_carp; CARP_LOCK(sc); ifa_free(ifa); if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { CARPSTATS_INC(carps_badauth); CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, sc->sc_vhid, ifp->if_xname); goto out; } tmp_counter = ntohl(ch->carp_counter[0]); tmp_counter = tmp_counter<<32; tmp_counter += ntohl(ch->carp_counter[1]); /* XXX Replay protection goes here */ sc->sc_init_counter = 0; sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; ch_tv.tv_sec = ch->carp_advbase; ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; switch (sc->sc_state) { case INIT: break; case MASTER: /* * If we receive an advertisement from a master who's going to * be more frequent than us, go into BACKUP state. */ if (timevalcmp(&sc_tv, &ch_tv, >) || timevalcmp(&sc_tv, &ch_tv, ==)) { callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "more frequent advertisement received"); carp_setrun(sc, 0); carp_delroute(sc); } break; case BACKUP: /* * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "preempting a slower master"); break; } /* * If the master is going to advertise at such a low frequency * that he's guaranteed to time out, we might as well just * treat him as timed out now.
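 * (A worked example, ignoring the advskew microseconds: with our
 * advbase of 1 the timeout below is 3 seconds, so a master claiming
 * carp_advbase = 10 satisfies the 3s < 10s check and is declared
 * down immediately.)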
*/ sc_tv.tv_sec = sc->sc_advbase * 3; if (timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "master will time out"); break; } /* * Otherwise, we reset the counter and wait for the next * advertisement. */ carp_setrun(sc, af); break; } out: CARP_UNLOCK(sc); m_freem(m); } static int carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { struct m_tag *mtag; if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); } else sc->sc_counter++; ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); /* Tag packet for carp_output */ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT)) == NULL) { m_freem(m); CARPSTATS_INC(carps_onomem); return (ENOMEM); } bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (0); } /* * To avoid LORs and possible recursions, this function shouldn't * be called directly, but scheduled via taskqueue. */ static void carp_send_ad_all(void *ctx __unused, int pending __unused) { struct carp_softc *sc; struct epoch_tracker et; NET_EPOCH_ENTER(et); mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) if (sc->sc_state == MASTER) { CARP_LOCK(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); NET_EPOCH_EXIT(et); } /* Send a periodic advertisement, executed in callout context. */ static void carp_send_ad(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); } static void carp_send_ad_error(struct carp_softc *sc, int error) { /* * We track errors and successful sends with this logic: * - Any error resets success counter to 0. * - MAX_ERRORS triggers demotion. * - MIN_SUCCESS successes reset error counter to 0. * - MIN_SUCCESS reverts demotion, if it was triggered before. */ if (error) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send error %d on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else if (sc->sc_sendad_errors > 0) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send ok on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, sc->sc_carpdev->if_xname); carp_demote_adj(-V_carp_senderr_adj, msg); } sc->sc_sendad_errors = 0; } } } /* * Pick the best ifaddr on the given ifp for sending CARP * advertisements. * * "Best" here is defined by ifa_preferred(). This function is much * like ifaof_ifpforaddr() except that we just use ifa_preferred(). * * (This could be simplified to return the actual address, except that * it has a different format in AF_INET and AF_INET6.)
*/ static struct ifaddr * carp_best_ifa(int af, struct ifnet *ifp) { struct ifaddr *ifa, *best; NET_EPOCH_ASSERT(); if (af >= AF_MAX) return (NULL); best = NULL; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == af && (best == NULL || ifa_preferred(best, ifa))) best = ifa; } if (best != NULL) ifa_ref(best); return (best); } static void carp_send_ad_locked(struct carp_softc *sc) { struct carp_header ch; struct timeval tv; struct ifaddr *ifa; struct carp_header *ch_ptr; struct mbuf *m; int len, advskew; NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; ch.carp_version = CARP_VERSION; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; ch.carp_advskew = advskew; ch.carp_authlen = 7; /* XXX DEFINE */ ch.carp_pad1 = 0; /* must be zero */ ch.carp_cksum = 0; /* XXXGL: OpenBSD picks first ifaddr with needed family. */ #ifdef INET if (sc->sc_naddrs) { struct ip *ip; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; ip->ip_len = htons(len); ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip_fillid(ip); ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); if (ifa != NULL) { ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; ifa_free(ifa); } else ip->ip_src.s_addr = 0; ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); m->m_data -= sizeof(*ip); CARPSTATS_INC(carps_opackets); carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_carpdev->if_carp->cif_imo, NULL)); } #endif /* INET */ #ifdef INET6 if (sc->sc_naddrs6) { struct ip6_hdr *ip6; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip6) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; /* Traffic class isn't defined in ip6 struct instead * it gets offset into flowid field */ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + IPTOS_DSCP_OFFSET)); ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; /* set the source address */ ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); if (ifa != NULL) { bcopy(IFA_IN6(ifa), &ip6->ip6_src, sizeof(struct in6_addr)); ifa_free(ifa); } else /* This should never happen with IPv6. */ bzero(&ip6->ip6_src, sizeof(struct in6_addr)); /* Set the multicast destination. 
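 * (i.e., ff02::12, the link-local CARP/VRRP group, scoped to the
 * parent interface by in6_setscope() below).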
*/ ip6->ip6_dst.s6_addr16[0] = htons(0xff02); ip6->ip6_dst.s6_addr8[15] = 0x12; if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); goto resched; } ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip6); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); m->m_data -= sizeof(*ip6); CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); } #endif /* INET6 */ resched: callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); } static void carp_addroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_addroute(ifa); } static void carp_ifa_addroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: in_addprefix(ifatoia(ifa)); ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); break; #endif #ifdef INET6 case AF_INET6: ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_add_ifa_lle(ifatoia6(ifa)); break; #endif } } static void carp_delroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_delroute(ifa); } static void carp_ifa_delroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); in_scrubprefix(ifatoia(ifa), LLE_STATIC); break; #endif #ifdef INET6 case AF_INET6: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_rem_ifa_lle(ifatoia6(ifa), 1); break; #endif } } int carp_master(struct ifaddr *ifa) { struct carp_softc *sc = ifa->ifa_carp; return (sc->sc_state == MASTER); } #ifdef INET /* * Broadcast a gratuitous ARP request containing * the virtual router MAC address for each IP address * associated with the virtual router. */ static void carp_send_arp(struct carp_softc *sc) { struct ifaddr *ifa; struct in_addr addr; NET_EPOCH_ASSERT(); CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET) continue; addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); } } int carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) { struct carp_softc *sc = ifa->ifa_carp; if (sc->sc_state == MASTER) { *enaddr = LLADDR(&sc->sc_addr); return (1); } return (0); } #endif #ifdef INET6 static void carp_send_na(struct carp_softc *sc) { static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; struct ifaddr *ifa; struct in6_addr *in6; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6 = IFA_IN6(ifa); nd6_na_output(sc->sc_carpdev, &mcast, in6, ND_NA_FLAG_OVERRIDE, 1, NULL); DELAY(1000); /* XXX */ } } /* * Returns ifa in case it's a carp address and it is MASTER, or if the address * matches and is not a carp address. Returns NULL otherwise. 
*/ struct ifaddr * carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); ifa = NULL; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) continue; if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) ifa = NULL; else ifa_ref(ifa); break; } return (ifa); } char * carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); IFNET_FOREACH_IFA(ifp, ifa) if (ifa->ifa_addr->sa_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { struct carp_softc *sc = ifa->ifa_carp; struct m_tag *mtag; mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT); if (mtag == NULL) /* Better a bit than nothing. */ return (LLADDR(&sc->sc_addr)); bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (LLADDR(&sc->sc_addr)); } return (NULL); } #endif /* INET6 */ int carp_forus(struct ifnet *ifp, u_char *dhost) { struct carp_softc *sc; uint8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { /* * CARP_LOCK() is not taken here, since it would protect nothing, * but would cause a deadlock with if_bridge, which calls this * function under its own lock. */ if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), ETHER_ADDR_LEN)) { CIF_UNLOCK(ifp->if_carp); return (1); } } CIF_UNLOCK(ifp->if_carp); return (0); } /* Master down timeout event, executed in callout context. */ static void carp_master_down(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); if (sc->sc_state == BACKUP) { carp_master_down_locked(sc, "master timed out"); } CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); } static void carp_master_down_locked(struct carp_softc *sc, const char *reason) { NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); carp_send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif #ifdef INET6 carp_send_na(sc); #endif carp_setrun(sc, 0); carp_addroute(sc); break; case INIT: case MASTER: #ifdef INVARIANTS panic("carp: VHID %u@%s: master_down event in %s state\n", sc->sc_vhid, sc->sc_carpdev->if_xname, sc->sc_state ? "MASTER" : "INIT"); #endif break; } } /* * When in backup state, af indicates whether to reset the master down timer * for v4 or v6. If it's set to zero, reset the ones which are already pending.
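 * (Timing sketch from the code below: a BACKUP schedules its master
 * down timeout at 3 * advbase + advskew / 256 seconds, e.g. 3s for
 * advbase = 1 and advskew = 0, while a MASTER re-advertises every
 * advbase + advskew / 256 seconds.)
 */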
*/ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; CARP_LOCK_ASSERT(sc); if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || sc->sc_carpdev->if_link_state != LINK_STATE_UP || (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || !V_carp_allow) return; switch (sc->sc_state) { case INIT: carp_set_state(sc, BACKUP, "initialization complete"); carp_setrun(sc, 0); break; case BACKUP: callout_stop(&sc->sc_ad_tmo); tv.tv_sec = 3 * sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; switch (af) { #ifdef INET case AF_INET: callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); #endif break; } break; case MASTER: tv.tv_sec = sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); break; } } /* * Setup multicast structures. */ static int carp_multicast_setup(struct carp_if *cif, sa_family_t sa) { struct ifnet *ifp = cif->cif_ifp; int error = 0; switch (sa) { #ifdef INET case AF_INET: { struct ip_moptions *imo = &cif->cif_imo; struct in_mfilter *imf; struct in_addr addr; if (ip_mfilter_first(&imo->imo_head) != NULL) return (0); imf = ip_mfilter_alloc(M_WAITOK, 0, 0); ip_mfilter_init(&imo->imo_head); imo->imo_multicast_vif = -1; addr.s_addr = htonl(INADDR_CARP_GROUP); if ((error = in_joingroup(ifp, &addr, NULL, &imf->imf_inm)) != 0) { ip_mfilter_free(imf); break; } ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_mfilter *im6f[2]; struct in6_addr in6; if (ip6_mfilter_first(&im6o->im6o_head)) return (0); im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); ip6_mfilter_init(&im6o->im6o_head); im6o->im6o_multicast_hlim = CARP_DFLTTL; im6o->im6o_multicast_ifp = ifp; /* Join IPv6 CARP multicast group. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } /* Join solicited multicast address. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr32[1] = 0; in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = 0; in6.s6_addr8[12] = 0xff; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { in6_leavegroup(im6f[0]->im6f_in6m, NULL); ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); break; } #endif } return (error); } /* * Free multicast structures. 
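* The groups joined in carp_multicast_setup() are left only when the last address of the given family goes away, i.e. once cif_naddrs (INET) or cif_naddrs6 (INET6) has dropped to zero.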
*/ static void carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) { #ifdef INET struct ip_moptions *imo = &cif->cif_imo; struct in_mfilter *imf; #endif #ifdef INET6 struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_mfilter *im6f; #endif sx_assert(&carp_sx, SA_XLOCKED); switch (sa) { #ifdef INET case AF_INET: if (cif->cif_naddrs != 0) break; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); in_leavegroup(imf->imf_inm, NULL); ip_mfilter_free(imf); } break; #endif #ifdef INET6 case AF_INET6: if (cif->cif_naddrs6 != 0) break; while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { ip6_mfilter_remove(&im6o->im6o_head, im6f); in6_leavegroup(im6f->im6f_in6m, NULL); ip6_mfilter_free(im6f); } break; #endif } } int carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; struct carp_softc *sc; if (!sa) return (0); switch (sa->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return (0); } mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); if (mtag == NULL) return (0); bcopy(mtag + 1, &sc, sizeof(sc)); /* Set the source MAC address to the Virtual Router MAC Address. */ switch (ifp->if_type) { case IFT_ETHER: case IFT_BRIDGE: case IFT_L2VLAN: { struct ether_header *eh; eh = mtod(m, struct ether_header *); eh->ether_shost[0] = 0; eh->ether_shost[1] = 0; eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; eh->ether_shost[5] = sc->sc_vhid; } break; default: printf("%s: carp is not supported for the %d interface type\n", ifp->if_xname, ifp->if_type); return (EOPNOTSUPP); } return (0); } static struct carp_softc* carp_alloc(struct ifnet *ifp) { struct carp_softc *sc; struct carp_if *cif; sx_assert(&carp_sx, SA_XLOCKED); if ((cif = ifp->if_carp) == NULL) cif = carp_alloc_if(ifp); sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); sc->sc_carpdev = ifp; CARP_LOCK_INIT(sc); #ifdef INET callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif #ifdef INET6 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); CIF_LOCK(cif); TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carp_list, sc, sc_next); mtx_unlock(&carp_mtx); return (sc); } static void carp_grow_ifas(struct carp_softc *sc) { struct ifaddr **new; new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); CARP_LOCK(sc); bcopy(sc->sc_ifas, new, sc->sc_ifasiz); free(sc->sc_ifas, M_CARP); sc->sc_ifas = new; sc->sc_ifasiz *= 2; CARP_UNLOCK(sc); } static void carp_destroy(struct carp_softc *sc) { struct ifnet *ifp = sc->sc_carpdev; struct carp_if *cif = ifp->if_carp; sx_assert(&carp_sx, SA_XLOCKED); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); CARP_UNLOCK(sc); CIF_LOCK(cif); TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_REMOVE(sc, sc_next); mtx_unlock(&carp_mtx); callout_drain(&sc->sc_ad_tmo); #ifdef INET callout_drain(&sc->sc_md_tmo); #endif #ifdef INET6 callout_drain(&sc->sc_md6_tmo); #endif CARP_LOCK_DESTROY(sc); free(sc->sc_ifas, M_CARP); free(sc, M_CARP); } static struct carp_if* carp_alloc_if(struct ifnet *ifp) { struct 
carp_if *cif; int error; cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); if ((error = ifpromisc(ifp, 1)) != 0) printf("%s: ifpromisc(%s) failed: %d\n", __func__, ifp->if_xname, error); else cif->cif_flags |= CIF_PROMISC; CIF_LOCK_INIT(cif); cif->cif_ifp = ifp; TAILQ_INIT(&cif->cif_vrs); IF_ADDR_WLOCK(ifp); ifp->if_carp = cif; if_ref(ifp); IF_ADDR_WUNLOCK(ifp); return (cif); } static void carp_free_if(struct carp_if *cif) { struct ifnet *ifp = cif->cif_ifp; CIF_LOCK_ASSERT(cif); KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", __func__)); IF_ADDR_WLOCK(ifp); ifp->if_carp = NULL; IF_ADDR_WUNLOCK(ifp); CIF_LOCK_DESTROY(cif); if (cif->cif_flags & CIF_PROMISC) ifpromisc(ifp, 0); if_rele(ifp); free(cif, M_CARP); } static void carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) { CARP_LOCK(sc); carpr->carpr_state = sc->sc_state; carpr->carpr_vhid = sc->sc_vhid; carpr->carpr_advbase = sc->sc_advbase; carpr->carpr_advskew = sc->sc_advskew; if (priv) bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); else bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); CARP_UNLOCK(sc); } int carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) { struct carpreq carpr; struct ifnet *ifp; struct carp_softc *sc = NULL; int error = 0, locked = 0; if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) return (error); ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) return (ENXIO); switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_BRIDGE: break; default: error = EOPNOTSUPP; goto out; } if ((ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; goto out; } sx_xlock(&carp_sx); switch (cmd) { case SIOCSVH: if ((error = priv_check(td, PRIV_NETINET_CARP))) break; if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { error = EINVAL; break; } if (ifp->if_carp) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; } if (sc == NULL) { sc = carp_alloc(ifp); CARP_LOCK(sc); sc->sc_vhid = carpr.carpr_vhid; LLADDR(&sc->sc_addr)[0] = 0; LLADDR(&sc->sc_addr)[1] = 0; LLADDR(&sc->sc_addr)[2] = 0x5e; LLADDR(&sc->sc_addr)[3] = 0; LLADDR(&sc->sc_addr)[4] = 1; LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; } else CARP_LOCK(sc); locked = 1; if (carpr.carpr_advbase > 0) { if (carpr.carpr_advbase > 255 || carpr.carpr_advbase < CARP_DFLTINTV) { error = EINVAL; break; } sc->sc_advbase = carpr.carpr_advbase; } if (carpr.carpr_advskew >= 255) { error = EINVAL; break; } sc->sc_advskew = carpr.carpr_advskew; if (carpr.carpr_key[0] != '\0') { bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); carp_hmac_prepare(sc); } if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { switch (carpr.carpr_state) { case BACKUP: callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "user requested via ifconfig"); carp_setrun(sc, 0); carp_delroute(sc); break; case MASTER: carp_master_down_locked(sc, "user requested via ifconfig"); break; default: break; } } break; case SIOCGVH: { int privileged; if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { error = EINVAL; break; } if (carpr.carpr_count < 1) { error = EMSGSIZE; break; } if (ifp->if_carp == NULL) { error = ENOENT; break; } privileged = (priv_check(td, PRIV_NETINET_CARP) == 0); if (carpr.carpr_vhid != 0) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; if (sc == NULL) { error = ENOENT; break; } carp_carprcp(&carpr, sc, privileged); error = copyout(&carpr, ifr_data_get_ptr(ifr), sizeof(carpr));
} else { int i, count; count = 0; IFNET_FOREACH_CARP(ifp, sc) count++; if (count > carpr.carpr_count) { CIF_UNLOCK(ifp->if_carp); error = EMSGSIZE; break; } i = 0; IFNET_FOREACH_CARP(ifp, sc) { carp_carprcp(&carpr, sc, privileged); carpr.carpr_count = count; error = copyout(&carpr, (char *)ifr_data_get_ptr(ifr) + (i * sizeof(carpr)), sizeof(carpr)); if (error) { CIF_UNLOCK(ifp->if_carp); break; } i++; } } break; } default: error = EINVAL; } sx_xunlock(&carp_sx); out: if (locked) CARP_UNLOCK(sc); if_rele(ifp); return (error); } static int carp_get_vhid(struct ifaddr *ifa) { if (ifa == NULL || ifa->ifa_carp == NULL) return (0); return (ifa->ifa_carp->sc_vhid); } int carp_attach(struct ifaddr *ifa, int vhid) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc; int index, error; KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: #endif break; default: return (EPROTOTYPE); } sx_xlock(&carp_sx); if (ifp->if_carp == NULL) { sx_xunlock(&carp_sx); return (ENOPROTOOPT); } IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == vhid) break; if (sc == NULL) { sx_xunlock(&carp_sx); return (ENOENT); } error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); if (error) { CIF_FREE(cif); sx_xunlock(&carp_sx); return (error); } index = sc->sc_naddrs + sc->sc_naddrs6 + 1; if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) carp_grow_ifas(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs++; sc->sc_naddrs++; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6++; sc->sc_naddrs6++; break; #endif } ifa_ref(ifa); CARP_LOCK(sc); sc->sc_ifas[index - 1] = ifa; ifa->ifa_carp = sc; carp_hmac_prepare(sc); carp_sc_state(sc); CARP_UNLOCK(sc); sx_xunlock(&carp_sx); return (0); } void carp_detach(struct ifaddr *ifa, bool keep_cif) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc = ifa->ifa_carp; int i, index; KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); sx_xlock(&carp_sx); CARP_LOCK(sc); /* Shift array.
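* Compact sc_ifas by sliding every entry after the detached ifa down one slot, keeping the live pointers contiguous at the front of the array.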
*/ index = sc->sc_naddrs + sc->sc_naddrs6; for (i = 0; i < index; i++) if (sc->sc_ifas[i] == ifa) break; KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); for (; i < index - 1; i++) sc->sc_ifas[i] = sc->sc_ifas[i+1]; sc->sc_ifas[index - 1] = NULL; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs--; sc->sc_naddrs--; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6--; sc->sc_naddrs6--; break; #endif } carp_ifa_delroute(ifa); carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); ifa->ifa_carp = NULL; ifa_free(ifa); carp_hmac_prepare(sc); carp_sc_state(sc); if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) carp_destroy(sc); else CARP_UNLOCK(sc); if (!keep_cif) CIF_FREE(cif); sx_xunlock(&carp_sx); } static void carp_set_state(struct carp_softc *sc, int state, const char *reason) { CARP_LOCK_ASSERT(sc); if (sc->sc_state != state) { const char *carp_states[] = { CARP_STATES }; char subsys[IFNAMSIZ+5]; snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, sc->sc_carpdev->if_xname); CARP_LOG("%s: %s -> %s (%s)\n", subsys, carp_states[sc->sc_state], carp_states[state], reason); sc->sc_state = state; devctl_notify("CARP", subsys, carp_states[state], NULL); } } static void carp_linkstate(struct ifnet *ifp) { struct carp_softc *sc; CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); carp_sc_state(sc); CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); } static void carp_sc_state(struct carp_softc *sc) { CARP_LOCK_ASSERT(sc); if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || !(sc->sc_carpdev->if_flags & IFF_UP) || !V_carp_allow) { callout_stop(&sc->sc_ad_tmo); #ifdef INET callout_stop(&sc->sc_md_tmo); #endif #ifdef INET6 callout_stop(&sc->sc_md6_tmo); #endif carp_set_state(sc, INIT, "hardware interface down"); carp_setrun(sc, 0); if (!sc->sc_suppress) carp_demote_adj(V_carp_ifdown_adj, "interface down"); sc->sc_suppress = 1; } else { carp_set_state(sc, INIT, "hardware interface up"); carp_setrun(sc, 0); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "interface up"); sc->sc_suppress = 0; } } static void carp_demote_adj(int adj, char *reason) { atomic_add_int(&V_carp_demotion, adj); CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); } static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; struct carp_softc *sc; new = V_carp_allow; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); if (V_carp_allow != new) { V_carp_allow = new; mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) { CARP_LOCK(sc); if (curvnet == sc->sc_carpdev->if_vnet) carp_sc_state(sc); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); } return (0); } static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; new = V_carp_dscp; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); if (new < 0 || new > 63) return (EINVAL); V_carp_dscp = new; return (0); } static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; new = V_carp_demotion; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); carp_demote_adj(new, "sysctl"); return (0); } #ifdef INET extern struct domain inetdomain; static struct protosw in_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp_input, - .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif #ifdef 
INET6 extern struct domain inet6domain; static struct protosw in6_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp6_input, - .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }; #endif static void carp_mod_cleanup(void) { #ifdef INET if (proto_reg[CARP_INET] == 0) { (void)ipproto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET] = -1; } carp_iamatch_p = NULL; #endif #ifdef INET6 if (proto_reg[CARP_INET6] == 0) { (void)ip6proto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET6] = -1; } carp_iamatch6_p = NULL; carp_macmatch6_p = NULL; #endif carp_ioctl_p = NULL; carp_attach_p = NULL; carp_detach_p = NULL; carp_get_vhid_p = NULL; carp_linkstate_p = NULL; carp_forus_p = NULL; carp_output_p = NULL; carp_demote_adj_p = NULL; carp_master_p = NULL; mtx_unlock(&carp_mtx); taskqueue_drain(taskqueue_swi, &carp_sendall_task); mtx_destroy(&carp_mtx); sx_destroy(&carp_sx); } static int carp_mod_load(void) { int err; mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); sx_init(&carp_sx, "carp_sx"); LIST_INIT(&carp_list); carp_get_vhid_p = carp_get_vhid; carp_forus_p = carp_forus; carp_output_p = carp_output; carp_linkstate_p = carp_linkstate; carp_ioctl_p = carp_ioctl; carp_attach_p = carp_attach; carp_detach_p = carp_detach; carp_demote_adj_p = carp_demote_adj; carp_master_p = carp_master; #ifdef INET6 carp_iamatch6_p = carp_iamatch6; carp_macmatch6_p = carp_macmatch6; proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, (struct protosw *)&in6_carp_protosw); if (proto_reg[CARP_INET6]) { printf("carp: error %d attaching to PF_INET6\n", proto_reg[CARP_INET6]); carp_mod_cleanup(); return (proto_reg[CARP_INET6]); } err = ip6proto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET6\n", err); carp_mod_cleanup(); return (err); } #endif #ifdef INET carp_iamatch_p = carp_iamatch; proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); if (proto_reg[CARP_INET]) { printf("carp: error %d attaching to PF_INET\n", proto_reg[CARP_INET]); carp_mod_cleanup(); return (proto_reg[CARP_INET]); } err = ipproto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET\n", err); carp_mod_cleanup(); return (err); } #endif return (0); } static int carp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: return carp_mod_load(); /* NOTREACHED */ case MOD_UNLOAD: mtx_lock(&carp_mtx); if (LIST_EMPTY(&carp_list)) carp_mod_cleanup(); else { mtx_unlock(&carp_mtx); return (EBUSY); } break; default: return (EINVAL); } return (0); } static moduledata_t carp_mod = { "carp", carp_modevent, 0 }; DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 6ccd90ac6fdd..b47b726a9a71 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -1,607 +1,600 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" #include "opt_sctp.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SCTP #include #include #include #include #include #endif /* SCTP */ #include /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. 
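* The inet6sw[] switch below binds each protocol to its input, control-input and ctloutput handlers; the IP6PROTOSPACER entries reserve slots that loadable protocols can claim at run time.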
*/ FEATURE(inet6, "Internet Protocol version 6"); extern struct domain inet6domain; static struct pr_usrreqs nousrreqs; #define PR_LISTEN 0 #define PR_ABRTACPTDIS 0 /* Spacer for loadable protocols. */ #define IP6PROTOSPACER \ { \ .pr_domain = &inet6domain, \ .pr_protocol = PROTO_SPACER, \ .pr_usrreqs = &nousrreqs \ } struct protosw inet6sw[] = { { .pr_type = 0, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV6, .pr_flags = PR_CAPATTACH, .pr_slowtimo = frag6_slowtimo, .pr_drain = frag6_drain, .pr_usrreqs = &nousrreqs, }, { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_UDP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_CAPATTACH, .pr_input = udp6_input, .pr_ctlinput = udp6_ctlinput, .pr_ctloutput = ip6_ctloutput, .pr_usrreqs = &udp6_usrreqs, }, { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD| PR_LISTEN|PR_CAPATTACH, .pr_input = tcp6_input, .pr_ctlinput = tcp6_ctlinput, .pr_ctloutput = tcp_ctloutput, #ifndef INET /* don't call initialization, timeout, and drain routines twice */ .pr_slowtimo = tcp_slowtimo, .pr_drain = tcp_drain, #endif .pr_usrreqs = &tcp6_usrreqs, }, #ifdef SCTP { .pr_type = SOCK_SEQPACKET, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD|PR_LASTHDR, .pr_input = sctp6_input, .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, #ifndef INET /* Do not call initialization and drain routines twice. */ .pr_drain = sctp_drain, #endif .pr_usrreqs = &sctp6_usrreqs }, { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LASTHDR, .pr_input = sctp6_input, .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, .pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. 
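* Draining once per protocol suffices; wiring sctp_drain up here as well would run it twice for the same protocol.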
*/ .pr_usrreqs = &sctp6_usrreqs }, #endif /* SCTP */ { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_UDPLITE, .pr_flags = PR_ATOMIC|PR_ADDR|PR_CAPATTACH, .pr_input = udp6_input, .pr_ctlinput = udplite6_ctlinput, .pr_ctloutput = udp_ctloutput, .pr_usrreqs = &udp6_usrreqs, }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_RAW, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = rip6_input, - .pr_output = rip6_output, .pr_ctlinput = rip6_ctlinput, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_ICMPV6, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = icmp6_input, - .pr_output = rip6_output, .pr_ctlinput = rip6_ctlinput, .pr_ctloutput = rip6_ctloutput, .pr_fasttimo = icmp6_fasttimo, .pr_slowtimo = icmp6_slowtimo, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_DSTOPTS, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = dest6_input, .pr_usrreqs = &nousrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_ROUTING, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = route6_input, .pr_usrreqs = &nousrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_FRAGMENT, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = frag6_input, .pr_usrreqs = &nousrreqs }, #ifdef INET { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV4, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, - .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, #endif /* INET */ { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV6, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, - .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_ETHERIP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_GRE, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, - .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, - .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, /* Spacer n-times for loadable protocols. 
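* Protocols loaded as modules, e.g. CARP via pf_proto_register() in carp_mod_load() above, take over one of these spacer slots.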
*/ IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, /* raw wildcard */ { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = rip6_input, - .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, }; struct domain inet6domain = { .dom_family = AF_INET6, .dom_name = "internet6", .dom_protosw = (struct protosw *)inet6sw, .dom_protoswNPROTOSW = (struct protosw *)&inet6sw[nitems(inet6sw)], .dom_rtattach = in6_inithead, #ifdef VIMAGE .dom_rtdetach = in6_detachhead, #endif .dom_ifattach = in6_domifattach, .dom_ifdetach = in6_domifdetach, .dom_ifmtu = in6_domifmtu }; DOMAIN_SET(inet6); /* * Internet configuration info */ #ifndef IPV6FORWARDING #ifdef GATEWAY6 #define IPV6FORWARDING 1 /* forward IP6 packets not for us */ #else #define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */ #endif /* GATEWAY6 */ #endif /* !IPV6FORWARDING */ #ifndef IPV6_SENDREDIRECTS #define IPV6_SENDREDIRECTS 1 #endif VNET_DEFINE(int, ip6_forwarding) = IPV6FORWARDING; /* act as router? */ VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS; VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM; VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS; VNET_DEFINE(int, ip6_accept_rtadv) = 0; VNET_DEFINE(int, ip6_no_radr) = 0; VNET_DEFINE(int, ip6_norbit_raif) = 0; VNET_DEFINE(int, ip6_rfc6204w3) = 0; VNET_DEFINE(int, ip6_log_interval) = 5; VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we * process? */ VNET_DEFINE(int, ip6_dad_count) = 1; /* DupAddrDetectionTransmits */ VNET_DEFINE(int, ip6_auto_flowlabel) = 1; VNET_DEFINE(int, ip6_use_deprecated) = 1;/* allow deprecated addr * (RFC2462 5.5.4) */ VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix * walk list every 5 sec. */ VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */ VNET_DEFINE(int, ip6_v6only) = 1; VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L; #ifdef IPSTEALTH VNET_DEFINE(int, ip6stealth) = 0; #endif VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS * (RFC 4861) */ /* icmp6 */ /* * BSDI4 defines these variables in in_proto.c... * XXX: what if we don't define INET? Should we define pmtu6_expire * or so? (jinmei@kame.net 19990310) */ VNET_DEFINE(int, pmtu_expire) = 60*10; VNET_DEFINE(int, pmtu_probe) = 60*2; /* ICMPV6 parameters */ VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */ VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */ VNET_DEFINE(int, icmp6errppslim) = 100; /* 100pps */ /* control how to respond to NI queries */ VNET_DEFINE(int, icmp6_nodeinfo) = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK); VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1; /* * sysctl related items. 
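* Most of the knobs below are virtualized per VNET (CTLFLAG_VNET) and writable at run time (CTLFLAG_RW).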
SYSCTL_NODE(_net, PF_INET6, inet6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Internet6 Family"); /* net.inet6 */ SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IP6"); SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "ICMP6"); SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "UDP6"); SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP6"); #if defined(SCTP) || defined(SCTP_SUPPORT) SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SCTP6"); #endif #if defined(IPSEC) || defined(IPSEC_SUPPORT) SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IPSEC6"); #endif /* IPSEC */ /* net.inet6.ip6 */ static int sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS) { int error, val; val = V_ip6_temp_preferred_lifetime; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || !req->newptr) return (error); if (val < V_ip6_desync_factor + V_ip6_temp_regen_advance) return (EINVAL); V_ip6_temp_preferred_lifetime = val; return (0); } static int sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) { int error, val; val = V_ip6_temp_valid_lifetime; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || !req->newptr) return (error); if (val < V_ip6_temp_preferred_lifetime) return (EINVAL); V_ip6_temp_valid_lifetime = val; return (0); } SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0, "Enable forwarding of IPv6 packets between interfaces"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0, "Send ICMPv6 redirects for unforwardable IPv6 packets"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0, "Default hop limit to use for outgoing IPv6 packets"); SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat, ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, "Default value of per-interface flag for accepting ICMPv6 RA messages"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0, "Default value of per-interface flag to control whether routers " "sending ICMPv6 RA messages on that interface are added into the " "default router list"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0, "Always clear the R flag in ICMPv6 NA messages when accepting RA " "on the interface"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0, "Accept the default router list from ICMPv6 RA messages even " "when packet forwarding is enabled"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, "Frequency in seconds at which to log IPv6 forwarding errors"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, "Default maximum number of IPv6 extension headers permitted on " "incoming IPv6 packets, 0 for no artificial limit"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0, "Number of ICMPv6 NS messages sent during duplicate address detection");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, "Provide an IPv6 flowlabel in outbound packets"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, "Default hop limit for IPv6 multicast packets originating from this " "node"); SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version, CTLFLAG_RD, __KAME_VERSION, 0, "KAME version string"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, "Allow the use of addresses whose preferred lifetimes have expired"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0, ""); /* XXX unused */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, "Create RFC3041 temporary addresses for autoconfigured addresses"); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0, sysctl_ip6_temppltime, "I", "Maximum preferred lifetime for temporary addresses"); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0, sysctl_ip6_tempvltime, "I", "Maximum valid lifetime for temporary addresses"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0, "Restrict AF_INET6 sockets to IPv6 addresses only"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0, "Default value of per-interface flag for automatically adding an IPv6 " "link-local address to interfaces when attached"); SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, struct rip6stat, rip6stat, "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "Prefer RFC3041 temporary addresses in source address selection"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0, "Use the default scope zone when none is specified"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0, "Enable path MTU discovery for multicast packets"); #ifdef IPSTEALTH SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6stealth), 0, "Forward IPv6 packets without decrementing their TTL"); #endif /* net.inet6.icmp6 */ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "Accept ICMPv6 redirect messages"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "Delay in seconds before expiring redirect route"); SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, struct icmp6stat, icmp6stat, "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0, "Frequency in seconds of checks for expired prefixes and routers"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_VNET | CTLFLAG_RW, 
&VNET_NAME(nd6_delay), 0, "Delay in seconds before probing for reachability"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, "Number of ICMPv6 NS messages sent during reachability detection"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, "Number of ICMPv6 NS messages sent during address resolution"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "Create a loopback route when configuring an IPv6 address"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0, "Mask of enabled RFC4620 node information query types"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX, nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0, "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup " "for compatibility with KAME implementation"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "Maximum number of ICMPv6 error messages per second"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, ""); /* XXX unused */ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0, "Log NDP debug messages"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861), 0, "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861"); #ifdef EXPERIMENTAL SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_ignore_ipv6_only_ra, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_ignore_ipv6_only_ra), 0, "Ignore the 'IPv6-Only flag' in RA messages in compliance with " "draft-ietf-6man-ipv6only-flag"); #endif diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 180bf4440455..47d68b3f00a1 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -1,2565 +1,2564 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) * * Copyright (c) 2002 Michael Shalayeff * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2009 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ * * Revisions picked from OpenBSD after revision 1.110 import: * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates * 1.120, 1.175 - use monotonic time_uptime * 1.122 - reduce number of updates for non-TCP sessions * 1.125, 1.127 - rewrite merge or stale processing * 1.128 - cleanups * 1.146 - bzero() mbuf before sparsely filling it with data * 1.170 - SIOCSIFMTU checks * 1.126, 1.142 - deferred packets processing * 1.173 - correct expire time processing */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PFSYNC_MINPKT ( \ sizeof(struct ip) + \ sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) struct pfsync_bucket; struct pfsync_pkt { struct ip *ip; struct in_addr src; u_int8_t flags; }; static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, struct pfsync_state_peer *); static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) 
= { pfsync_in_clr, /* PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ pfsync_in_upd, /* PFSYNC_ACT_UPD */ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ pfsync_in_del, /* PFSYNC_ACT_DEL */ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ pfsync_in_error, /* PFSYNC_ACT_INS_F */ pfsync_in_error, /* PFSYNC_ACT_DEL_F */ pfsync_in_bus, /* PFSYNC_ACT_BUS */ pfsync_in_tdb, /* PFSYNC_ACT_TDB */ pfsync_in_eof /* PFSYNC_ACT_EOF */ }; struct pfsync_q { void (*write)(struct pf_kstate *, void *); size_t len; u_int8_t action; }; /* we have one of these for every PFSYNC_S_ */ static void pfsync_out_state(struct pf_kstate *, void *); static void pfsync_out_iack(struct pf_kstate *, void *); static void pfsync_out_upd_c(struct pf_kstate *, void *); static void pfsync_out_del(struct pf_kstate *, void *); static struct pfsync_q pfsync_qs[] = { { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; static void pfsync_q_ins(struct pf_kstate *, int, bool); static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); static void pfsync_update_state(struct pf_kstate *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; struct pfsync_deferral { struct pfsync_softc *pd_sc; TAILQ_ENTRY(pfsync_deferral) pd_entry; u_int pd_refs; struct callout pd_tmo; struct pf_kstate *pd_st; struct mbuf *pd_m; }; struct pfsync_softc; struct pfsync_bucket { int b_id; struct pfsync_softc *b_sc; struct mtx b_mtx; struct callout b_tmo; int b_flags; #define PFSYNCF_BUCKET_PUSH 0x00000001 size_t b_len; TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_S_COUNT]; TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; TAILQ_HEAD(, pfsync_deferral) b_deferrals; u_int b_deferred; void *b_plus; size_t b_pluslen; struct ifaltq b_snd; }; struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; struct ifnet *sc_sync_if; struct ip_moptions sc_imo; struct in_addr sc_sync_peer; uint32_t sc_flags; uint8_t sc_maxupdates; struct ip sc_template; struct mtx sc_mtx; /* Queued data */ struct pfsync_bucket *sc_buckets; /* Bulk update info */ struct mtx sc_bulk_mtx; uint32_t sc_ureq_sent; int sc_bulk_tries; uint32_t sc_ureq_received; int sc_bulk_hashid; uint64_t sc_bulk_stateid; uint32_t sc_bulk_creatorid; struct callout sc_bulk_tmo; struct callout sc_bulkfail_tmo; }; #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) static const char pfsyncname[] = "pfsync"; static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); #define
V_pfsync_swi_ie VNET(pfsync_swi_ie) VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) static void pfsync_timeout(void *); static void pfsync_push(struct pfsync_bucket *); static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, struct in_mfilter *imf); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); static int pfsync_init(void); static void pfsync_uninit(void); static unsigned long pfsync_buckets; SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, &pfsync_buckets, 0, "Number of pfsync hash buckets"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int pfsyncioctl(struct ifnet *, u_long, caddr_t); static int pfsync_defer(struct pf_kstate *, struct mbuf *); static void pfsync_undefer(struct pfsync_deferral *, int); static void pfsync_undefer_state(struct pf_kstate *, int); static void pfsync_defer_tmo(void *); static void pfsync_request_update(u_int32_t, u_int64_t); static bool pfsync_update_state_req(struct pf_kstate *); static void pfsync_drop(struct pfsync_softc *); static void pfsync_sendout(int, int); static void pfsync_send_plus(void *, size_t); static void pfsync_bulk_start(void); static void pfsync_bulk_status(u_int8_t); static void pfsync_bulk_update(void *); static void pfsync_bulk_fail(void *); static void pfsync_detach_ifnet(struct ifnet *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, struct pf_kstate *); #define PFSYNC_MAX_BULKTRIES 12 #define PFSYNC_DEFER_TIMEOUT ((20 * hz) / 1000) VNET_DEFINE(struct if_clone *, pfsync_cloner); #define V_pfsync_cloner VNET(pfsync_cloner) static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { struct pfsync_softc *sc; struct ifnet *ifp; struct pfsync_bucket *b; int c, q; if (unit != 0) return (EINVAL); if (! 
pfsync_buckets) pfsync_buckets = mp_ncpus * 2; sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_flags |= PFSYNCF_OK; sc->sc_maxupdates = 128; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { free(sc, M_PFSYNC); return (ENOSPC); } if_initname(ifp, pfsyncname, unit); ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_type = IFT_PFSYNC; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), M_PFSYNC, M_ZERO | M_WAITOK); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); b->b_id = c; b->b_sc = sc; b->b_len = PFSYNC_MINPKT; for (q = 0; q < PFSYNC_S_COUNT; q++) TAILQ_INIT(&b->b_qs[q]); TAILQ_INIT(&b->b_upd_req_list); TAILQ_INIT(&b->b_deferrals); callout_init(&b->b_tmo, 1); b->b_snd.ifq_maxlen = ifqmaxlen; } V_pfsyncif = sc; return (0); } static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; struct pfsync_bucket *b; int c; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; /* * At this stage, everything should have already been * cleared by pfsync_uninit(), and we have only to * drain callouts. */ while (b->b_deferred > 0) { struct pfsync_deferral *pd = TAILQ_FIRST(&b->b_deferrals); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; if (callout_stop(&pd->pd_tmo) > 0) { pf_release_state(pd->pd_st); m_freem(pd->pd_m); free(pd, M_PFSYNC); } else { pd->pd_refs++; callout_drain(&pd->pd_tmo); free(pd, M_PFSYNC); } } callout_drain(&b->b_tmo); } callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); pfsync_drop(sc); if_free(ifp); pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); free(sc->sc_buckets, M_PFSYNC); free(sc, M_PFSYNC); V_pfsyncif = NULL; } static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } return (0); } static int pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { struct pfsync_softc *sc = V_pfsyncif; #ifndef __NO_STRICT_ALIGNMENT struct pfsync_state_key key[2]; #endif struct pfsync_state_key *kw, *ks; struct pf_kstate *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_krule *r = NULL; struct pfi_kkif *kif; int error; PF_RULES_RASSERT(); if (sp->creatorid == 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid creator id: %08x\n", __func__, ntohl(sp->creatorid)); return (EINVAL); } if ((kif = pfi_kkif_find(sp->ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ } /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. 
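* Otherwise fall back to the default rule, so the imported state is still accounted against a valid rule even when the peers' rulesets differ.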
*/ if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else r = &V_pf_default_rule; if ((r->max_states && counter_u64_fetch(r->states_cur) >= r->max_states)) goto cleanup; /* * XXXGL: consider M_WAITOK in ioctl path after. */ st = pf_alloc_state(M_NOWAIT); if (__predict_false(st == NULL)) goto cleanup; if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; #ifndef __NO_STRICT_ALIGNMENT bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); kw = &key[PF_SK_WIRE]; ks = &key[PF_SK_STACK]; #else kw = &sp->key[PF_SK_WIRE]; ks = &sp->key[PF_SK_STACK]; #endif if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sks == NULL) goto cleanup; } else sks = skw; /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) goto cleanup; /* Copy to state key(s). */ skw->addr[0] = kw->addr[0]; skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; skw->proto = sp->proto; skw->af = sp->af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; sks->proto = sp->proto; sks->af = sp->af; } /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_uptime - ntohl(sp->creation); st->expire = time_uptime; if (sp->expire) { uint32_t timeout; timeout = r->timeout[sp->timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[sp->timeout]; /* sp->expire may have been adaptively scaled by export. */ st->expire -= timeout - ntohl(sp->expire); } st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; st->state_flags = sp->state_flags; st->id = sp->id; st->creatorid = sp->creatorid; pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->rule.ptr = r; st->nat_rule.ptr = NULL; st->anchor.ptr = NULL; st->rt_kif = NULL; st->pfsync_time = time_uptime; st->sync_state = PFSYNC_S_NONE; if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ counter_u64_add(r->states_cur, 1); counter_u64_add(r->states_tot, 1); if (!(flags & PFSYNC_SI_IOCTL)) { st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK, true); pfsync_push_all(sc); } } st->state_flags &= ~PFSTATE_ACK; PF_STATE_UNLOCK(st); return (0); cleanup: error = ENOMEM; if (skw == sks) sks = NULL; if (skw != NULL) uma_zfree(V_pf_state_key_z, skw); if (sks != NULL) uma_zfree(V_pf_state_key_z, sks); cleanup_state: /* pf_state_insert() frees the state keys. */ if (st) { pf_free_state(st); } return (error); } static int pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_pkt pkt; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; struct pfsync_subheader subh; int offset, len; int rv; uint16_t count; PF_RULES_RLOCK_TRACKER; *mp = NULL; V_pfsyncstats.pfsyncs_ipackets++; /* Verify that we have a sync interface configured. 
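* pf must be running and the pfsync interface marked running as well; packets arriving earlier are counted in pfsyncs_ipackets and then dropped.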
*/ if (!sc || !sc->sc_sync_if || !V_pf_status.running || (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_if != m->m_pkthdr.rcvif) { V_pfsyncstats.pfsyncs_badif++; goto done; } if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; goto done; } offset = ip->ip_hl << 2; if (m->m_pkthdr.len < offset + sizeof(*ph)) { V_pfsyncstats.pfsyncs_hdrops++; goto done; } if (offset + sizeof(*ph) > m->m_len) { if (m_pullup(m, offset + sizeof(*ph)) == NULL) { V_pfsyncstats.pfsyncs_hdrops++; return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { V_pfsyncstats.pfsyncs_badver++; goto done; } len = ntohs(ph->len) + offset; if (m->m_pkthdr.len < len) { V_pfsyncstats.pfsyncs_badlen++; goto done; } /* Cheaper to grab this now than having to mess with mbufs later */ pkt.ip = ip; pkt.src = ip->ip_src; pkt.flags = 0; /* * Trusting pf_chksum during packet processing, as well as seeking * in interface name tree, require holding PF_RULES_RLOCK(). */ PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) pkt.flags |= PFSYNC_SI_CKSUM; offset += sizeof(*ph); while (offset <= len - sizeof(subh)) { m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); offset += sizeof(subh); if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; PF_RULES_RUNLOCK(); goto done; } count = ntohs(subh.count); V_pfsyncstats.pfsyncs_iacts[subh.action] += count; rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); if (rv == -1) { PF_RULES_RUNLOCK(); return (IPPROTO_DONE); } offset += rv; } PF_RULES_RUNLOCK(); done: m_freem(m); return (IPPROTO_DONE); } static int pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; u_int32_t creatorid; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } clr = (struct pfsync_clr *)(mp->m_data + offp); for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; if (clr[i].ifname[0] != '\0' && pfi_kkif_find(clr[i].ifname) == NULL) continue; for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->creatorid == creatorid) { s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s); goto relock; } } PF_HASHROW_UNLOCK(ih); } } return (len); } static int pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; int len = sizeof(*sp) * count; int i, offp; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* Check for invalid values. */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST || sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } if (pfsync_state_import(sp, pkt->flags) == ENOMEM) /* Drop out, but process the rest of the actions. 
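* Subsequent insertions in this batch would most likely fail with * ENOMEM as well.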
*/ break; } return (len); } static int pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_ins_ack *ia, *iaa; struct pf_kstate *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); for (i = 0; i < count; i++) { ia = &iaa[i]; st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 0); } PF_STATE_UNLOCK(st); } /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. */ return (count * sizeof(struct pfsync_ins_ack)); } static int pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, struct pfsync_state_peer *dst) { int sync = 0; PF_STATE_LOCK_ASSERT(st); /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. */ if ((st->src.state > src->state && (st->src.state < PF_TCPS_PROXY_SRC || src->state >= PF_TCPS_PROXY_SRC)) || (st->src.state == src->state && SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) sync++; else pf_state_peer_ntoh(src, &st->src); if ((st->dst.state > dst->state) || (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) sync++; else pf_state_peer_ntoh(dst, &st->dst); return (sync); } static int pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_state *sa, *sp; struct pf_kstate *st; int sync; struct mbuf *mp; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, pkt->flags)) V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 1); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); else { sync = 0; /* * Non-TCP protocol state machines always go * forwards */ if (st->src.state > sp->src.state) sync++; else pf_state_peer_ntoh(&sp->src, &st->src); if (st->dst.state > sp->dst.state) sync++; else pf_state_peer_ntoh(&sp->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = time_uptime; st->timeout = sp->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_kstate *st; int len = count * sizeof(*up); int sync; struct mbuf *mp; int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ua = (struct pfsync_upd_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { 
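/* * A compressed update identifies its state by (id, creatorid) only; * if the state is unknown, a full update is requested from the peer * below. */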
up = &ua[i]; /* check for invalid values */ if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(up->creatorid, up->id); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); continue; } if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 1); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &up->src, &up->dst); else { sync = 0; /* * Non-TCP protocol state machines always go * forwards */ if (st->src.state > up->src.state) sync++; else pf_state_peer_ntoh(&up->src, &st->src); if (st->dst.state > up->dst.state) sync++; else pf_state_peer_ntoh(&up->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = time_uptime; st->timeout = up->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_upd_req *ur, *ura; struct mbuf *mp; int len = count * sizeof(*ur); int i, offp; struct pf_kstate *st; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ura = (struct pfsync_upd_req *)(mp->m_data + offp); for (i = 0; i < count; i++) { ur = &ura[i]; if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_NOSYNC) { PF_STATE_UNLOCK(st); continue; } pfsync_update_state_req(st); PF_STATE_UNLOCK(st); } } return (len); } static int pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; struct pf_kstate *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st); } return (len); } static int pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; struct pf_kstate *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_del_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st); } return (len); } static int pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bus *bus; struct mbuf *mp; int len = count * sizeof(*bus); int offp; PFSYNC_BLOCK(sc); /* If we're not waiting for a bulk update, who cares. 
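* sc_ureq_sent doubles as the flag here: it holds the time_uptime at * which our bulk update request went out, or zero if none is pending.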
*/ if (sc->sc_ureq_sent == 0) { PFSYNC_BUNLOCK(sc); return (len); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } bus = (struct pfsync_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(struct pfsync_state)), pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update start\n"); break; case PFSYNC_BUS_END: if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " "bulk update end\n"); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } PFSYNC_BUNLOCK(sc); return (len); } static int pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { int len = count * sizeof(struct pfsync_tdb); #if defined(IPSEC) struct pfsync_tdb *tp; struct mbuf *mp; int offp; int i; int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } tp = (struct pfsync_tdb *)(mp->m_data + offp); for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); #endif return (len); } #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. */ static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; int s; /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && pt->dst.sa.sa_family != AF_INET6)) goto bad; tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. */ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } return; bad: if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; return; } #endif static int pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { /* check if we are at the right place in the packet */ if (offset != m->m_pkthdr.len) V_pfsyncstats.pfsyncs_badlen++; /* we're done. 
free and let the caller return */ m_freem(m); return (-1); } static int pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { V_pfsyncstats.pfsyncs_badact++; m_freem(m); return (-1); } static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *rt) { m_freem(m); return (0); } /* ARGSUSED */ static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; int error; int c; switch (cmd) { case SIOCSIFFLAGS: PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) { ifp->if_drv_flags |= IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_init(); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_uninit(); } break; case SIOCSIFMTU: if (!sc->sc_sync_if || ifr->ifr_mtu <= PFSYNC_MINPKT || ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { for (c = 0; c < pfsync_buckets; c++) { PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) pfsync_sendout(1, c); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; pfsyncr.pfsyncr_defer = sc->sc_flags; PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), sizeof(pfsyncr))); case SIOCSETPFSYNC: { struct in_mfilter *imf = NULL; struct ifnet *sifp; struct ip *ip; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, sizeof(pfsyncr)))) return (error); if (pfsyncr.pfsyncr_maxupdates > 255) return (EINVAL); if (pfsyncr.pfsyncr_syncdev[0] == 0) sifp = NULL; else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); if (sifp != NULL && ( pfsyncr.pfsyncr_syncpeer.s_addr == 0 || pfsyncr.pfsyncr_syncpeer.s_addr == htonl(INADDR_PFSYNC_GROUP))) imf = ip_mfilter_alloc(M_WAITOK, 0, 0); PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); else sc->sc_sync_peer.s_addr = pfsyncr.pfsyncr_syncpeer.s_addr; sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_defer & PFSYNCF_DEFER) { sc->sc_flags |= PFSYNCF_DEFER; V_pfsync_defer_ptr = pfsync_defer; } else { sc->sc_flags &= ~PFSYNCF_DEFER; V_pfsync_defer_ptr = NULL; } if (sifp == NULL) { if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); break; } for (c = 0; c < pfsync_buckets; c++) { PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu < sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip))) pfsync_sendout(1, c); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } pfsync_multicast_cleanup(sc); if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { error = pfsync_multicast_setup(sc, sifp, imf); if (error) { if_rele(sifp); ip_mfilter_free(imf); PFSYNC_UNLOCK(sc); return (error); } } if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = sifp; ip = &sc->sc_template; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template) >> 2; ip->ip_tos = IPTOS_LOWDELAY; /* len and id are set 
later. */ ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; /* Request a full state table update. */ if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); PFSYNC_UNLOCK(sc); PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(0, 0); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); PFSYNC_BUNLOCK(sc); break; } default: return (ENOTTY); } return (0); } static void pfsync_out_state(struct pf_kstate *st, void *buf) { struct pfsync_state *sp = buf; pfsync_state_export(sp, st); } static void pfsync_out_iack(struct pf_kstate *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; } static void pfsync_out_upd_c(struct pf_kstate *st, void *buf) { struct pfsync_upd_c *up = buf; bzero(up, sizeof(*up)); up->id = st->id; pf_state_peer_hton(&st->src, &up->src); pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; } static void pfsync_out_del(struct pf_kstate *st, void *buf) { struct pfsync_del_c *dp = buf; dp->id = st->id; dp->creatorid = st->creatorid; st->state_flags |= PFSTATE_NOSYNC; } static void pfsync_drop(struct pfsync_softc *sc) { struct pf_kstate *st, *next; struct pfsync_upd_req_item *ur; struct pfsync_bucket *b; int c, q; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&b->b_qs[q])) continue; TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); st->sync_state = PFSYNC_S_NONE; pf_release_state(st); } TAILQ_INIT(&b->b_qs[q]); } while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); free(ur, M_PFSYNC); } b->b_len = PFSYNC_MINPKT; b->b_plus = NULL; } } static void pfsync_sendout(int schedswi, int c) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ip *ip; struct pfsync_header *ph; struct pfsync_subheader *subh; struct pf_kstate *st, *st_next; struct pfsync_upd_req_item *ur; struct pfsync_bucket *b = &sc->sc_buckets[c]; int offset; int q, count = 0; KASSERT(sc != NULL, ("%s: null sc", __func__)); KASSERT(b->b_len > PFSYNC_MINPKT, ("%s: sc_len %zu", __func__, b->b_len)); PFSYNC_BUCKET_LOCK_ASSERT(b); if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { pfsync_drop(sc); return; } m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; m->m_len = m->m_pkthdr.len = b->b_len; /* build the ip header */ ip = (struct ip *)m->m_data; bcopy(&sc->sc_template, ip, sizeof(*ip)); offset = sizeof(*ip); ip->ip_len = htons(m->m_pkthdr.len); ip_fillid(ip); /* build the pfsync header */ ph = (struct pfsync_header *)(m->m_data + offset); bzero(ph, sizeof(*ph)); offset += sizeof(*ph); ph->version = PFSYNC_VERSION; ph->len = htons(b->b_len - sizeof(*ip)); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&b->b_qs[q])) continue; subh = (struct pfsync_subheader 
*)(m->m_data + offset); offset += sizeof(*subh); count = 0; TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); /* * XXXGL: some of write methods do unlocked reads * of state data :( */ pfsync_qs[q].write(st, m->m_data + offset); offset += pfsync_qs[q].len; st->sync_state = PFSYNC_S_NONE; pf_release_state(st); count++; } TAILQ_INIT(&b->b_qs[q]); bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&b->b_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); free(ur, M_PFSYNC); count++; } bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? */ if (b->b_plus != NULL) { bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); offset += b->b_pluslen; b->b_plus = NULL; } subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* we're done, let's put it on the wire */ if (ifp->if_bpf) { m->m_data += sizeof(*ip); m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip); BPF_MTAP(ifp, m); m->m_data -= sizeof(*ip); m->m_len = m->m_pkthdr.len = b->b_len; } if (sc->sc_sync_if == NULL) { b->b_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); b->b_len = PFSYNC_MINPKT; if (!_IF_QFULL(&b->b_snd)) _IF_ENQUEUE(&b->b_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_insert_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (st->state_flags & PFSTATE_NOSYNC) return; if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { st->state_flags |= PFSTATE_NOSYNC; return; } KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); PFSYNC_BUCKET_LOCK(b); if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); pfsync_q_ins(st, PFSYNC_S_INS, true); PFSYNC_BUCKET_UNLOCK(b); st->sync_updates = 0; } static int pfsync_defer(struct pf_kstate *st, struct mbuf *m) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; struct pfsync_bucket *b; if (m->m_flags & (M_BCAST|M_MCAST)) return (0); if (sc == NULL) return (0); b = pfsync_get_bucket(sc, st); PFSYNC_LOCK(sc); if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || !(sc->sc_flags & PFSYNCF_DEFER)) { PFSYNC_UNLOCK(sc); return (0); } PFSYNC_BUCKET_LOCK(b); PFSYNC_UNLOCK(sc); if (b->b_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) { PFSYNC_BUCKET_UNLOCK(b); return (0); } b->b_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; pd->pd_sc = sc; pd->pd_refs = 0; pd->pd_st = st; pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 
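/* * Arm the deferral timer: if the peer does not acknowledge the new * state before it fires, pfsync_defer_tmo() transmits the held * packet anyway. */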
callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, PFSYNC_DEFER_TIMEOUT, pfsync_defer_tmo, pd); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); return (1); } static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_kstate *st = pd->pd_st; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); if (drop) m_freem(m); else { _IF_ENQUEUE(&b->b_snd, m); pfsync_push(b); } } static void pfsync_defer_tmo(void *arg) { struct epoch_tracker et; struct pfsync_deferral *pd = arg; struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_kstate *st = pd->pd_st; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); NET_EPOCH_ENTER(et); CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ if (pd->pd_refs == 0) free(pd, M_PFSYNC); PFSYNC_BUCKET_UNLOCK(b); ip_output(m, NULL, NULL, 0, NULL, NULL); pf_release_state(st); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static void pfsync_undefer_state(struct pf_kstate *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK(b); TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); PFSYNC_BUCKET_UNLOCK(b); return; } } PFSYNC_BUCKET_UNLOCK(b); panic("%s: unable to find deferred state", __func__); } static struct pfsync_bucket* pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) { int c = PF_IDHASH(st) % pfsync_buckets; return &sc->sc_buckets[c]; } static void pfsync_update_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return; } if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_INS: /* we're already handling it */ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { st->sync_updates++; if (st->sync_updates >= sc->sc_maxupdates) sync = true; } break; case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); st->sync_updates = 0; break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if (sync || (time_uptime - st->pfsync_time) < 2) pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = &sc->sc_buckets[0]; struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); PFSYNC_BUCKET_LOCK_ASSERT(b); /* * This code does a bit of work to prevent multiple update requests for * the same state from being generated. It searches the current subheader * queue, but it doesn't look into the queue of already packed datagrams. 
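* A duplicate request may therefore still be sent if the previous one * has already been packed for transmission.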
*/ TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); if (item == NULL) return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; if (TAILQ_EMPTY(&b->b_upd_req_list)) nlen += sizeof(struct pfsync_subheader); if (b->b_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(0, 0); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); b->b_len += nlen; pfsync_push(b); } static bool pfsync_update_state_req(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true, full = false; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return (full); } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); pfsync_push(b); break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL: /* we're already handling it */ break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state)) full = true; PFSYNC_BUCKET_UNLOCK(b); return (full); } static void pfsync_delete_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); bool ref = true; PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return; } if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it. 
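* pfsync_q_del() also drops the reference taken when the state was * queued.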
*/ pfsync_q_del(st, true, b); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL, ref); break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; pfsync_send_plus(&r, sizeof(r)); } static void pfsync_q_ins(struct pf_kstate *st, int q, bool ref) { struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", b->b_len)); if (TAILQ_EMPTY(&b->b_qs[q])) nlen += sizeof(struct pfsync_subheader); if (b->b_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1, b->b_id); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } b->b_len += nlen; TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); st->sync_state = q; if (ref) pf_ref_state(st); } static void pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) { int q = st->sync_state; PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); b->b_len -= pfsync_qs[q].len; TAILQ_REMOVE(&b->b_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); if (TAILQ_EMPTY(&b->b_qs[q])) b->b_len -= sizeof(struct pfsync_subheader); } static void pfsync_bulk_start(void) { struct pfsync_softc *sc = V_pfsyncif; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update request\n"); PFSYNC_BLOCK(sc); sc->sc_ureq_received = time_uptime; sc->sc_bulk_hashid = 0; sc->sc_bulk_stateid = 0; pfsync_bulk_status(PFSYNC_BUS_START); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); PFSYNC_BUNLOCK(sc); } static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; struct pf_kstate *s; int i; PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); /* * Start with the last state from the previous invocation. * It may have gone away; in that case, start from the * hash slot. */ s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); if (s != NULL) i = PF_IDHASH(s); else i = sc->sc_bulk_hashid; for (; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; if (s != NULL) PF_HASHROW_ASSERT(ih); else { PF_HASHROW_LOCK(ih); s = LIST_FIRST(&ih->states); } for (; s; s = LIST_NEXT(s, entry)) { if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { if (pfsync_update_state_req(s)) { /* We've filled a packet. */ sc->sc_bulk_hashid = i; sc->sc_bulk_stateid = s->id; sc->sc_bulk_creatorid = s->creatorid; PF_HASHROW_UNLOCK(ih); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); goto full; } } } PF_HASHROW_UNLOCK(ih); } /* We're done. 
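* Tell the peer the bulk transfer is complete so that it can lift its * CARP demotion.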
*/ pfsync_bulk_status(PFSYNC_BUS_END); full: CURVNET_RESTORE(); } static void pfsync_bulk_status(u_int8_t status) { struct { struct pfsync_subheader subh; struct pfsync_bus bus; } __packed r; struct pfsync_softc *sc = V_pfsyncif; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; r.bus.creatorid = V_pf_status.hostid; r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; pfsync_send_plus(&r, sizeof(r)); } static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; struct pfsync_bucket *b = &sc->sc_buckets[0]; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); PFSYNC_BUCKET_LOCK(b); pfsync_request_update(0, 0); PFSYNC_BUCKET_UNLOCK(b); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive bulk update\n"); } CURVNET_RESTORE(); } static void pfsync_send_plus(void *plus, size_t pluslen) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = &sc->sc_buckets[0]; PFSYNC_BUCKET_LOCK(b); if (b->b_len + pluslen > sc->sc_ifp->if_mtu) pfsync_sendout(1, b->b_id); b->b_plus = plus; b->b_len += (b->b_pluslen = pluslen); pfsync_sendout(1, b->b_id); PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_timeout(void *arg) { struct pfsync_bucket *b = arg; CURVNET_SET(b->b_sc->sc_ifp->if_vnet); PFSYNC_BUCKET_LOCK(b); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); CURVNET_RESTORE(); } static void pfsync_push(struct pfsync_bucket *b) { PFSYNC_BUCKET_LOCK_ASSERT(b); b->b_flags |= PFSYNCF_BUCKET_PUSH; swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_push_all(struct pfsync_softc *sc) { int c; struct pfsync_bucket *b; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); } } static void pfsyncintr(void *arg) { struct epoch_tracker et; struct pfsync_softc *sc = arg; struct pfsync_bucket *b; struct mbuf *m, *n; int c; NET_EPOCH_ENTER(et); CURVNET_SET(sc->sc_ifp->if_vnet); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { pfsync_sendout(0, b->b_id); b->b_flags &= ~PFSYNCF_BUCKET_PUSH; } _IF_DEQUEUE_ALL(&b->b_snd, m); PFSYNC_BUCKET_UNLOCK(b); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; /* * We distinguish between a deferral packet and our * own pfsync packet based on M_SKIP_FIREWALL * flag. This is XXX. 
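* Deferred packets are routed normally, while pfsync's own packets * are sent raw with the configured multicast options.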
if (m->m_flags & M_SKIP_FIREWALL) ip_output(m, NULL, NULL, 0, NULL, NULL); else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } } CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static int pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, struct in_mfilter *imf) { struct ip_moptions *imo = &sc->sc_imo; int error; if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, &imf->imf_inm)) != 0) return (error); ip_mfilter_init(&imo->imo_head); ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; return (0); } static void pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; struct in_mfilter *imf; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); in_leavegroup(imf->imf_inm, NULL); ip_mfilter_free(imf); } imo->imo_multicast_ifp = NULL; } void pfsync_detach_ifnet(struct ifnet *ifp) { struct pfsync_softc *sc = V_pfsyncif; if (sc == NULL) return; PFSYNC_LOCK(sc); if (sc->sc_sync_if == ifp) { /* We don't need multicast cleanup here, because the interface * is going away. We do need to ensure we don't try to do * cleanup later. */ ip_mfilter_init(&sc->sc_imo.imo_head); sc->sc_imo.imo_multicast_ifp = NULL; sc->sc_sync_if = NULL; } PFSYNC_UNLOCK(sc); } #ifdef INET extern struct domain inetdomain; static struct protosw in_pfsync_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PFSYNC, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = pfsync_input, - .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif static void pfsync_pointers_init(void) { PF_RULES_WLOCK(); V_pfsync_state_import_ptr = pfsync_state_import; V_pfsync_insert_state_ptr = pfsync_insert_state; V_pfsync_update_state_ptr = pfsync_update_state; V_pfsync_delete_state_ptr = pfsync_delete_state; V_pfsync_clear_states_ptr = pfsync_clear_states; V_pfsync_defer_ptr = pfsync_defer; PF_RULES_WUNLOCK(); } static void pfsync_pointers_uninit(void) { PF_RULES_WLOCK(); V_pfsync_state_import_ptr = NULL; V_pfsync_insert_state_ptr = NULL; V_pfsync_update_state_ptr = NULL; V_pfsync_delete_state_ptr = NULL; V_pfsync_clear_states_ptr = NULL; V_pfsync_defer_ptr = NULL; PF_RULES_WUNLOCK(); } static void vnet_pfsync_init(const void *unused __unused) { int error; V_pfsync_cloner = if_clone_simple(pfsyncname, pfsync_clone_create, pfsync_clone_destroy, 1); error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); if (error) { if_clone_detach(V_pfsync_cloner); log(LOG_INFO, "swi_add() failed in %s\n", __func__); } pfsync_pointers_init(); } VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pfsync_init, NULL); static void vnet_pfsync_uninit(const void *unused __unused) { int ret __diagused; pfsync_pointers_uninit(); if_clone_detach(V_pfsync_cloner); ret = swi_remove(V_pfsync_swi_cookie); MPASS(ret == 0); ret = intr_event_destroy(V_pfsync_swi_ie); MPASS(ret == 0); } VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, vnet_pfsync_uninit, NULL); static int pfsync_init(void) { #ifdef INET int error; pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; error = pf_proto_register(PF_INET, &in_pfsync_protosw); if (error) return (error); error = 
ipproto_register(IPPROTO_PFSYNC); if (error) { pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); return (error); } #endif return (0); } static void pfsync_uninit(void) { pfsync_detach_ifnet_ptr = NULL; #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); #endif } static int pfsync_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = pfsync_init(); break; case MOD_UNLOAD: pfsync_uninit(); break; default: error = EINVAL; break; } return (error); } static moduledata_t pfsync_mod = { pfsyncname, pfsync_modevent, 0 }; #define PFSYNC_MODVER 1 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 85761583c30a..22e057e5691a 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -1,355 +1,346 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)protosw.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_PROTOSW_H_ #define _SYS_PROTOSW_H_ #include /* Forward declare these structures referenced from prototypes below. */ struct kaiocb; struct mbuf; struct thread; struct sockaddr; struct socket; struct sockopt; /*#ifdef _KERNEL*/ /* * Protocol switch table. * * Each protocol has a handle initializing one of these structures, * which is used for protocol-protocol and system-protocol communication. * * Thereafter it is called every 200ms through the pr_fasttimo entry and * every 500ms through the pr_slowtimo for timer based actions. * The system will call the pr_drain entry if it is low on space and * this should throw away any non-critical data. * - * Protocols pass data between themselves as chains of mbufs using - * the pr_input and pr_output hooks. 
Pr_input passes data up (towards - * the users) and pr_output passes it down (towards the interfaces); control - * information passes up and down on pr_ctlinput and pr_ctloutput. - * The protocol is responsible for the space occupied by any the - * arguments to these entries and must dispose it. - * * In retrospect, it would be a lot nicer to use an interface * similar to the vnode VOP interface. */ /* USE THESE FOR YOUR PROTOTYPES ! */ typedef int pr_input_t (struct mbuf **, int*, int); -typedef int pr_output_t (struct mbuf *, struct socket *, ...); typedef void pr_ctlinput_t (int, struct sockaddr *, void *); typedef int pr_ctloutput_t (struct socket *, struct sockopt *); typedef void pr_fasttimo_t (void); typedef void pr_slowtimo_t (void); typedef void pr_drain_t (void); struct protosw { short pr_type; /* socket type used for */ struct domain *pr_domain; /* domain protocol a member of */ short pr_protocol; /* protocol number */ short pr_flags; /* see below */ /* protocol-protocol hooks */ pr_input_t *pr_input; /* input to protocol (from below) */ - pr_output_t *pr_output; /* output to protocol (from above) */ pr_ctlinput_t *pr_ctlinput; /* control input (from below) */ pr_ctloutput_t *pr_ctloutput; /* control output (from above) */ /* utility hooks */ pr_fasttimo_t *pr_fasttimo; /* fast timeout (200ms) */ pr_slowtimo_t *pr_slowtimo; /* slow timeout (500ms) */ pr_drain_t *pr_drain; /* flush any excess space possible */ struct pr_usrreqs *pr_usrreqs; /* user-protocol hook */ LIST_ENTRY(protosw) pr_fasttimos; LIST_ENTRY(protosw) pr_slowtimos; }; /*#endif*/ #define PR_SLOWHZ 2 /* 2 slow timeouts per second */ #define PR_FASTHZ 5 /* 5 fast timeouts per second */ /* * This number should be defined again within each protocol family to avoid * confusion. */ #define PROTO_SPACER 32767 /* spacer for loadable protocols */ /* * Values for pr_flags. * PR_ADDR requires PR_ATOMIC; * PR_ADDR and PR_CONNREQUIRED are mutually exclusive. * PR_IMPLOPCL means that the protocol allows sendto without prior connect, * and the protocol understands the MSG_EOF flag. The first property is * only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed * anyhow). * PR_SOCKBUF requires protocol to initialize and destroy its socket buffers * in its pr_attach and pr_detach. */ #define PR_ATOMIC 0x01 /* exchange atomic messages only */ #define PR_ADDR 0x02 /* addresses given with messages */ #define PR_CONNREQUIRED 0x04 /* connection required by protocol */ #define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */ #define PR_RIGHTS 0x10 /* passes capabilities */ #define PR_IMPLOPCL 0x20 /* implied open/close */ #define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ #define PR_CAPATTACH 0x80 /* socket can attach in cap mode */ #define PR_SOCKBUF 0x100 /* private implementation of buffers */ /* * In earlier BSD network stacks, a single pr_usrreq() function pointer was * invoked with an operation number indicating what operation was desired. * We now provide individual function pointers which protocols can implement, * which offers a number of benefits (such as type checking for arguments). * These older constants are still present in order to support TCP debugging. 
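* They also serve as indices into the prurequests[] name table below.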
*/ #define PRU_ATTACH 0 /* attach protocol to up */ #define PRU_DETACH 1 /* detach protocol from up */ #define PRU_BIND 2 /* bind socket to address */ #define PRU_LISTEN 3 /* listen for connection */ #define PRU_CONNECT 4 /* establish connection to peer */ #define PRU_ACCEPT 5 /* accept connection from peer */ #define PRU_DISCONNECT 6 /* disconnect from peer */ #define PRU_SHUTDOWN 7 /* won't send any more data */ #define PRU_RCVD 8 /* have taken data; more room now */ #define PRU_SEND 9 /* send this data */ #define PRU_ABORT 10 /* abort (fast DISCONNECT, DETACH) */ #define PRU_CONTROL 11 /* control operations on protocol */ #define PRU_SENSE 12 /* return status into m */ #define PRU_RCVOOB 13 /* retrieve out of band data */ #define PRU_SENDOOB 14 /* send out of band data */ #define PRU_SOCKADDR 15 /* fetch socket's address */ #define PRU_PEERADDR 16 /* fetch peer's address */ #define PRU_CONNECT2 17 /* connect two sockets */ /* begin for protocols internal use */ #define PRU_FASTTIMO 18 /* 200ms timeout */ #define PRU_SLOWTIMO 19 /* 500ms timeout */ #define PRU_PROTORCV 20 /* receive from below */ #define PRU_PROTOSEND 21 /* send to below */ /* end for protocol's internal use */ #define PRU_SEND_EOF 22 /* send and close */ #define PRU_SOSETLABEL 23 /* MAC label change */ #define PRU_CLOSE 24 /* socket close */ #define PRU_FLUSH 25 /* flush the socket */ #define PRU_NREQ 25 #ifdef PRUREQUESTS const char *prurequests[] = { "ATTACH", "DETACH", "BIND", "LISTEN", "CONNECT", "ACCEPT", "DISCONNECT", "SHUTDOWN", "RCVD", "SEND", "ABORT", "CONTROL", "SENSE", "RCVOOB", "SENDOOB", "SOCKADDR", "PEERADDR", "CONNECT2", "FASTTIMO", "SLOWTIMO", "PROTORCV", "PROTOSEND", "SEND_EOF", "SOSETLABEL", "CLOSE", "FLUSH", }; #endif #ifdef _KERNEL /* users shouldn't see this decl */ struct ifnet; struct stat; struct ucred; struct uio; /* * If the ordering here looks odd, that's because it's alphabetical. These * should eventually be merged back into struct protosw. * * Some fields are initialized to defaults if they are NULL. 
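* The pru_*_notsupp() handlers declared further down serve as those * defaults.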
*/ struct pr_usrreqs { void (*pru_abort)(struct socket *so); int (*pru_accept)(struct socket *so, struct sockaddr **nam); int (*pru_attach)(struct socket *so, int proto, struct thread *td); int (*pru_bind)(struct socket *so, struct sockaddr *nam, struct thread *td); int (*pru_connect)(struct socket *so, struct sockaddr *nam, struct thread *td); int (*pru_connect2)(struct socket *so1, struct socket *so2); int (*pru_control)(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td); void (*pru_detach)(struct socket *so); int (*pru_disconnect)(struct socket *so); int (*pru_listen)(struct socket *so, int backlog, struct thread *td); int (*pru_peeraddr)(struct socket *so, struct sockaddr **nam); int (*pru_rcvd)(struct socket *so, int flags); int (*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags); int (*pru_send)(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td); #define PRUS_OOB 0x1 #define PRUS_EOF 0x2 #define PRUS_MORETOCOME 0x4 #define PRUS_NOTREADY 0x8 #define PRUS_IPV6 0x10 int (*pru_ready)(struct socket *so, struct mbuf *m, int count); int (*pru_sense)(struct socket *so, struct stat *sb); int (*pru_shutdown)(struct socket *so); int (*pru_flush)(struct socket *so, int direction); int (*pru_sockaddr)(struct socket *so, struct sockaddr **nam); int (*pru_sosend)(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int (*pru_sopoll)(struct socket *so, int events, struct ucred *cred, struct thread *td); void (*pru_sosetlabel)(struct socket *so); void (*pru_close)(struct socket *so); int (*pru_bindat)(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int (*pru_connectat)(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int (*pru_aio_queue)(struct socket *so, struct kaiocb *job); }; /* * All nonvoid pru_*() functions below return EOPNOTSUPP. 
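* The exception is pru_sense_null(), which reports success so that * fstat(2) does not fail on such sockets.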
*/ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam); int pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job); int pru_attach_notsupp(struct socket *so, int proto, struct thread *td); int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td); int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td); int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int pru_connect2_notsupp(struct socket *so1, struct socket *so2); int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td); int pru_disconnect_notsupp(struct socket *so); int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td); int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam); int pru_rcvd_notsupp(struct socket *so, int flags); int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags); int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td); int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count); int pru_sense_null(struct socket *so, struct stat *sb); int pru_shutdown_notsupp(struct socket *so); int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam); int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td); #endif /* _KERNEL */ /* * The arguments to the ctlinput routine are * (*protosw[].pr_ctlinput)(cmd, sa, arg); * where cmd is one of the commands below, sa is a pointer to a sockaddr, * and arg is a `void *' argument used within a protocol family. */ #define PRC_ROUTEDEAD 1 /* select new route if possible ??? 
*/ /* was PRC_QUENCH2 3 DEC congestion bit says slow down */ /* was PRC_QUENCH 4 Deprecated by RFC 6633 */ #define PRC_MSGSIZE 5 /* message size forced drop */ #define PRC_HOSTDEAD 6 /* host appears to be down */ #define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */ #define PRC_UNREACH_NET 8 /* no route to network */ #define PRC_UNREACH_HOST 9 /* no route to host */ #define PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */ #define PRC_UNREACH_PORT 11 /* bad port # */ /* was PRC_UNREACH_NEEDFRAG 12 (use PRC_MSGSIZE) */ #define PRC_UNREACH_SRCFAIL 13 /* source route failed */ #define PRC_REDIRECT_NET 14 /* net routing redirect */ #define PRC_REDIRECT_HOST 15 /* host routing redirect */ #define PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */ #define PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */ #define PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */ #define PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */ #define PRC_PARAMPROB 20 /* header incorrect */ #define PRC_UNREACH_ADMIN_PROHIB 21 /* packet administratively prohibited */ #define PRC_NCMDS 22 #define PRC_IS_REDIRECT(cmd) \ ((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST) #ifdef PRCREQUESTS char *prcrequests[] = { "IFDOWN", "ROUTEDEAD", "IFUP", "DEC-BIT-QUENCH2", "QUENCH", "MSGSIZE", "HOSTDEAD", "#7", "NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH", "#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT", "TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS", "PARAMPROB", "ADMIN-UNREACH" }; #endif /* * The arguments to ctloutput are: * (*protosw[].pr_ctloutput)(req, so, level, optname, optval, p); * req is one of the actions listed below, so is a (struct socket *), * level is an indication of which protocol layer the option is intended for. * optname is a protocol dependent socket option request, * optval is a pointer to a mbuf-chain pointer, for value-return results. * The protocol is responsible for disposal of the mbuf chain *optval * if supplied, * the caller is responsible for any space held by *optval, when returned. * A non-zero return from ctloutput gives a * UNIX error number which should be passed to higher level software. */ #define PRCO_GETOPT 0 #define PRCO_SETOPT 1 #define PRCO_NCMDS 2 #ifdef PRCOREQUESTS char *prcorequests[] = { "GETOPT", "SETOPT", }; #endif #ifdef _KERNEL void pfctlinput(int, struct sockaddr *); struct domain *pffinddomain(int family); struct protosw *pffindproto(int family, int protocol, int type); struct protosw *pffindtype(int family, int type); int pf_proto_register(int family, struct protosw *npr); int pf_proto_unregister(int family, int protocol, int type); #endif #endif