Index: stable/8/contrib/ntp/ntpd/ntp_crypto.c
===================================================================
--- stable/8/contrib/ntp/ntpd/ntp_crypto.c	(revision 281230)
+++ stable/8/contrib/ntp/ntpd/ntp_crypto.c	(revision 281231)
@@ -1,4201 +1,4235 @@
 /*
  * ntp_crypto.c - NTP version 4 public key routines
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #ifdef OPENSSL
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/param.h>
 #include <unistd.h>
 #include <fcntl.h>
 
 #include "ntpd.h"
 #include "ntp_stdlib.h"
 #include "ntp_unixtime.h"
 #include "ntp_string.h"
 #include <ntp_random.h>
 
 #include "openssl/asn1_mac.h"
 #include "openssl/bn.h"
 #include "openssl/err.h"
 #include "openssl/evp.h"
 #include "openssl/pem.h"
 #include "openssl/rand.h"
 #include "openssl/x509v3.h"
 
 #ifdef KERNEL_PLL
 #include "ntp_syscall.h"
 #endif /* KERNEL_PLL */
 
 /*
  * Extension field message format
  *
  * These are always signed and saved before sending in network byte
  * order. They must be converted to and from host byte order for
  * processing.
  *
  * +-------+-------+
  * |   op  |  len  | <- extension pointer
  * +-------+-------+
  * |    assocID    |
  * +---------------+
  * |   timestamp   | <- value pointer
  * +---------------+
  * |   filestamp   |
  * +---------------+
  * |   value len   |
  * +---------------+
  * |               |
  * =     value     =
  * |               |
  * +---------------+
  * | signature len |
  * +---------------+
  * |               |
  * =   signature   =
  * |               |
  * +---------------+
  *
  * The CRYPTO_RESP bit is set to 0 for requests, 1 for responses.
  * Requests carry the association ID of the receiver; responses carry
  * the association ID of the sender. Some messages include only the
  * operation/length and association ID words and so have length 8
  * octets. Ohers include the value structure and associated value and
  * signature fields. These messages include the timestamp, filestamp,
  * value and signature words and so have length at least 24 octets. The
  * signature and/or value fields can be empty, in which case the
  * respective length words are zero. An empty value with nonempty
  * signature is syntactically valid, but semantically questionable.
  *
  * The filestamp represents the time when a cryptographic data file such
  * as a public/private key pair is created. It follows every reference
  * depending on that file and serves as a means to obsolete earlier data
  * of the same type. The timestamp represents the time when the
  * cryptographic data of the message were last signed. Creation of a
  * cryptographic data file or signing a message can occur only when the
  * creator or signor is synchronized to an authoritative source and
  * proventicated to a trusted authority.
  *
  * Note there are four conditions required for server trust. First, the
  * public key on the certificate must be verified, which involves a
  * number of format, content and consistency checks. Next, the server
  * identity must be confirmed by one of four schemes: private
  * certificate, IFF scheme, GQ scheme or certificate trail hike to a
  * self signed trusted certificate. Finally, the server signature must
  * be verified.
  */
 /*
  * Cryptodefines
  */
 #define TAI_1972	10	/* initial TAI offset (s) */
 #define MAX_LEAP	100	/* max UTC leapseconds (s) */
 #define VALUE_LEN	(6 * 4) /* min response field length */
+#define MAX_VALLEN	(65535 - VALUE_LEN)
 #define YEAR		(60 * 60 * 24 * 365) /* seconds in year */
 
 /*
  * Global cryptodata in host byte order
  */
 u_int32	crypto_flags = 0x0;	/* status word */
 
 /*
  * Global cryptodata in network byte order
  */
 struct cert_info *cinfo = NULL;	/* certificate info/value */
 struct value hostval;		/* host value */
 struct value pubkey;		/* public key */
 struct value tai_leap;		/* leapseconds table */
 EVP_PKEY *iffpar_pkey = NULL;	/* IFF parameters */
 EVP_PKEY *gqpar_pkey = NULL;	/* GQ parameters */
 EVP_PKEY *mvpar_pkey = NULL;	/* MV parameters */
 char	*iffpar_file = NULL; /* IFF parameters file */
 char	*gqpar_file = NULL;	/* GQ parameters file */
 char	*mvpar_file = NULL;	/* MV parameters file */
 
 /*
  * Private cryptodata in host byte order
  */
 static char *passwd = NULL;	/* private key password */
 static EVP_PKEY *host_pkey = NULL; /* host key */
 static EVP_PKEY *sign_pkey = NULL; /* sign key */
 static const EVP_MD *sign_digest = NULL; /* sign digest */
 static u_int sign_siglen;	/* sign key length */
 static char *rand_file = NULL;	/* random seed file */
 static char *host_file = NULL;	/* host key file */
 static char *sign_file = NULL;	/* sign key file */
 static char *cert_file = NULL;	/* certificate file */
 static char *leap_file = NULL;	/* leapseconds file */
 static tstamp_t if_fstamp = 0;	/* IFF filestamp */
 static tstamp_t gq_fstamp = 0;	/* GQ file stamp */
 static tstamp_t mv_fstamp = 0;	/* MV filestamp */
 static u_int ident_scheme = 0;	/* server identity scheme */
 
 /*
  * Cryptotypes
  */
 static	int	crypto_verify	P((struct exten *, struct value *,
 				    struct peer *));
-static	int	crypto_encrypt	P((struct exten *, struct value *,
-				    keyid_t *));
+static	int	crypto_encrypt	P((const u_char *, u_int, keyid_t *,
+				    struct value *));
 static	int	crypto_alice	P((struct peer *, struct value *));
 static	int	crypto_alice2	P((struct peer *, struct value *));
 static	int	crypto_alice3	P((struct peer *, struct value *));
 static	int	crypto_bob	P((struct exten *, struct value *));
 static	int	crypto_bob2	P((struct exten *, struct value *));
 static	int	crypto_bob3	P((struct exten *, struct value *));
 static	int	crypto_iff	P((struct exten *, struct peer *));
 static	int	crypto_gq	P((struct exten *, struct peer *));
 static	int	crypto_mv	P((struct exten *, struct peer *));
 static	u_int	crypto_send	P((struct exten *, struct value *));
 static	tstamp_t crypto_time	P((void));
 static	u_long	asn2ntp		P((ASN1_TIME *));
 static	struct cert_info *cert_parse P((u_char *, u_int, tstamp_t));
 static	int	cert_sign	P((struct exten *, struct value *));
 static	int	cert_valid	P((struct cert_info *, EVP_PKEY *));
 static	int	cert_install	P((struct exten *, struct peer *));
 static	void	cert_free	P((struct cert_info *));
 static	EVP_PKEY *crypto_key	P((char *, tstamp_t *));
 static	int	bighash		P((BIGNUM *, BIGNUM *));
 static	struct cert_info *crypto_cert P((char *));
 static	void	crypto_tai	P((char *));
 
 #ifdef SYS_WINNT
 int
 readlink(char * link, char * file, int len) {
 	return (-1);
 }
 #endif
 
 /*
  * session_key - generate session key
  *
  * This routine generates a session key from the source address,
  * destination address, key ID and private value. The value of the
  * session key is the MD5 hash of these values, while the next key ID is
  * the first four octets of the hash.
  *
  * Returns the next key ID
  */
 keyid_t
 session_key(
 	struct sockaddr_storage *srcadr, /* source address */
 	struct sockaddr_storage *dstadr, /* destination address */
 	keyid_t	keyno,		/* key ID */
 	keyid_t	private,	/* private value */
 	u_long	lifetime 	/* key lifetime */
 	)
 {
 	EVP_MD_CTX ctx;		/* message digest context */
 	u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */
 	keyid_t	keyid;		/* key identifer */
 	u_int32	header[10];	/* data in network byte order */
 	u_int	hdlen, len;
 
 	if (!dstadr)
 		return 0;
 	
 	/*
 	 * Generate the session key and key ID. If the lifetime is
 	 * greater than zero, install the key and call it trusted.
 	 */
 	hdlen = 0;
 	switch(srcadr->ss_family) {
 	case AF_INET:
 		header[0] = ((struct sockaddr_in *)srcadr)->sin_addr.s_addr;
 		header[1] = ((struct sockaddr_in *)dstadr)->sin_addr.s_addr;
 		header[2] = htonl(keyno);
 		header[3] = htonl(private);
 		hdlen = 4 * sizeof(u_int32);
 		break;
 
 	case AF_INET6:
 		memcpy(&header[0], &GET_INADDR6(*srcadr),
 		    sizeof(struct in6_addr));
 		memcpy(&header[4], &GET_INADDR6(*dstadr),
 		    sizeof(struct in6_addr));
 		header[8] = htonl(keyno);
 		header[9] = htonl(private);
 		hdlen = 10 * sizeof(u_int32);
 		break;
 	}
 	EVP_DigestInit(&ctx, EVP_md5());
 	EVP_DigestUpdate(&ctx, (u_char *)header, hdlen);
 	EVP_DigestFinal(&ctx, dgst, &len);
 	memcpy(&keyid, dgst, 4);
 	keyid = ntohl(keyid);
 	if (lifetime != 0) {
 		MD5auth_setkey(keyno, dgst, len);
 		authtrust(keyno, lifetime);
 	}
 #ifdef DEBUG
 	if (debug > 1)
 		printf(
 		    "session_key: %s > %s %08x %08x hash %08x life %lu\n",
 		    stoa(srcadr), stoa(dstadr), keyno,
 		    private, keyid, lifetime);
 #endif
 	return (keyid);
 }
 
 
 /*
  * make_keylist - generate key list
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PER	host certificate expired
  *
  * This routine constructs a pseudo-random sequence by repeatedly
  * hashing the session key starting from a given source address,
  * destination address, private value and the next key ID of the
  * preceeding session key. The last entry on the list is saved along
  * with its sequence number and public signature.
  */
 int
 make_keylist(
 	struct peer *peer,	/* peer structure pointer */
 	struct interface *dstadr /* interface */
 	)
 {
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	struct autokey *ap;	/* autokey pointer */
 	struct value *vp;	/* value pointer */
 	keyid_t	keyid = 0;	/* next key ID */
 	keyid_t	cookie;		/* private value */
 	u_long	lifetime;
 	u_int	len, mpoll;
 	int	i;
 
 	if (!dstadr)
 		return XEVNT_OK;
 	
 	/*
 	 * Allocate the key list if necessary.
 	 */
 	tstamp = crypto_time();
 	if (peer->keylist == NULL)
 		peer->keylist = emalloc(sizeof(keyid_t) *
 		    NTP_MAXSESSION);
 
 	/*
 	 * Generate an initial key ID which is unique and greater than
 	 * NTP_MAXKEY.
 	 */
 	while (1) {
 		keyid = (ntp_random() + NTP_MAXKEY + 1) & ((1 <<
 		    sizeof(keyid_t)) - 1);
 		if (authhavekey(keyid))
 			continue;
 		break;
 	}
 
 	/*
 	 * Generate up to NTP_MAXSESSION session keys. Stop if the
 	 * next one would not be unique or not a session key ID or if
 	 * it would expire before the next poll. The private value
 	 * included in the hash is zero if broadcast mode, the peer
 	 * cookie if client mode or the host cookie if symmetric modes.
 	 */
 	mpoll = 1 << min(peer->ppoll, peer->hpoll);
 	lifetime = min(sys_automax, NTP_MAXSESSION * mpoll);
 	if (peer->hmode == MODE_BROADCAST)
 		cookie = 0;
 	else
 		cookie = peer->pcookie;
 	for (i = 0; i < NTP_MAXSESSION; i++) {
 		peer->keylist[i] = keyid;
 		peer->keynumber = i;
 		keyid = session_key(&dstadr->sin, &peer->srcadr, keyid,
 		    cookie, lifetime);
 		lifetime -= mpoll;
 		if (auth_havekey(keyid) || keyid <= NTP_MAXKEY ||
 		    lifetime <= mpoll)
 			break;
 	}
 
 	/*
 	 * Save the last session key ID, sequence number and timestamp,
 	 * then sign these values for later retrieval by the clients. Be
 	 * careful not to use invalid key media. Use the public values
 	 * timestamp as filestamp. 
 	 */
 	vp = &peer->sndval;
 	if (vp->ptr == NULL)
 		vp->ptr = emalloc(sizeof(struct autokey));
 	ap = (struct autokey *)vp->ptr;
 	ap->seq = htonl(peer->keynumber);
 	ap->key = htonl(keyid);
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = hostval.tstamp;
 	vp->vallen = htonl(sizeof(struct autokey));
 	vp->siglen = 0;
 	if (tstamp != 0) {
 		if (tstamp < cinfo->first || tstamp > cinfo->last)
 			return (XEVNT_PER);
 
 		if (vp->sig == NULL)
 			vp->sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)vp, 12);
 		EVP_SignUpdate(&ctx, vp->ptr, sizeof(struct autokey));
 		if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 			vp->siglen = htonl(len);
 		else
 			msyslog(LOG_ERR, "make_keys %s\n",
 			    ERR_error_string(ERR_get_error(), NULL));
 		peer->flags |= FLAG_ASSOC;
 	}
 #ifdef DEBUG
 	if (debug)
 		printf("make_keys: %d %08x %08x ts %u fs %u poll %d\n",
 		    ntohl(ap->seq), ntohl(ap->key), cookie,
 		    ntohl(vp->tstamp), ntohl(vp->fstamp), peer->hpoll);
 #endif
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_recv - parse extension fields
  *
  * This routine is called when the packet has been matched to an
  * association and passed sanity, format and MAC checks. We believe the
  * extension field values only if the field has proper format and
  * length, the timestamp and filestamp are valid and the signature has
  * valid length and is verified. There are a few cases where some values
  * are believed even if the signature fails, but only if the proventic
  * bit is not set.
  */
 int
 crypto_recv(
 	struct peer *peer,	/* peer structure pointer */
 	struct recvbuf *rbufp	/* packet buffer pointer */
 	)
 {
 	const EVP_MD *dp;	/* message digest algorithm */
 	u_int32	*pkt;		/* receive packet pointer */
 	struct autokey *ap, *bp; /* autokey pointer */
 	struct exten *ep, *fp;	/* extension pointers */
 	int	has_mac;	/* length of MAC field */
 	int	authlen;	/* offset of MAC field */
 	associd_t associd;	/* association ID */
 	tstamp_t tstamp = 0;	/* timestamp */
 	tstamp_t fstamp = 0;	/* filestamp */
 	u_int	len;		/* extension field length */
 	u_int	code;		/* extension field opcode */
 	u_int	vallen = 0;	/* value length */
 	X509	*cert;		/* X509 certificate */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	keyid_t	cookie;		/* crumbles */
 	int	hismode;	/* packet mode */
 	int	rval = XEVNT_OK;
 	u_char	*ptr;
 	u_int32 temp32;
 
 	/*
 	 * Initialize. Note that the packet has already been checked for
 	 * valid format and extension field lengths. First extract the
 	 * field length, command code and association ID in host byte
 	 * order. These are used with all commands and modes. Then check
 	 * the version number, which must be 2, and length, which must
 	 * be at least 8 for requests and VALUE_LEN (24) for responses.
 	 * Packets that fail either test sink without a trace. The
 	 * association ID is saved only if nonzero.
 	 */
 	authlen = LEN_PKT_NOMAC;
 	hismode = (int)PKT_MODE((&rbufp->recv_pkt)->li_vn_mode);
 	while ((has_mac = rbufp->recv_length - authlen) > MAX_MAC_LEN) {
 		pkt = (u_int32 *)&rbufp->recv_pkt + authlen / 4;
 		ep = (struct exten *)pkt;
 		code = ntohl(ep->opcode) & 0xffff0000;
 		len = ntohl(ep->opcode) & 0x0000ffff;
 		associd = (associd_t) ntohl(pkt[1]);
 		rval = XEVNT_OK;
 #ifdef DEBUG
 		if (debug)
 			printf(
 			    "crypto_recv: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n",
 			    peer->crypto, authlen, len, code >> 16,
 			    associd);
 #endif
 
 		/*
 		 * Check version number and field length. If bad,
 		 * quietly ignore the packet.
 		 */
 		if (((code >> 24) & 0x3f) != CRYPTO_VN || len < 8) {
 			sys_unknownversion++;
 			code |= CRYPTO_ERROR;
 		}
 
 		/*
 		 * Little vulnerability bandage here. If a perp tosses a
 		 * fake association ID over the fence, we better toss it
 		 * out. Only the first one counts.
 		 */
 		if (code & CRYPTO_RESP) {
 			if (peer->assoc == 0)
 				peer->assoc = associd;
 			else if (peer->assoc != associd)
 				code |= CRYPTO_ERROR;
 		}
 		if (len >= VALUE_LEN) {
 			tstamp = ntohl(ep->tstamp);
 			fstamp = ntohl(ep->fstamp);
 			vallen = ntohl(ep->vallen);
+			/*
+			 * Bug 2761: reject zero/oversized vallen before switch.
+			 */
+			if (   vallen == 0
+			    || len - VALUE_LEN < vallen)
+				return XEVNT_LEN;
 		}
 		switch (code) {
 
 		/*
 		 * Install status word, host name, signature scheme and
 		 * association ID. In OpenSSL the signature algorithm is
 		 * bound to the digest algorithm, so the NID completely
 		 * defines the signature scheme. Note the request and
 		 * response are identical, but neither is validated by
 		 * signature. The request is processed here only in
 		 * symmetric modes. The server name field might be
 		 * useful to implement access controls in future.
 		 */
 		case CRYPTO_ASSOC:
 
 			/*
 			 * If the machine is running when this message
 			 * arrives, the other fellow has reset and so
 			 * must we. Otherwise, pass the extension field
 			 * to the transmit side.
 			 */
 			if (peer->crypto) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			fp = emalloc(len);
 			memcpy(fp, ep, len);
 			temp32 = CRYPTO_RESP;
 			fp->opcode |= htonl(temp32);
 			peer->cmmd = fp;
 			/* fall through */
 
 		case CRYPTO_ASSOC | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if it has already been
 			 * stored or the message has been amputated.
 			 */
 			if (peer->crypto)
 				break;
 
 			if (vallen == 0 || vallen > MAXHOSTNAME ||
-			    len < VALUE_LEN + vallen) {
+			    len < VALUE_LEN || len - VALUE_LEN < vallen) {
 				rval = XEVNT_LEN;
 				break;
 			}
 
 			/*
 			 * Check the identity schemes are compatible. If
 			 * the client has PC, the server must have PC,
 			 * in which case the server public key and
 			 * identity are presumed valid, so we skip the
 			 * certificate and identity exchanges and move
 			 * immediately to the cookie exchange which
 			 * confirms the server signature.
 			 */
 #ifdef DEBUG
 			if (debug)
 				printf(
 				    "crypto_recv: ident host 0x%x server 0x%x\n",
 				    crypto_flags, fstamp);
 #endif
 			temp32 = (crypto_flags | ident_scheme) &
 			    fstamp & CRYPTO_FLAG_MASK;
 			if (crypto_flags & CRYPTO_FLAG_PRIV) {
 				if (!(fstamp & CRYPTO_FLAG_PRIV)) {
 					rval = XEVNT_KEY;
 					break;
 
 				} else {
 					fstamp |= CRYPTO_FLAG_VALID |
 					    CRYPTO_FLAG_VRFY |
 					    CRYPTO_FLAG_SIGN;
 				}
 			/*
 			 * In symmetric modes it is an error if either
 			 * peer requests identity and the other peer
 			 * does not support it.
 			 */
 			} else if ((hismode == MODE_ACTIVE || hismode ==
 			    MODE_PASSIVE) && ((crypto_flags | fstamp) &
 			    CRYPTO_FLAG_MASK) && !temp32) {
 				rval = XEVNT_KEY;
 				break;
 			/*
 			 * It is an error if the client requests
 			 * identity and the server does not support it.
 			 */
 			} else if (hismode == MODE_CLIENT && (fstamp &
 			    CRYPTO_FLAG_MASK) && !temp32) {
 				rval = XEVNT_KEY;
 				break;
 			}
 
 			/*
 			 * Otherwise, the identity scheme(s) are those
 			 * that both client and server support.
 			 */
 			fstamp = temp32 | (fstamp & ~CRYPTO_FLAG_MASK);
 
 			/*
 			 * Discard the message if the signature digest
 			 * NID is not supported.
 			 */
 			temp32 = (fstamp >> 16) & 0xffff;
 			dp =
 			    (const EVP_MD *)EVP_get_digestbynid(temp32);
 			if (dp == NULL) {
 				rval = XEVNT_MD;
 				break;
 			}
 
 			/*
 			 * Save status word, host name and message
 			 * digest/signature type.
 			 */
 			peer->crypto = fstamp;
 			peer->digest = dp;
 			peer->subject = emalloc(vallen + 1);
 			memcpy(peer->subject, ep->pkt, vallen);
 			peer->subject[vallen] = '\0';
 			peer->issuer = emalloc(vallen + 1);
 			strcpy(peer->issuer, peer->subject);
 			temp32 = (fstamp >> 16) & 0xffff;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "flags 0x%x host %s signature %s", fstamp,
 			    peer->subject, OBJ_nid2ln(temp32));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Decode X509 certificate in ASN.1 format and extract
 		 * the data containing, among other things, subject
 		 * name and public key. In the default identification
 		 * scheme, the certificate trail is followed to a self
 		 * signed trusted certificate.
 		 */
 		case CRYPTO_CERT | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid.
 			 */
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Scan the certificate list to delete old
 			 * versions and link the newest version first on
 			 * the list.
 			 */
 			if ((rval = cert_install(ep, peer)) != XEVNT_OK)
 				break;
 
 			/*
 			 * If we snatch the certificate before the
 			 * server certificate has been signed by its
 			 * server, it will be self signed. When it is,
 			 * we chase the certificate issuer, which the
 			 * server has, and keep going until a self
 			 * signed trusted certificate is found. Be sure
 			 * to update the issuer field, since it may
 			 * change.
 			 */
 			if (peer->issuer != NULL)
 				free(peer->issuer);
 			peer->issuer = emalloc(strlen(cinfo->issuer) +
 			    1);
 			strcpy(peer->issuer, cinfo->issuer);
 
 			/*
 			 * We plug in the public key and lifetime from
 			 * the first certificate received. However, note
 			 * that this certificate might not be signed by
 			 * the server, so we can't check the
 			 * signature/digest NID.
 			 */
 			if (peer->pkey == NULL) {
 				ptr = (u_char *)cinfo->cert.ptr;
 				cert = d2i_X509(NULL, &ptr,
 				    ntohl(cinfo->cert.vallen));
 				peer->pkey = X509_get_pubkey(cert);
 				X509_free(cert);
 			}
 			peer->flash &= ~TEST8;
 			temp32 = cinfo->nid;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "cert %s 0x%x %s (%u) fs %u",
 			    cinfo->subject, cinfo->flags,
 			    OBJ_nid2ln(temp32), temp32,
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Schnorr (IFF)identity scheme. This scheme is designed
 		 * for use with shared secret group keys and where the
 		 * certificate may be generated by a third party. The
 		 * client sends a challenge to the server, which
 		 * performs a calculation and returns the result. A
 		 * positive result is possible only if both client and
 		 * server contain the same secret group key.
 		 */
 		case CRYPTO_IFF | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * If the the challenge matches the response,
 			 * the certificate public key, as well as the
 			 * server public key, signatyre and identity are
 			 * all verified at the same time. The server is
 			 * declared trusted, so we skip further
 			 * certificate stages and move immediately to
 			 * the cookie stage.
 			 */
 			if ((rval = crypto_iff(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY |
 			    CRYPTO_FLAG_PROV;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "iff fs %u",
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Guillou-Quisquater (GQ) identity scheme. This scheme
 		 * is designed for use with public certificates carrying
 		 * the GQ public key in an extension field. The client
 		 * sends a challenge to the server, which performs a
 		 * calculation and returns the result. A positive result
 		 * is possible only if both client and server contain
 		 * the same group key and the server has the matching GQ
 		 * private key.
 		 */
 		case CRYPTO_GQ | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * If the the challenge matches the response,
 			 * the certificate public key, as well as the
 			 * server public key, signatyre and identity are
 			 * all verified at the same time. The server is
 			 * declared trusted, so we skip further
 			 * certificate stages and move immediately to
 			 * the cookie stage.
 			 */
 			if ((rval = crypto_gq(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY |
 			    CRYPTO_FLAG_PROV;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "gq fs %u",
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * MV
 		 */
 		case CRYPTO_MV | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * If the the challenge matches the response,
 			 * the certificate public key, as well as the
 			 * server public key, signatyre and identity are
 			 * all verified at the same time. The server is
 			 * declared trusted, so we skip further
 			 * certificate stages and move immediately to
 			 * the cookie stage.
 			 */
 			if ((rval = crypto_mv(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY |
 			    CRYPTO_FLAG_PROV;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "mv fs %u",
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Cookie request in symmetric modes. Roll a random
 		 * cookie and install in symmetric mode. Encrypt for the
 		 * response, which is transmitted later.
 		 */
 		case CRYPTO_COOK:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Pass the extension field to the transmit
 			 * side. If already agreed, walk away.
 			 */
 			fp = emalloc(len);
 			memcpy(fp, ep, len);
 			temp32 = CRYPTO_RESP;
 			fp->opcode |= htonl(temp32);
 			peer->cmmd = fp;
 			if (peer->crypto & CRYPTO_FLAG_AGREE) {
 				peer->flash &= ~TEST8;
 				break;
 			}
 
 			/*
 			 * Install cookie values and light the cookie
 			 * bit. The transmit side will pick up and
 			 * encrypt it for the response.
 			 */
 			key_expire(peer);
 			peer->cookval.tstamp = ep->tstamp;
 			peer->cookval.fstamp = ep->fstamp;
 			RAND_bytes((u_char *)&peer->pcookie, 4);
 			peer->crypto &= ~CRYPTO_FLAG_AUTO;
 			peer->crypto |= CRYPTO_FLAG_AGREE;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u",
 			    peer->pcookie, ntohl(ep->tstamp),
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Cookie response in client and symmetric modes. If the
 		 * cookie bit is set, the working cookie is the EXOR of
 		 * the current and new values.
 		 */
 		case CRYPTO_COOK | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or identity
 			 * not confirmed or signature not verified with
 			 * respect to the cookie values.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VRFY)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, &peer->cookval,
 			    peer)) != XEVNT_OK)
 				break;
 
 			/*
 			 * Decrypt the cookie, hunting all the time for
 			 * errors.
 			 */
 			if (vallen == (u_int) EVP_PKEY_size(host_pkey)) {
 				u_int32 *cookiebuf = malloc(
 					RSA_size(host_pkey->pkey.rsa));
 				if (cookiebuf == NULL) {
 					rval = XEVNT_CKY;
 					break;
 				}
 				if (RSA_private_decrypt(vallen,
 				    (u_char *)ep->pkt,
 				    (u_char *)cookiebuf,
 				    host_pkey->pkey.rsa,
 				    RSA_PKCS1_OAEP_PADDING) != 4) {
 					rval = XEVNT_CKY;
 					free(cookiebuf);
 					break;
 				} else {
 					cookie = ntohl(*cookiebuf);
 					free(cookiebuf);
 				}
 			} else {
 				rval = XEVNT_CKY;
 				break;
 			}
 
 			/*
 			 * Install cookie values and light the cookie
 			 * bit. If this is not broadcast client mode, we
 			 * are done here.
 			 */
 			key_expire(peer);
 			peer->cookval.tstamp = ep->tstamp;
 			peer->cookval.fstamp = ep->fstamp;
 			if (peer->crypto & CRYPTO_FLAG_AGREE)
 				peer->pcookie ^= cookie;
 			else
 				peer->pcookie = cookie;
 			if (peer->hmode == MODE_CLIENT &&
 			    !(peer->cast_flags & MDF_BCLNT))
 				peer->crypto |= CRYPTO_FLAG_AUTO;
 			else
 				peer->crypto &= ~CRYPTO_FLAG_AUTO;
 			peer->crypto |= CRYPTO_FLAG_AGREE;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u",
 			    peer->pcookie, ntohl(ep->tstamp),
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Install autokey values in broadcast client and
 		 * symmetric modes. We have to do this every time the
 		 * sever/peer cookie changes or a new keylist is
 		 * rolled. Ordinarily, this is automatic as this message
 		 * is piggybacked on the first NTP packet sent upon
 		 * either of these events. Note that a broadcast client
 		 * or symmetric peer can receive this response without a
 		 * matching request.
 		 */
 		case CRYPTO_AUTO | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or identity
 			 * not confirmed or signature not verified with
 			 * respect to the receive autokey values.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VRFY)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, &peer->recval,
 			    peer)) != XEVNT_OK)
 				break;
 
 			/*
 			 * Install autokey values and light the
 			 * autokey bit. This is not hard.
 			 */
 			if (peer->recval.ptr == NULL)
 				peer->recval.ptr =
 				    emalloc(sizeof(struct autokey));
 			bp = (struct autokey *)peer->recval.ptr;
 			peer->recval.tstamp = ep->tstamp;
 			peer->recval.fstamp = ep->fstamp;
 			ap = (struct autokey *)ep->pkt;
 			bp->seq = ntohl(ap->seq);
 			bp->key = ntohl(ap->key);
 			peer->pkeyid = bp->key;
 			peer->crypto |= CRYPTO_FLAG_AUTO;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "auto seq %d key %x ts %u fs %u", bp->seq,
 			    bp->key, ntohl(ep->tstamp),
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 	
 		/*
 		 * X509 certificate sign response. Validate the
 		 * certificate signed by the server and install. Later
 		 * this can be provided to clients of this server in
 		 * lieu of the self signed certificate in order to
 		 * validate the public key.
 		 */
 		case CRYPTO_SIGN | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or not
 			 * proventic.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_PROV)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Scan the certificate list to delete old
 			 * versions and link the newest version first on
 			 * the list.
 			 */
 			if ((rval = cert_install(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_SIGN;
 			peer->flash &= ~TEST8;
 			temp32 = cinfo->nid;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "sign %s 0x%x %s (%u) fs %u",
 			    cinfo->issuer, cinfo->flags,
 			    OBJ_nid2ln(temp32), temp32,
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Install leapseconds table in symmetric modes. This
 		 * table is proventicated to the NIST primary servers,
 		 * either by copying the file containing the table from
 		 * a NIST server to a trusted server or directly using
 		 * this protocol. While the entire table is installed at
 		 * the server, presently only the current TAI offset is
 		 * provided via the kernel to other applications.
 		 */
 		case CRYPTO_TAI:
 
 			/*
 			 * Discard the message if invalid.
 			 */
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Pass the extension field to the transmit
 			 * side. Continue below if a leapseconds table
 			 * accompanies the message.
 			 */
 			fp = emalloc(len);
 			memcpy(fp, ep, len);
 			temp32 = CRYPTO_RESP;
 			fp->opcode |= htonl(temp32);
 			peer->cmmd = fp;
 			if (len <= VALUE_LEN) {
 				peer->flash &= ~TEST8;
 				break;
 			}
 			/* fall through */
 
 		case CRYPTO_TAI | CRYPTO_RESP:
 
 			/*
 			 * If this is a response, discard the message if
 			 * signature not verified with respect to the
 			 * leapsecond table values.
 			 */
 			if (peer->cmmd == NULL) {
 				if ((rval = crypto_verify(ep,
 				    &peer->tai_leap, peer)) != XEVNT_OK)
 					break;
 			}
 
 			/*
 			 * Initialize peer variables with latest update.
 			 */
 			peer->tai_leap.tstamp = ep->tstamp;
 			peer->tai_leap.fstamp = ep->fstamp;
 			peer->tai_leap.vallen = ep->vallen;
 
 			/*
 			 * Install the new table if there is no stored
 			 * table or the new table is more recent than
 			 * the stored table. Since a filestamp may have
 			 * changed, recompute the signatures.
 			 */
 			if (ntohl(peer->tai_leap.fstamp) >
 			    ntohl(tai_leap.fstamp)) {
 				tai_leap.fstamp = ep->fstamp;
 				tai_leap.vallen = ep->vallen;
 				if (tai_leap.ptr != NULL)
 					free(tai_leap.ptr);
 				tai_leap.ptr = emalloc(vallen);
 				memcpy(tai_leap.ptr, ep->pkt, vallen);
 				crypto_update();
 			}
 			crypto_flags |= CRYPTO_FLAG_TAI;
 			peer->crypto |= CRYPTO_FLAG_LEAP;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "leap %u ts %u fs %u", vallen,
 			    ntohl(ep->tstamp), ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * We come here in symmetric modes for miscellaneous
 		 * commands that have value fields but are processed on
 		 * the transmit side. All we need do here is check for
 		 * valid field length. Remaining checks are below and on
 		 * the transmit side.
 		 */
 		case CRYPTO_CERT:
 		case CRYPTO_IFF:
 		case CRYPTO_GQ:
 		case CRYPTO_MV:
 		case CRYPTO_SIGN:
 			if (len < VALUE_LEN) {
 				rval = XEVNT_LEN;
 				break;
 			}
 			/* fall through */
 
 		/*
 		 * We come here for miscellaneous requests and unknown
 		 * requests and responses. If an unknown response or
 		 * error, forget it. If a request, save the extension
 		 * field for later. Unknown requests will be caught on
 		 * the transmit side.
 		 */
 		default:
 			if (code & (CRYPTO_RESP | CRYPTO_ERROR)) {
 				rval = XEVNT_ERR;
 			} else if ((rval = crypto_verify(ep, NULL,
 			    peer)) == XEVNT_OK) {
 				fp = emalloc(len);
 				memcpy(fp, ep, len);
 				temp32 = CRYPTO_RESP;
 				fp->opcode |= htonl(temp32);
 				peer->cmmd = fp;
 			}
 		}
 
 		/*
 		 * We don't log length/format/timestamp errors and
 		 * duplicates, which are log clogging vulnerabilities.
 		 * The first error found terminates the extension field
 		 * scan and we return the laundry to the caller. A
 		 * length/format/timestamp error on transmit is
 		 * cheerfully ignored, as the message is not sent.
 		 */
 		if (rval > XEVNT_TSP) {
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "error %x opcode %x ts %u fs %u", rval,
 			    code, tstamp, fstamp);
 			record_crypto_stats(&peer->srcadr, statstr);
 			report_event(rval, peer);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		} else if (rval > XEVNT_OK && (code & CRYPTO_RESP)) {
 			rval = XEVNT_OK;
 		}
 		authlen += len;
 	}
 	return (rval);
 }
 
 
 /*
  * crypto_xmit - construct extension fields
  *
  * This routine is called both when an association is configured and
  * when one is not. The only case where this matters is to retrieve the
  * autokey information, in which case the caller has to provide the
  * association ID to match the association.
  *
  * Returns length of extension field.
  */
 int
 crypto_xmit(
 	struct pkt *xpkt,	/* transmit packet pointer */
 	struct sockaddr_storage *srcadr_sin,	/* active runway */
 	int	start,		/* offset to extension field */
 	struct exten *ep,	/* extension pointer */
 	keyid_t cookie		/* session cookie */
 	)
 {
 	u_int32	*pkt;		/* packet pointer */
 	struct peer *peer;	/* peer structure pointer */
 	u_int	opcode;		/* extension field opcode */
 	struct exten *fp;	/* extension pointers */
 	struct cert_info *cp, *xp; /* certificate info/value pointer */
 	char	certname[MAXHOSTNAME + 1]; /* subject name buffer */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t tstamp;
 	u_int	vallen;
 	u_int	len;
 	struct value vtemp;
 	associd_t associd;
 	int	rval;
 	keyid_t tcookie;
 
 	/*
 	 * Generate the requested extension field request code, length
 	 * and association ID. If this is a response and the host is not
 	 * synchronized, light the error bit and go home.
 	 */
 	pkt = (u_int32 *)xpkt + start / 4;
 	fp = (struct exten *)pkt;
 	opcode = ntohl(ep->opcode);
 	associd = (associd_t) ntohl(ep->associd);
 	fp->associd = htonl(associd);
 	len = 8;
 	rval = XEVNT_OK;
 	tstamp = crypto_time();
 	switch (opcode & 0xffff0000) {
 
 	/*
 	 * Send association request and response with status word and
 	 * host name. Note, this message is not signed and the filestamp
 	 * contains only the status word.
 	 */
 	case CRYPTO_ASSOC | CRYPTO_RESP:
 		len += crypto_send(fp, &hostval);
 		fp->fstamp = htonl(crypto_flags);
 		break;
 
 	case CRYPTO_ASSOC:
 		len += crypto_send(fp, &hostval);
 		fp->fstamp = htonl(crypto_flags | ident_scheme);
 		break;
 
 	/*
 	 * Send certificate request. Use the values from the extension
 	 * field.
 	 */
 	case CRYPTO_CERT:
 		memset(&vtemp, 0, sizeof(vtemp));
 		vtemp.tstamp = ep->tstamp;
 		vtemp.fstamp = ep->fstamp;
 		vtemp.vallen = ep->vallen;
 		vtemp.ptr = (u_char *)ep->pkt;
 		len += crypto_send(fp, &vtemp);
 		break;
 
 	/*
 	 * Send certificate response or sign request. Use the values
 	 * from the certificate cache. If the request contains no
 	 * subject name, assume the name of this host. This is for
 	 * backwards compatibility. Private certificates are never sent.
 	 */
 	case CRYPTO_SIGN:
 	case CRYPTO_CERT | CRYPTO_RESP:
 		vallen = ntohl(ep->vallen);
 		if (vallen == 8) {
 			strcpy(certname, sys_hostname);
-		} else if (vallen == 0 || vallen > MAXHOSTNAME) {
+		} else if (vallen == 0 || vallen > MAXHOSTNAME ||
+		    (opcode & 0x0000ffff) < VALUE_LEN + vallen) {
 			rval = XEVNT_LEN;
 			break;
 
 		} else {
 			memcpy(certname, ep->pkt, vallen);
 			certname[vallen] = '\0';
 		}
 
 		/*
 		 * Find all certificates with matching subject. If a
 		 * self-signed, trusted certificate is found, use that.
 		 * If not, use the first one with matching subject. A
 		 * private certificate is never divulged or signed.
 		 */
 		xp = NULL;
 		for (cp = cinfo; cp != NULL; cp = cp->link) {
 			if (cp->flags & CERT_PRIV)
 				continue;
 
 			if (strcmp(certname, cp->subject) == 0) {
 				if (xp == NULL)
 					xp = cp;
 				if (strcmp(certname, cp->issuer) ==
 				    0 && cp->flags & CERT_TRUST) {
 					xp = cp;
 					break;
 				}
 			}
 		}
 
 		/*
 		 * Be careful who you trust. If not yet synchronized,
 		 * give back an empty response. If certificate not found
 		 * or beyond the lifetime, return an error. This is to
 		 * avoid a bad dude trying to get an expired certificate
 		 * re-signed. Otherwise, send it.
 		 *
 		 * Note the timestamp and filestamp are taken from the
 		 * certificate value structure. For all certificates the
 		 * timestamp is the latest signature update time. For
 		 * host and imported certificates the filestamp is the
 		 * creation epoch. For signed certificates the filestamp
 		 * is the creation epoch of the trusted certificate at
 		 * the base of the certificate trail. In principle, this
 		 * allows strong checking for signature masquerade.
 		 */
 		if (tstamp == 0)
 			break;
 
 		if (xp == NULL)
 			rval = XEVNT_CRT;
 		else if (tstamp < xp->first || tstamp > xp->last)
 			rval = XEVNT_SRV;
 		else
 			len += crypto_send(fp, &xp->cert);
 		break;
 
 	/*
 	 * Send challenge in Schnorr (IFF) identity scheme.
 	 */
 	case CRYPTO_IFF:
 		if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		if ((rval = crypto_alice(peer, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send response in Schnorr (IFF) identity scheme.
 	 */
 	case CRYPTO_IFF | CRYPTO_RESP:
 		if ((rval = crypto_bob(ep, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send challenge in Guillou-Quisquater (GQ) identity scheme.
 	 */
 	case CRYPTO_GQ:
 		if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		if ((rval = crypto_alice2(peer, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send response in Guillou-Quisquater (GQ) identity scheme.
 	 */
 	case CRYPTO_GQ | CRYPTO_RESP:
 		if ((rval = crypto_bob2(ep, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send challenge in MV identity scheme.
 	 */
 	case CRYPTO_MV:
 		if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		if ((rval = crypto_alice3(peer, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send response in MV identity scheme.
 	 */
 	case CRYPTO_MV | CRYPTO_RESP:
 		if ((rval = crypto_bob3(ep, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send certificate sign response. The integrity of the request
 	 * certificate has already been verified on the receive side.
 	 * Sign the response using the local server key. Use the
 	 * filestamp from the request and use the timestamp as the
 	 * current time. Light the error bit if the certificate is
 	 * invalid or contains an unverified signature.
 	 */
 	case CRYPTO_SIGN | CRYPTO_RESP:
 		if ((rval = cert_sign(ep, &vtemp)) == XEVNT_OK)
 			len += crypto_send(fp, &vtemp);
 		value_free(&vtemp);
 		break;
 
 	/*
 	 * Send public key and signature. Use the values from the public
 	 * key.
 	 */
 	case CRYPTO_COOK:
 		len += crypto_send(fp, &pubkey);
 		break;
 
 	/*
 	 * Encrypt and send cookie and signature. Light the error bit if
 	 * anything goes wrong.
 	 */
 	case CRYPTO_COOK | CRYPTO_RESP:
-		if ((opcode & 0xffff) < VALUE_LEN) {
+		vallen = ntohl(ep->vallen);	/* Must be <64k */
+		if (   vallen == 0
+		    || (vallen >= MAX_VALLEN)
+		    || (opcode & 0x0000ffff)  < VALUE_LEN + vallen) {
 			rval = XEVNT_LEN;
 			break;
 		}
 		if (PKT_MODE(xpkt->li_vn_mode) == MODE_SERVER) {
 			tcookie = cookie;
 		} else {
 			if ((peer = findpeerbyassoc(associd)) == NULL) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			tcookie = peer->pcookie;
 		}
-		if ((rval = crypto_encrypt(ep, &vtemp, &tcookie)) ==
-		    XEVNT_OK)
+		memset(&vtemp, 0, sizeof(vtemp)); /* free-safe on any error */
+		if ((rval = crypto_encrypt((const u_char *)ep->pkt,
+		    vallen, &tcookie, &vtemp)) == XEVNT_OK)
 			len += crypto_send(fp, &vtemp);
 		value_free(&vtemp);
 		break;
 
 	/*
 	 * Find peer and send autokey data and signature in broadcast
 	 * server and symmetric modes. Use the values in the autokey
 	 * structure. If no association is found, either the server has
 	 * restarted with new associations or some perp has replayed an
 	 * old message, in which case light the error bit.
 	 */
 	case CRYPTO_AUTO | CRYPTO_RESP:
 		if ((peer = findpeerbyassoc(associd)) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		peer->flags &= ~FLAG_ASSOC;
 		len += crypto_send(fp, &peer->sndval);
 		break;
 
 	/*
 	 * Send leapseconds table and signature. Use the values from the
 	 * tai structure. If no table has been loaded, just send an
 	 * empty request.
 	 */
 	case CRYPTO_TAI:
 	case CRYPTO_TAI | CRYPTO_RESP:
 		if (crypto_flags & CRYPTO_FLAG_TAI)
 			len += crypto_send(fp, &tai_leap);
 		break;
 
 	/*
 	 * Default - Fall through for requests; for unknown responses,
 	 * flag as error.
 	 */
 	default:
 		if (opcode & CRYPTO_RESP)
 			rval = XEVNT_ERR;
 	}
 
 	/*
 	 * In case of error, flame the log. If a request, toss the
 	 * puppy; if a response, return so the sender can flame, too.
 	 */
 	if (rval != XEVNT_OK) {
 		opcode |= CRYPTO_ERROR;
 		snprintf(statstr, NTP_MAXSTRLEN,
 		    "error %x opcode %x", rval, opcode);
 		record_crypto_stats(srcadr_sin, statstr);
 		report_event(rval, NULL);
 #ifdef DEBUG
 		if (debug)
 			printf("crypto_xmit: %s\n", statstr);
 #endif
 		if (!(opcode & CRYPTO_RESP))
 			return (0);
 	}
 
 	/*
 	 * Round up the field length to a multiple of 8 bytes and save
 	 * the request code and length.
 	 */
 	len = ((len + 7) / 8) * 8;
 	fp->opcode = htonl((opcode & 0xffff0000) | len);
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "crypto_xmit: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n",
 		    crypto_flags, start, len, opcode >> 16, associd);
 #endif
 	return (len);
 }
 
 
 /*
  * crypto_verify - parse and verify the extension field and value
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_LEN	bad field format or length
  * XEVNT_TSP	bad timestamp
  * XEVNT_FSP	bad filestamp
  * XEVNT_PUB	bad or missing public key
  * XEVNT_SGL	bad signature length
  * XEVNT_SIG	signature not verified
  * XEVNT_ERR	protocol error
  */
 static int
 crypto_verify(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp,	/* value pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	EVP_PKEY *pkey;		/* server public key */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp, tstamp1 = 0; /* timestamp */
 	tstamp_t fstamp, fstamp1 = 0; /* filestamp */
 	u_int	vallen;		/* value length */
 	u_int	siglen;		/* signature length */
 	u_int	opcode, len;
 	int	i;
 
 	/*
 	 * We require valid opcode and field lengths, timestamp,
 	 * filestamp, public key, digest, signature length and
 	 * signature, where relevant. Note that preliminary length
 	 * checks are done in the main loop.
 	 */
 	len = ntohl(ep->opcode) & 0x0000ffff;
 	opcode = ntohl(ep->opcode) & 0xffff0000;
 
 	/*
 	 * Check for valid operation code and protocol. The opcode must
 	 * not have the error bit set. If a response, it must have a
 	 * value header. If a request and does not contain a value
 	 * header, no need for further checking.
 	 */
 	if (opcode & CRYPTO_ERROR)
 		return (XEVNT_ERR);
 
  	if (opcode & CRYPTO_RESP) {
  		if (len < VALUE_LEN)
 			return (XEVNT_LEN);
 	} else {
  		if (len < VALUE_LEN)
 			return (XEVNT_OK);
 	}
 
 	/*
 	 * We have a value header. Check for valid field lengths. The
 	 * field length must be long enough to contain the value header,
 	 * value and signature. Note both the value and signature fields
 	 * are rounded up to the next word.
 	 */
 	vallen = ntohl(ep->vallen);
+	if (vallen == 0 || vallen > MAX_VALLEN ||
+	    len - VALUE_LEN < ((vallen + 3) / 4) * 4)
+		return (XEVNT_LEN);
+	/* The value fits, so the siglen word read below is in the field. */
 	i = (vallen + 3) / 4;
 	siglen = ntohl(ep->pkt[i++]);
-	if (len < VALUE_LEN + ((vallen + 3) / 4) * 4 + ((siglen + 3) /
-	    4) * 4)
+	if (   siglen > MAX_VALLEN
+	    || len - VALUE_LEN - ((vallen + 3) / 4) * 4
+	      < ((siglen + 3) / 4) * 4)
 		return (XEVNT_LEN);
 
 	/*
 	 * Punt if this is a response with no data. Punt if this is a
 	 * request and a previous response is pending. 
 	 */
 	if (opcode & CRYPTO_RESP) {
 		if (vallen == 0)
 			return (XEVNT_LEN);
 	} else {
 		if (peer->cmmd != NULL)
 			return (XEVNT_LEN);
 	}
 
 	/*
 	 * Check for valid timestamp and filestamp. If the timestamp is
 	 * zero, the sender is not synchronized and signatures are
 	 * disregarded. If not, the timestamp must not precede the
 	 * filestamp. The timestamp and filestamp must not precede the
 	 * corresponding values in the value structure, if present. Once
 	 * the autokey values have been installed, the timestamp must
 	 * always be later than the corresponding value in the value
 	 * structure. Duplicate timestamps are illegal once the cookie
 	 * has been validated.
 	 */
 	tstamp = ntohl(ep->tstamp);
 	fstamp = ntohl(ep->fstamp);
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < fstamp)
 		return (XEVNT_TSP);
 
 	if (vp != NULL) {
 		tstamp1 = ntohl(vp->tstamp);
 		fstamp1 = ntohl(vp->fstamp);
 		if ((tstamp < tstamp1 || (tstamp == tstamp1 &&
 		    (peer->crypto & CRYPTO_FLAG_AUTO))))
 			return (XEVNT_TSP);
 
 		if ((tstamp < fstamp1 || fstamp < fstamp1))
 			return (XEVNT_FSP);
 	}
 
 	/*
 	 * Check for valid signature length, public key and digest
 	 * algorithm.
 	 */
 	if (crypto_flags & peer->crypto & CRYPTO_FLAG_PRIV)
 		pkey = sign_pkey;
 	else
 		pkey = peer->pkey;
 	if (siglen == 0 || pkey == NULL || peer->digest == NULL)
 		return (XEVNT_OK);
 
 	if (siglen != (u_int)EVP_PKEY_size(pkey))
 		return (XEVNT_SGL);
 
 	/*
 	 * Darn, I thought we would never get here. Verify the
 	 * signature. If the identity exchange is verified, light the
 	 * proventic bit. If no client identity scheme is specified,
 	 * avoid doing the sign exchange.
 	 */
 	EVP_VerifyInit(&ctx, peer->digest);
+	/* Signed span: tstamp, fstamp and vallen words (12 octets) + value. */
 	EVP_VerifyUpdate(&ctx, (u_char *)&ep->tstamp, vallen + 12);
 	if (EVP_VerifyFinal(&ctx, (u_char *)&ep->pkt[i], siglen, pkey) <= 0)
 		return (XEVNT_SIG);
 
 	if (peer->crypto & CRYPTO_FLAG_VRFY) {
 		peer->crypto |= CRYPTO_FLAG_PROV;
 		if (!(crypto_flags & CRYPTO_FLAG_MASK))
 			peer->crypto |= CRYPTO_FLAG_SIGN;
 	}
 	return (XEVNT_OK);
 }
 
 
 /*
- * crypto_encrypt - construct encrypted cookie and signature from
- * extension field and cookie
+ * crypto_encrypt - construct vp (encrypted cookie and signature) from
+ * the public key and cookie.
  *
- * Returns
+ * Returns:
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_CKY	bad or missing cookie
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_encrypt(
-	struct exten *ep,	/* extension pointer */
-	struct value *vp,	/* value pointer */
-	keyid_t	*cookie		/* server cookie */
+	const u_char *ptr,	/* Public Key */
+	u_int	vallen,		/* Length of Public Key */
+	keyid_t	*cookie,	/* server cookie */
+	struct value *vp	/* value pointer */
 	)
 {
 	EVP_PKEY *pkey;		/* public key */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int32	temp32;
-	u_int	len;
-	u_char	*ptr;
 
 	/*
 	 * Extract the public key from the request.
 	 */
-	len = ntohl(ep->vallen);
-	ptr = (u_char *)ep->pkt;
-	pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, len);
+	pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, vallen);
 	if (pkey == NULL) {
 		msyslog(LOG_ERR, "crypto_encrypt %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Encrypt the cookie, encode in ASN.1 and sign.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = hostval.tstamp;
-	len = EVP_PKEY_size(pkey);
-	vp->vallen = htonl(len);
-	vp->ptr = emalloc(len);
+	vallen = EVP_PKEY_size(pkey);
+	vp->vallen = htonl(vallen);
+	vp->ptr = emalloc(vallen);
 	temp32 = htonl(*cookie);
 	if (!RSA_public_encrypt(4, (u_char *)&temp32, vp->ptr,
 	    pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING)) {
 		msyslog(LOG_ERR, "crypto_encrypt %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		EVP_PKEY_free(pkey);
 		return (XEVNT_CKY);
 	}
 	EVP_PKEY_free(pkey);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
-	EVP_SignUpdate(&ctx, vp->ptr, len);
-	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
-		vp->siglen = htonl(len);
+	EVP_SignUpdate(&ctx, vp->ptr, vallen);
+	if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey))
+		vp->siglen = htonl(vallen); /* length set by EVP_SignFinal */
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_ident - construct extension field for identity scheme
  *
  * This routine determines which identity scheme is in use and
  * constructs an extension field for that scheme.
  */
 u_int
 crypto_ident(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	char	filename[MAXFILENAME + 1];
 
 	/*
 	 * If the server identity has already been verified, no further
 	 * action is necessary. Otherwise, try to load the identity file
 	 * of the certificate issuer. If the issuer file is not found,
 	 * try the host file. If nothing found, declare a cryptobust.
 	 * Note we can't get here unless the trusted certificate has
 	 * been found and the CRYPTO_FLAG_VALID bit is set, so the
 	 * certificate issuer is valid.
 	 */
 	if (peer->ident_pkey != NULL)
 		EVP_PKEY_free(peer->ident_pkey);
 	if (peer->crypto & CRYPTO_FLAG_GQ) {
 		snprintf(filename, MAXFILENAME, "ntpkey_gq_%s",
 		    peer->issuer);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_GQ);
 
 		snprintf(filename, MAXFILENAME, "ntpkey_gq_%s",
 		    sys_hostname);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_GQ);
 	}
 	if (peer->crypto & CRYPTO_FLAG_IFF) {
 		snprintf(filename, MAXFILENAME, "ntpkey_iff_%s",
 		    peer->issuer);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_IFF);
 
 		snprintf(filename, MAXFILENAME, "ntpkey_iff_%s",
 		    sys_hostname);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_IFF);
 	}
 	if (peer->crypto & CRYPTO_FLAG_MV) {
 		snprintf(filename, MAXFILENAME, "ntpkey_mv_%s",
 		    peer->issuer);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_MV);
 
 		snprintf(filename, MAXFILENAME, "ntpkey_mv_%s",
 		    sys_hostname);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_MV);
 	}
 
 	/*
 	 * No compatible identity scheme is available. Life is hard.
 	 */
 	msyslog(LOG_INFO,
 	    "crypto_ident: no compatible identity scheme found");
 	return (0);
 }
 
 
 /*
  * crypto_args - construct extension field from arguments
  *
  * This routine creates an extension field with current timestamps and
  * specified opcode, association ID and optional string. Note that the
  * extension field is created here, but freed after the crypto_xmit()
  * call in the protocol module.
  *
  * Returns extension field pointer (no errors).
+ *
+ * XXX: opcode and len should really be 32-bit quantities and
+ * we should make sure that str is not too big.
  */
 struct exten *
 crypto_args(
 	struct peer *peer,	/* peer structure pointer */
 	u_int	opcode,		/* operation code */
 	char	*str		/* argument string */
 	)
 {
 	tstamp_t tstamp;	/* NTP timestamp */
 	struct exten *ep;	/* extension field pointer */
 	u_int	len;		/* extension field length */
+	size_t	slen = 0;	/* strlen(str), or 0 if str is NULL */
 
 	tstamp = crypto_time();
 	len = sizeof(struct exten);
-	if (str != NULL)
-		len += strlen(str);
+	if (str != NULL) {
+		slen = strlen(str);
+		len += slen;
+	}
 	ep = emalloc(len);
 	memset(ep, 0, len);
 	if (opcode == 0)
 		return (ep);
 
 	ep->opcode = htonl(opcode + len);
 
 	/*
 	 * If a response, send our ID; if a request, send the
 	 * responder's ID.
 	 */
 	if (opcode & CRYPTO_RESP)
 		ep->associd = htonl(peer->associd);
 	else
 		ep->associd = htonl(peer->assoc);
 	ep->tstamp = htonl(tstamp);
 	ep->fstamp = hostval.tstamp;
 	ep->vallen = 0;
 	if (str != NULL) {
-		ep->vallen = htonl(strlen(str));
-		memcpy((char *)ep->pkt, str, strlen(str));
+		ep->vallen = htonl(slen);
+		memcpy((char *)ep->pkt, str, slen);
 	} else {
 		ep->pkt[0] = peer->associd;
 	}
 	return (ep);
 }
 
 
 /*
  * crypto_send - construct extension field from value components
  *
  * Returns extension field length. Note: it is not polite to send a
  * nonempty signature with zero timestamp or a nonzero timestamp with
  * empty signature, but these rules are not enforced here.
+ *
+ * XXX This code won't work on a box with 16-bit ints.
  */
 u_int
 crypto_send(
 	struct exten *ep,	/* extension field pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	u_int	len, temp32;
 	int	i;
 
 	/*
 	 * Copy data. If the data field is empty or zero length, encode
 	 * an empty value with length zero.
 	 */
 	ep->tstamp = vp->tstamp;
 	ep->fstamp = vp->fstamp;
 	ep->vallen = vp->vallen;
 	len = 12;
 	temp32 = ntohl(vp->vallen);
 	if (temp32 > 0 && vp->ptr != NULL)
 		memcpy(ep->pkt, vp->ptr, temp32);
 
 	/*
 	 * Copy signature. If the signature field is empty or zero
 	 * length, encode an empty signature with length zero.
 	 */
 	i = (temp32 + 3) / 4;
 	len += i * 4 + 4;
 	ep->pkt[i++] = vp->siglen;
 	temp32 = ntohl(vp->siglen);
 	if (temp32 > 0 && vp->sig != NULL)
 		memcpy(&ep->pkt[i], vp->sig, temp32);
 	len += temp32;
 	return (len);
 }
 
 
 /*
  * crypto_update - compute new public value and sign extension fields
  *
  * This routine runs periodically, like once a day, and when something
  * changes. It updates the timestamps on three value structures and one
  * value structure list, then signs all the structures:
  *
  * hostval	host name (not signed)
  * pubkey	public key
  * cinfo	certificate info/value list
  * tai_leap	leapseconds file
  *
  * Filestamps are proventicated data, so this routine is run only when
  * the host has been synchronized to a proventicated source. Thus, the
  * timestamp is proventicated, too, and can be used to deflect
  * clogging attacks and even cook breakfast.
  *
  * Returns void (no errors)
  */
 void
 crypto_update(void)
 {
 	EVP_MD_CTX ctx;		/* message digest context */
 	struct cert_info *cp, *cpn; /* certificate info/value */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int	len;
 
 	if ((tstamp = crypto_time()) == 0)
 		return;
 
 	hostval.tstamp = htonl(tstamp);
 
 	/*
 	 * Sign public key and timestamps. The filestamp is derived from
 	 * the host key file extension from wherever the file was
 	 * generated. 
 	 */
 	if (pubkey.vallen != 0) {
 		pubkey.tstamp = hostval.tstamp;
 		pubkey.siglen = 0;
 		if (pubkey.sig == NULL)
 			pubkey.sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)&pubkey, 12);
 		EVP_SignUpdate(&ctx, pubkey.ptr, ntohl(pubkey.vallen));
 		if (EVP_SignFinal(&ctx, pubkey.sig, &len, sign_pkey))
 			pubkey.siglen = htonl(len);
 	}
 
 	/*
 	 * Sign certificates and timestamps. The filestamp is derived
 	 * from the certificate file extension from wherever the file
 	 * was generated. Note we do not throw expired certificates
 	 * away; they may have signed younger ones.
 	 */
 	for (cp = cinfo; cp != NULL; cp = cpn) {
 		cpn = cp->link;
 		cp->cert.tstamp = hostval.tstamp;
 		cp->cert.siglen = 0;
 		if (cp->cert.sig == NULL)
 			cp->cert.sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)&cp->cert, 12);
 		EVP_SignUpdate(&ctx, cp->cert.ptr,
 		    ntohl(cp->cert.vallen));
 		if (EVP_SignFinal(&ctx, cp->cert.sig, &len, sign_pkey))
 			cp->cert.siglen = htonl(len);
 	}
 
 	/*
 	 * Sign leapseconds table and timestamps. The filestamp is
 	 * derived from the leapsecond file extension from wherever the
 	 * file was generated.
 	 */
 	if (tai_leap.vallen != 0) {
 		tai_leap.tstamp = hostval.tstamp;
 		tai_leap.siglen = 0;
 		if (tai_leap.sig == NULL)
 			tai_leap.sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)&tai_leap, 12);
 		EVP_SignUpdate(&ctx, tai_leap.ptr,
 		    ntohl(tai_leap.vallen));
 		if (EVP_SignFinal(&ctx, tai_leap.sig, &len, sign_pkey))
 			tai_leap.siglen = htonl(len);
 	}
 	snprintf(statstr, NTP_MAXSTRLEN,
 	    "update ts %u", ntohl(hostval.tstamp)); 
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_update: %s\n", statstr);
 #endif
 }
 
 
 /*
  * value_free - free value structure components.
  *
  * Returns void (no errors)
  */
 void
 value_free(
 	struct value *vp	/* value structure */
 	)
 {
 	if (vp->ptr != NULL)
 		free(vp->ptr);
 	if (vp->sig != NULL)
 		free(vp->sig);
 	memset(vp, 0, sizeof(struct value));
 }
 
 
 /*
  * crypto_time - returns current NTP time in seconds.
  */
 tstamp_t
 crypto_time()
 {
 	l_fp	tstamp;		/* NTP time */	L_CLR(&tstamp);
 
 	L_CLR(&tstamp);
 	if (sys_leap != LEAP_NOTINSYNC)
 		get_systime(&tstamp);
 	return (tstamp.l_ui);
 }
 
 
 /*
  * asn2ntp - convert ASN1_TIME time structure to NTP time in seconds.
  */
 u_long
 asn2ntp	(
 	ASN1_TIME *asn1time	/* pointer to ASN1_TIME structure */
 	)
 {
 	char	*v;		/* pointer to ASN1_TIME string */
 	struct	tm tm;		/* used to convert to NTP time */
 
 	/*
 	 * Extract time string YYMMDDHHMMSSZ from ASN1 time structure.
 	 * Note that the YY, MM, DD fields start with one, the HH, MM,
 	 * SS fiels start with zero and the Z character should be 'Z'
 	 * for UTC. Also note that years less than 50 map to years
 	 * greater than 100. Dontcha love ASN.1? Better than MIL-188.
 	 */
 	if (asn1time->length > 13)
 		return ((u_long)(~0));	/* We can't use -1 here. It's invalid */
 
 	v = (char *)asn1time->data;
 	tm.tm_year = (v[0] - '0') * 10 + v[1] - '0';
 	if (tm.tm_year < 50)
 		tm.tm_year += 100;
 	tm.tm_mon = (v[2] - '0') * 10 + v[3] - '0' - 1;
 	tm.tm_mday = (v[4] - '0') * 10 + v[5] - '0';
 	tm.tm_hour = (v[6] - '0') * 10 + v[7] - '0';
 	tm.tm_min = (v[8] - '0') * 10 + v[9] - '0';
 	tm.tm_sec = (v[10] - '0') * 10 + v[11] - '0';
 	tm.tm_wday = 0;
 	tm.tm_yday = 0;
 	tm.tm_isdst = 0;
 	return (timegm(&tm) + JAN_1970);
 }
 
 
 /*
  * bigdig() - compute a BIGNUM MD5 hash of a BIGNUM number.
  */
 static int
 bighash(
 	BIGNUM	*bn,		/* BIGNUM * from */
 	BIGNUM	*bk		/* BIGNUM * to */
 	)
 {
 	EVP_MD_CTX ctx;		/* message digest context */
 	u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */
 	u_char	*ptr;		/* a BIGNUM as binary string */
 	u_int	len;
 
 	len = BN_num_bytes(bn);
 	ptr = emalloc(len);
 	BN_bn2bin(bn, ptr);
 	EVP_DigestInit(&ctx, EVP_md5());
 	EVP_DigestUpdate(&ctx, ptr, len);
 	EVP_DigestFinal(&ctx, dgst, &len);
 	BN_bin2bn(dgst, len, bk);
 
 	/* XXX MEMLEAK? free ptr? */
 
 	return (1);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines implement the Schnorr (IFF) identity scheme  *
  *								       *
  ***********************************************************************
  *
  * The Schnorr (IFF) identity scheme is intended for use when
  * the ntp-genkeys program does not generate the certificates used in
  * the protocol and the group key cannot be conveyed in the certificate
  * itself. For this purpose, new generations of IFF values must be
  * securely transmitted to all members of the group before use. The
  * scheme is self contained and independent of new generations of host
  * keys, sign keys and certificates.
  *
  * The IFF identity scheme is based on DSA cryptography and algorithms
  * described in Stinson p. 285. The IFF values hide in a DSA cuckoo
  * structure, but only the primes and generator are used. The p is a
  * 512-bit prime, q a 160-bit prime that divides p - 1 and is a qth root
  * of 1 mod p; that is, g^q = 1 mod p. The TA rolls primvate random
  * group key b disguised as a DSA structure member, then computes public
  * key g^(q - b). These values are shared only among group members and
  * never revealed in messages. Alice challenges Bob to confirm identity
  * using the protocol described below.
  *
  * How it works
  *
  * The scheme goes like this. Both Alice and Bob have the public primes
  * p, q and generator g. The TA gives private key b to Bob and public
  * key v = g^(q - a) mod p to Alice.
  *
  * Alice rolls new random challenge r and sends to Bob in the IFF
  * request message. Bob rolls new random k, then computes y = k + b r
  * mod q and x = g^k mod p and sends (y, hash(x)) to Alice in the
  * response message. Besides making the response shorter, the hash makes
  * it effectivey impossible for an intruder to solve for b by observing
  * a number of these messages.
  * 
  * Alice receives the response and computes g^y v^r mod p. After a bit
  * of algebra, this simplifies to g^k. If the hash of this result
  * matches hash(x), Alice knows that Bob has the group key b. The signed
  * response binds this knowledge to Bob's private key and the public key
  * previously received in his certificate.
  *
  * crypto_alice - construct Alice's challenge in IFF scheme
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  */
 static int
 crypto_alice(
 	struct peer *peer,	/* peer pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* IFF parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;
 	u_int	len;
 
 	/*
 	 * The identity parameters must have correct format and content.
 	 */
 	if (peer->ident_pkey == NULL)
 		return (XEVNT_ID);
 
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_alice: defective key");
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Roll new random r (0 < r < q). The OpenSSL library has a bug
 	 * omitting BN_rand_range, so we have to do it the hard way.
 	 */
 	bctx = BN_CTX_new();
 	len = BN_num_bytes(dsa->q);
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = BN_new();
 	BN_rand(peer->iffval, len * 8, -1, 1);	/* r */
 	BN_mod(peer->iffval, peer->iffval, dsa->q, bctx);
 	BN_CTX_free(bctx);
 
 	/*
 	 * Sign and send to Bob. The filestamp is from the local file.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(peer->fstamp);
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	BN_bn2bin(peer->iffval, vp->ptr);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_bob - construct Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_ID	bad or missing group key
  * XEVNT_ERR	protocol error
  * XEVNT_PER	host expired certificate
  */
 static int
 crypto_bob(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* IFF parameters */
 	DSA_SIG	*sdsa;		/* DSA signature context fake */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	BIGNUM	*bn, *bk, *r;
 	u_char	*ptr;
-	u_int	len;
+	u_int	len;		/* extension field length, later signature length */
+	u_int	vallen = 0;	/* value length */
 
 	/*
 	 * If the IFF parameters are not valid, something awful
 	 * happened or we are being tormented.
 	 */
 	if (iffpar_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_bob: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	dsa = iffpar_pkey->pkey.dsa;
 
 	/*
 	 * Extract r from the challenge.
 	 */
-	len = ntohl(ep->vallen);
-	if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) {
+	vallen = ntohl(ep->vallen);
+	len = ntohl(ep->opcode) & 0x0000ffff;
+	if (vallen == 0 || len < VALUE_LEN || len - VALUE_LEN < vallen)
+		return XEVNT_LEN;
+	if ((r = BN_bin2bn((u_char *)ep->pkt, vallen, NULL)) == NULL) {
 		msyslog(LOG_ERR, "crypto_bob %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Bob rolls random k (0 < k < q), computes y = k + b r mod q
 	 * and x = g^k mod p, then sends (y, hash(x)) to Alice.
 	 */
 	bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new();
 	sdsa = DSA_SIG_new();
-	BN_rand(bk, len * 8, -1, 1);		/* k */
+	BN_rand(bk, vallen * 8, -1, 1);		/* k */
 	BN_mod_mul(bn, dsa->priv_key, r, dsa->q, bctx); /* b r mod q */
 	BN_add(bn, bn, bk);
 	BN_mod(bn, bn, dsa->q, bctx);		/* k + b r mod q */
 	sdsa->r = BN_dup(bn);
 	BN_mod_exp(bk, dsa->g, bk, dsa->p, bctx); /* g^k mod p */
 	bighash(bk, bk);
 	sdsa->s = BN_dup(bk);
 	BN_CTX_free(bctx);
 	BN_free(r); BN_free(bn); BN_free(bk);
 
 	/*
 	 * Encode the values in ASN.1 and sign.
 	 */
-	tstamp = crypto_time();
-	memset(vp, 0, sizeof(struct value));
-	vp->tstamp = htonl(tstamp);
-	vp->fstamp = htonl(if_fstamp);
-	len = i2d_DSA_SIG(sdsa, NULL);
-	if (len <= 0) {
+	vallen = i2d_DSA_SIG(sdsa, NULL);
+	if (vallen == 0) {
 		msyslog(LOG_ERR, "crypto_bob %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		DSA_SIG_free(sdsa);
 		return (XEVNT_ERR);
 	}
-	vp->vallen = htonl(len);
-	ptr = emalloc(len);
+	if (vallen > MAX_VALLEN) {
+		msyslog(LOG_ERR, "crypto_bob: signature is too big: %u",
+		    vallen);
+		DSA_SIG_free(sdsa);
+		return (XEVNT_LEN);
+	}
+	memset(vp, 0, sizeof(struct value));
+	tstamp = crypto_time();
+	vp->tstamp = htonl(tstamp);
+	vp->fstamp = htonl(if_fstamp);
+	vp->vallen = htonl(vallen);
+	ptr = emalloc(vallen);
 	vp->ptr = ptr;
 	i2d_DSA_SIG(sdsa, &ptr);
 	DSA_SIG_free(sdsa);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
+	/* XXX: more validation to make sure the sign fits... */
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
-	EVP_SignUpdate(&ctx, vp->ptr, len);
-	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
+	EVP_SignUpdate(&ctx, vp->ptr, vallen);
+	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_iff - verify Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_FSP	bad filestamp
  */
 int
 crypto_iff(
 	struct exten *ep,	/* extension pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	DSA	*dsa;		/* IFF parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	DSA_SIG	*sdsa;		/* DSA parameters */
 	BIGNUM	*bn, *bk;
 	u_int	len;
 	const u_char	*ptr;
 	int	temp;
 
 	/*
 	 * If the IFF parameters are not valid or no challenge was sent,
 	 * something awful happened or we are being tormented.
 	 */
 	if (peer->ident_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_iff: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	if (ntohl(ep->fstamp) != peer->fstamp) {
 		msyslog(LOG_INFO, "crypto_iff: invalid filestamp %u",
 		    ntohl(ep->fstamp));
 		return (XEVNT_FSP);
 	}
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_iff: defective key");
 		return (XEVNT_PUB);
 	}
 	if (peer->iffval == NULL) {
 		msyslog(LOG_INFO, "crypto_iff: missing challenge");
 		return (XEVNT_ID);
 	}
 
 	/*
 	 * Extract the k + b r and g^k values from the response.
 	 */
 	bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new();
 	len = ntohl(ep->vallen);
 	ptr = (const u_char *)ep->pkt;
 	if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) {
 		msyslog(LOG_ERR, "crypto_iff %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Compute g^(k + b r) g^(q - b)r mod p.
 	 */
 	BN_mod_exp(bn, dsa->pub_key, peer->iffval, dsa->p, bctx);
 	BN_mod_exp(bk, dsa->g, sdsa->r, dsa->p, bctx);
 	BN_mod_mul(bn, bn, bk, dsa->p, bctx);
 
 	/*
 	 * Verify the hash of the result matches hash(x).
 	 */
 	bighash(bn, bn);
 	temp = BN_cmp(bn, sdsa->s);
 	BN_free(bn); BN_free(bk); BN_CTX_free(bctx);
 	BN_free(peer->iffval);
 	peer->iffval = NULL;
 	DSA_SIG_free(sdsa);
 	if (temp == 0)
 		return (XEVNT_OK);
 
 	else
 		return (XEVNT_ID);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines implement the Guillou-Quisquater (GQ)        *
  * identity scheme                                                     *
  *								       *
  ***********************************************************************
  *
  * The Guillou-Quisquater (GQ) identity scheme is intended for use when
  * the ntp-genkeys program generates the certificates used in the
  * protocol and the group key can be conveyed in a certificate extension
  * field. The scheme is self contained and independent of new
  * generations of host keys, sign keys and certificates.
  *
  * The GQ identity scheme is based on RSA cryptography and algorithms
  * described in Stinson p. 300 (with errors). The GQ values hide in a
  * RSA cuckoo structure, but only the modulus is used. The 512-bit
  * public modulus is n = p q, where p and q are secret large primes. The
  * TA rolls random group key b disguised as a RSA structure member.
  * Except for the public key, these values are shared only among group
  * members and never revealed in messages.
  *
  * When rolling new certificates, Bob recomputes the private and
  * public keys. The private key u is a random roll, while the public key
  * is the inverse obscured by the group key v = (u^-1)^b. These values
  * replace the private and public keys normally generated by the RSA
  * scheme. Alice challenges Bob to confirm identity using the protocol
  * described below.
  *
  * How it works
  *
  * The scheme goes like this. Both Alice and Bob have the same modulus n
  * and some random b as the group key. These values are computed and
  * distributed in advance via secret means, although only the group key
  * b is truly secret. Each has a private random private key u and public
  * key (u^-1)^b, although not necessarily the same ones. Bob and Alice
  * can regenerate the key pair from time to time without affecting
  * operations. The public key is conveyed on the certificate in an
  * extension field; the private key is never revealed.
  *
  * Alice rolls new random challenge r and sends to Bob in the GQ
  * request message. Bob rolls new random k, then computes y = k u^r mod
  * n and x = k^b mod n and sends (y, hash(x)) to Alice in the response
  * message. Besides making the response shorter, the hash makes it
  * effectivey impossible for an intruder to solve for b by observing
  * a number of these messages.
  * 
  * Alice receives the response and computes y^b v^r mod n. After a bit
  * of algebra, this simplifies to k^b. If the hash of this result
  * matches hash(x), Alice knows that Bob has the group key b. The signed
  * response binds this knowledge to Bob's private key and the public key
  * previously received in his certificate.
  *
  * crypto_alice2 - construct Alice's challenge in GQ scheme
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_alice2(
 	struct peer *peer,	/* peer pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	RSA	*rsa;		/* GQ parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;
 	u_int	len;
 
 	/*
 	 * The identity parameters must have correct format and content.
 	 */
 	if (peer->ident_pkey == NULL)
 		return (XEVNT_ID);
 
 	if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_alice2: defective key");
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Roll new random r (0 < r < n). The OpenSSL library has a bug
 	 * omitting BN_rand_range, so we have to do it the hard way.
 	 */
 	bctx = BN_CTX_new();
 	len = BN_num_bytes(rsa->n);
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = BN_new();
 	BN_rand(peer->iffval, len * 8, -1, 1);	/* r mod n */
 	BN_mod(peer->iffval, peer->iffval, rsa->n, bctx);
 	BN_CTX_free(bctx);
 
 	/*
 	 * Sign and send to Bob. The filestamp is from the local file.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(peer->fstamp);
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	BN_bn2bin(peer->iffval, vp->ptr);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_bob2 - construct Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_ID	bad or missing group key
  * XEVNT_ERR	protocol error
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_bob2(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	RSA	*rsa;		/* GQ parameters */
 	DSA_SIG	*sdsa;		/* DSA parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	BIGNUM	*r, *k, *g, *y;
 	u_char	*ptr;
 	u_int	len;
 
 	/*
 	 * If the GQ parameters are not valid, something awful
 	 * happened or we are being tormented.
 	 */
 	if (gqpar_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_bob2: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	rsa = gqpar_pkey->pkey.rsa;
 
 	/*
 	 * Extract r from the challenge.
 	 */
 	len = ntohl(ep->vallen);
 	if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) {
 		msyslog(LOG_ERR, "crypto_bob2 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Bob rolls random k (0 < k < n), computes y = k u^r mod n and
 	 * x = k^b mod n, then sends (y, hash(x)) to Alice. 
 	 */
 	bctx = BN_CTX_new(); k = BN_new(); g = BN_new(); y = BN_new();
 	sdsa = DSA_SIG_new();
 	BN_rand(k, len * 8, -1, 1);		/* k */
 	BN_mod(k, k, rsa->n, bctx);
 	BN_mod_exp(y, rsa->p, r, rsa->n, bctx); /* u^r mod n */
 	BN_mod_mul(y, k, y, rsa->n, bctx);	/* k u^r mod n */
 	sdsa->r = BN_dup(y);
 	BN_mod_exp(g, k, rsa->e, rsa->n, bctx); /* k^b mod n */
 	bighash(g, g);
 	sdsa->s = BN_dup(g);
 	BN_CTX_free(bctx);
 	BN_free(r); BN_free(k); BN_free(g); BN_free(y);
  
 	/*
 	 * Encode the values in ASN.1 and sign.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(gq_fstamp);
 	len = i2d_DSA_SIG(sdsa, NULL);
 	if (len <= 0) {
 		msyslog(LOG_ERR, "crypto_bob2 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		DSA_SIG_free(sdsa);
 		return (XEVNT_ERR);
 	}
 	vp->vallen = htonl(len);
 	ptr = emalloc(len);
 	vp->ptr = ptr;
 	i2d_DSA_SIG(sdsa, &ptr);
 	DSA_SIG_free(sdsa);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_gq - verify Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group keys
  * XEVNT_ERR	protocol error
  * XEVNT_FSP	bad filestamp
  */
 int
 crypto_gq(
 	struct exten *ep,	/* extension pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	RSA	*rsa;		/* GQ parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	DSA_SIG	*sdsa;		/* RSA signature context fake */
 	BIGNUM	*y, *v;
 	const u_char	*ptr;
 	u_int	len;
 	int	temp;
 
 	/*
 	 * If the GQ parameters are not valid or no challenge was sent,
 	 * something awful happened or we are being tormented.
 	 */
 	if (peer->ident_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_gq: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	if (ntohl(ep->fstamp) != peer->fstamp) {
 		msyslog(LOG_INFO, "crypto_gq: invalid filestamp %u",
 		    ntohl(ep->fstamp));
 		return (XEVNT_FSP);
 	}
 	if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_gq: defective key");
 		return (XEVNT_PUB);
 	}
 	if (peer->iffval == NULL) {
 		msyslog(LOG_INFO, "crypto_gq: missing challenge");
 		return (XEVNT_ID);
 	}
 
 	/*
 	 * Extract the y = k u^r and hash(x = k^b) values from the
 	 * response.
 	 */
 	bctx = BN_CTX_new(); y = BN_new(); v = BN_new();
 	len = ntohl(ep->vallen);
 	ptr = (const u_char *)ep->pkt;
 	if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) {
 		msyslog(LOG_ERR, "crypto_gq %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Compute v^r y^b mod n.
 	 */
 	BN_mod_exp(v, peer->grpkey, peer->iffval, rsa->n, bctx);
 						/* v^r mod n */
 	BN_mod_exp(y, sdsa->r, rsa->e, rsa->n, bctx); /* y^b mod n */
 	BN_mod_mul(y, v, y, rsa->n, bctx);	/* v^r y^b mod n */
 
 	/*
 	 * Verify the hash of the result matches hash(x).
 	 */
 	bighash(y, y);
 	temp = BN_cmp(y, sdsa->s);
 	BN_CTX_free(bctx); BN_free(y); BN_free(v);
 	BN_free(peer->iffval);
 	peer->iffval = NULL;
 	DSA_SIG_free(sdsa);
 	if (temp == 0)
 		return (XEVNT_OK);
 
 	else
 		return (XEVNT_ID);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines implement the Mu-Varadharajan (MV) identity  *
  * scheme                                                              *
  *								       *
  ***********************************************************************
  */
 /*
  * The Mu-Varadharajan (MV) cryptosystem was originally intended when
  * servers broadcast messages to clients, but clients never send
  * messages to servers. There is one encryption key for the server and a
  * separate decryption key for each client. It operated something like a
  * pay-per-view satellite broadcasting system where the session key is
  * encrypted by the broadcaster and the decryption keys are held in a
  * tamperproof set-top box.
  *
  * The MV parameters and private encryption key hide in a DSA cuckoo
  * structure which uses the same parameters, but generated in a
  * different way. The values are used in an encryption scheme similar to
  * El Gamal cryptography and a polynomial formed from the expansion of
  * product terms (x - x[j]), as described in Mu, Y., and V.
  * Varadharajan: Robust and Secure Broadcasting, Proc. Indocrypt 2001,
  * 223-231. The paper has significant errors and serious omissions.
  *
  * Let q be the product of n distinct primes s'[j] (j = 1...n), where
  * each s'[j] has m significant bits. Let p be a prime p = 2 * q + 1, so
  * that q and each s'[j] divide p - 1 and p has M = n * m + 1
  * significant bits. The elements x mod q of Zq with the elements 2 and
  * the primes removed form a field Zq* valid for polynomial arithetic.
  * Let g be a generator of Zp; that is, gcd(g, p - 1) = 1 and g^q = 1
  * mod p. We expect M to be in the 500-bit range and n relatively small,
  * like 25, so the likelihood of a randomly generated element of x mod q
  * of Zq colliding with a factor of p - 1 is very small and can be
  * avoided. Associated with each s'[j] is an element s[j] such that s[j]
  * s'[j] = s'[j] mod q. We find s[j] as the quotient (q + s'[j]) /
  * s'[j]. These are the parameters of the scheme and they are expensive
  * to compute.
  *
  * We set up an instance of the scheme as follows. A set of random
  * values x[j] mod q (j = 1...n), are generated as the zeros of a
  * polynomial of order n. The product terms (x - x[j]) are expanded to
  * form coefficients a[i] mod q (i = 0...n) in powers of x. These are
  * used as exponents of the generator g mod p to generate the private
  * encryption key A. The pair (gbar, ghat) of public server keys and the
  * pairs (xbar[j], xhat[j]) (j = 1...n) of private client keys are used
  * to construct the decryption keys. The devil is in the details.
  *
  * The distinguishing characteristic of this scheme is the capability to
  * revoke keys. Included in the calculation of E, gbar and ghat is the
  * product s = prod(s'[j]) (j = 1...n) above. If the factor s'[j] is
  * subsequently removed from the product and E, gbar and ghat
  * recomputed, the jth client will no longer be able to compute E^-1 and
  * thus unable to decrypt the block.
  *
  * How it works
  *
  * The scheme goes like this. Bob has the server values (p, A, q, gbar,
  * ghat) and Alice the client values (p, xbar, xhat).
  *
  * Alice rolls new random challenge r (0 < r < p) and sends to Bob in
  * the MV request message. Bob rolls new random k (0 < k < q), encrypts
  * y = A^k mod p (a permutation) and sends (hash(y), gbar^k, ghat^k) to
  * Alice.
  * 
  * Alice receives the response and computes the decryption key (the
  * inverse permutation) from previously obtained (xbar, xhat) and
  * (gbar^k, ghat^k) in the message. She computes the inverse, which is
  * unique by reasons explained in the ntp-keygen.c program sources. If
  * the hash of this result matches hash(y), Alice knows that Bob has the
  * group key b. The signed response binds this knowledge to Bob's
  * private key and the public key previously received in his
  * certificate.
  *
  * crypto_alice3 - construct Alice's challenge in MV scheme
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_alice3(
 	struct peer *peer,	/* peer pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* MV parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;
 	u_int	len;
 
 	/*
 	 * The identity parameters must have correct format and content.
 	 */
 	if (peer->ident_pkey == NULL)
 		return (XEVNT_ID);
 
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_alice3: defective key");
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Roll new random r (0 < r < q). The OpenSSL library has a bug
 	 * omitting BN_rand_range, so we have to do it the hard way.
 	 */
 	bctx = BN_CTX_new();
 	len = BN_num_bytes(dsa->p);
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = BN_new();
 	BN_rand(peer->iffval, len * 8, -1, 1);	/* r */
 	BN_mod(peer->iffval, peer->iffval, dsa->p, bctx);
 	BN_CTX_free(bctx);
 
 	/*
 	 * Sign and send to Bob. The filestamp is from the local file.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(peer->fstamp);
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	BN_bn2bin(peer->iffval, vp->ptr);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_bob3 - construct Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_ERR	protocol error
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_bob3(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* MV parameters */
 	DSA	*sdsa;		/* DSA signature context fake */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	BIGNUM	*r, *k, *u;
 	u_char	*ptr;
 	u_int	len;
 
 	/*
 	 * If the MV parameters are not valid, something awful
 	 * happened or we are being tormented.
 	 */
 	if (mvpar_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_bob3: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	dsa = mvpar_pkey->pkey.dsa;
 
 	/*
 	 * Extract r from the challenge.
 	 */
 	len = ntohl(ep->vallen);
 	if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) {
 		msyslog(LOG_ERR, "crypto_bob3 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Bob rolls random k (0 < k < q), making sure it is not a
 	 * factor of q. He then computes y = A^k r and sends (hash(y),
 	 * gbar^k, ghat^k) to Alice.
 	 */
 	bctx = BN_CTX_new(); k = BN_new(); u = BN_new();
 	sdsa = DSA_new();
 	sdsa->p = BN_new(); sdsa->q = BN_new(); sdsa->g = BN_new();
 	while (1) {
 		BN_rand(k, BN_num_bits(dsa->q), 0, 0);
 		BN_mod(k, k, dsa->q, bctx);
 		BN_gcd(u, k, dsa->q, bctx);
 		if (BN_is_one(u))
 			break;
 	}
 	BN_mod_exp(u, dsa->g, k, dsa->p, bctx); /* A r */
 	BN_mod_mul(u, u, r, dsa->p, bctx);
 	bighash(u, sdsa->p);
 	BN_mod_exp(sdsa->q, dsa->priv_key, k, dsa->p, bctx); /* gbar */
 	BN_mod_exp(sdsa->g, dsa->pub_key, k, dsa->p, bctx); /* ghat */
 	BN_CTX_free(bctx); BN_free(k); BN_free(r); BN_free(u);
 
 	/*
 	 * Encode the values in ASN.1 and sign.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(mv_fstamp);
 	len = i2d_DSAparams(sdsa, NULL);
 	if (len <= 0) {
 		msyslog(LOG_ERR, "crypto_bob3 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		DSA_free(sdsa);
 		return (XEVNT_ERR);
 	}
 	vp->vallen = htonl(len);
 	ptr = emalloc(len);
 	vp->ptr = ptr;
 	i2d_DSAparams(sdsa, &ptr);
 	DSA_free(sdsa);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_mv - verify Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_ERR	protocol error
  * XEVNT_FSP	bad filestamp
  */
 int
 crypto_mv(
 	struct exten *ep,	/* extension pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	DSA	*dsa;		/* MV parameters */
 	DSA	*sdsa;		/* DSA parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	BIGNUM	*k, *u, *v;
 	u_int	len;
 	const u_char	*ptr;
 	int	temp;
 
 	/*
 	 * If the MV parameters are not valid or no challenge was sent,
 	 * something awful happened or we are being tormented.
 	 */
 	if (peer->ident_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_mv: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	if (ntohl(ep->fstamp) != peer->fstamp) {
 		msyslog(LOG_INFO, "crypto_mv: invalid filestamp %u",
 		    ntohl(ep->fstamp));
 		return (XEVNT_FSP);
 	}
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_mv: defective key");
 		return (XEVNT_PUB);
 	}
 	if (peer->iffval == NULL) {
 		msyslog(LOG_INFO, "crypto_mv: missing challenge");
 		return (XEVNT_ID);
 	}
 
 	/*
 	 * Extract the (hash(y), gbar, ghat) values from the response.
 	 */
 	bctx = BN_CTX_new(); k = BN_new(); u = BN_new(); v = BN_new();
 	len = ntohl(ep->vallen);
 	ptr = (const u_char *)ep->pkt;
 	if ((sdsa = d2i_DSAparams(NULL, &ptr, len)) == NULL) {
 		msyslog(LOG_ERR, "crypto_mv %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Compute (gbar^xhat ghat^xbar)^-1 mod p.
 	 */
 	BN_mod_exp(u, sdsa->q, dsa->pub_key, dsa->p, bctx);
 	BN_mod_exp(v, sdsa->g, dsa->priv_key, dsa->p, bctx);
 	BN_mod_mul(u, u, v, dsa->p, bctx);
 	BN_mod_inverse(u, u, dsa->p, bctx);
 	BN_mod_mul(v, u, peer->iffval, dsa->p, bctx);
 
 	/*
 	 * The result should match the hash of r mod p.
 	 */
 	bighash(v, v);
 	temp = BN_cmp(v, sdsa->p);
 	BN_CTX_free(bctx); BN_free(k); BN_free(u); BN_free(v);
 	BN_free(peer->iffval);
 	peer->iffval = NULL;
 	DSA_free(sdsa);
 	if (temp == 0)
 		return (XEVNT_OK);
 
 	else
 		return (XEVNT_ID);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines are used to manipulate certificates          *
  *								       *
  ***********************************************************************
  */
 /*
  * cert_parse - parse x509 certificate and create info/value structures.
  *
  * The server certificate includes the version number, issuer name,
  * subject name, public key and valid date interval. If the issuer name
  * is the same as the subject name, the certificate is self signed and
  * valid only if the server is configured as trustable. If the names are
  * different, another issuer has signed the server certificate and
  * vouched for it. In this case the server certificate is valid if
  * verified by the issuer public key.
  *
  * Returns certificate info/value pointer if valid, NULL if not.
  */
 struct cert_info *		/* certificate information structure */
 cert_parse(
 	u_char	*asn1cert,	/* X509 certificate */
 	u_int	len,		/* certificate length */
 	tstamp_t fstamp		/* filestamp */
 	)
 {
 	X509	*cert;		/* X509 certificate */
 	X509_EXTENSION *ext;	/* X509v3 extension */
 	struct cert_info *ret;	/* certificate info/value */
 	BIO	*bp;
 	X509V3_EXT_METHOD *method;
 	char	pathbuf[MAXFILENAME];
 	u_char	*uptr;
 	char	*ptr;
 	int	temp, cnt, i;
 
 	/*
 	 * Decode ASN.1 objects and construct certificate structure.
 	 */
 	uptr = asn1cert;
 	if ((cert = d2i_X509(NULL, &uptr, len)) == NULL) {
 		msyslog(LOG_ERR, "cert_parse %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (NULL);
 	}
 
 	/*
 	 * Extract version, subject name and public key.
 	 */
 	ret = emalloc(sizeof(struct cert_info));
 	memset(ret, 0, sizeof(struct cert_info));
 	if ((ret->pkey = X509_get_pubkey(cert)) == NULL) {
 		msyslog(LOG_ERR, "cert_parse %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 	ret->version = X509_get_version(cert);
 	X509_NAME_oneline(X509_get_subject_name(cert), pathbuf,
 	    MAXFILENAME - 1);
 	ptr = strstr(pathbuf, "CN=");
 	if (ptr == NULL) {
 		msyslog(LOG_INFO, "cert_parse: invalid subject %s",
 		    pathbuf);
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 	ret->subject = emalloc(strlen(ptr) + 1);
 	strcpy(ret->subject, ptr + 3);
 
 	/*
 	 * Extract remaining objects. Note that the NTP serial number is
 	 * the NTP seconds at the time of signing, but this might not be
 	 * the case for other authority. We don't bother to check the
 	 * objects at this time, since the real crunch can happen only
 	 * when the time is valid but not yet certificated.
 	 */
 	ret->nid = OBJ_obj2nid(cert->cert_info->signature->algorithm);
 	ret->digest = (const EVP_MD *)EVP_get_digestbynid(ret->nid);
 	ret->serial =
 	    (u_long)ASN1_INTEGER_get(X509_get_serialNumber(cert));
 	X509_NAME_oneline(X509_get_issuer_name(cert), pathbuf,
 	    MAXFILENAME);
 	if ((ptr = strstr(pathbuf, "CN=")) == NULL) {
 		msyslog(LOG_INFO, "cert_parse: invalid issuer %s",
 		    pathbuf);
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 	ret->issuer = emalloc(strlen(ptr) + 1);
 	strcpy(ret->issuer, ptr + 3);
 	ret->first = asn2ntp(X509_get_notBefore(cert));
 	ret->last = asn2ntp(X509_get_notAfter(cert));
 
 	/*
 	 * Extract extension fields. These are ad hoc ripoffs of
 	 * currently assigned functions and will certainly be changed
 	 * before prime time.
 	 */
 	cnt = X509_get_ext_count(cert);
 	for (i = 0; i < cnt; i++) {
 		ext = X509_get_ext(cert, i);
 		method = X509V3_EXT_get(ext);
 		temp = OBJ_obj2nid(ext->object);
 		switch (temp) {
 
 		/*
 		 * If a key_usage field is present, we decode whether
 		 * this is a trusted or private certificate. This is
 		 * dorky; all we want is to compare NIDs, but OpenSSL
 		 * insists on BIO text strings.
 		 */
 		case NID_ext_key_usage:
 			bp = BIO_new(BIO_s_mem());
 			X509V3_EXT_print(bp, ext, 0, 0);
 			BIO_gets(bp, pathbuf, MAXFILENAME);
 			BIO_free(bp);
 #if DEBUG
 			if (debug)
 				printf("cert_parse: %s: %s\n",
 				    OBJ_nid2ln(temp), pathbuf);
 #endif
 			if (strcmp(pathbuf, "Trust Root") == 0)
 				ret->flags |= CERT_TRUST;
 			else if (strcmp(pathbuf, "Private") == 0)
 				ret->flags |= CERT_PRIV;
 			break;
 
 		/*
 		 * If a NID_subject_key_identifier field is present, it
 		 * contains the GQ public key.
 		 */
 		case NID_subject_key_identifier:
 			ret->grplen = ext->value->length - 2;
 			ret->grpkey = emalloc(ret->grplen);
 			memcpy(ret->grpkey, &ext->value->data[2],
 			    ret->grplen);
 			break;
 		}
 	}
 
 	/*
 	 * If certificate is self signed, verify signature.
 	 */
 	if (strcmp(ret->subject, ret->issuer) == 0) {
 		if (!X509_verify(cert, ret->pkey)) {
 			msyslog(LOG_INFO,
 			    "cert_parse: signature not verified %s",
 			    pathbuf);
 			cert_free(ret);
 			X509_free(cert);
 			return (NULL);
 		}
 	}
 
 	/*
 	 * Verify certificate valid times. Note that certificates cannot
 	 * be retroactive.
 	 */
 	if (ret->first > ret->last || ret->first < fstamp) {
 		msyslog(LOG_INFO,
 		    "cert_parse: invalid certificate %s first %u last %u fstamp %u",
 		    ret->subject, ret->first, ret->last, fstamp);
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 
 	/*
 	 * Build the value structure to sign and send later.
 	 */
 	ret->cert.fstamp = htonl(fstamp);
 	ret->cert.vallen = htonl(len);
 	ret->cert.ptr = emalloc(len);
 	memcpy(ret->cert.ptr, asn1cert, len);
 #ifdef DEBUG
 	if (debug > 1)
 		X509_print_fp(stdout, cert);
 #endif
 	X509_free(cert);
 	return (ret);
 }
 
 
 /*
  * cert_sign - sign x509 certificate request and update value structure.
  *
  * The certificate request includes a copy of the host certificate,
  * which includes the version number, subject name and public key of the
  * host. The resulting certificate includes these values plus the
  * serial number, issuer name and valid interval of the server. The
  * valid interval extends from the current time to the same time one
  * year hence. This may extend the life of the signed certificate beyond
  * that of the signer certificate.
  *
  * It is convenient to use the NTP seconds of the current time as the
  * serial number. In the value structure the timestamp is the current
  * time and the filestamp is taken from the extension field. Note this
  * routine is called only when the client clock is synchronized to a
  * proventic source, so timestamp comparisons are valid.
  *
  * The host certificate is valid from the time it was generated for a
  * period of one year. A signed certificate is valid from the time of
  * signature for a period of one year, but only the host certificate (or
  * sign certificate if used) is actually used to encrypt and decrypt
  * signatures. The signature trail is built from the client via the
  * intermediate servers to the trusted server. Each signature on the
  * trail must be valid at the time of signature, but it could happen
  * that a signer certificate expire before the signed certificate, which
  * remains valid until its expiration.
  *
  * On success the value structure owns freshly allocated vp->ptr (the
  * DER certificate) and vp->sig buffers. If the final signature
  * operation fails, vp->siglen is left at zero and XEVNT_OK is still
  * returned.
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_TSP	crypto_time() returned zero (clock not synchronized)
  * XEVNT_PUB	bad or missing public key
  * XEVNT_CRT	bad or missing certificate
  * XEVNT_VFY	certificate not verified
  * XEVNT_PER	host certificate expired
  */
 static int
 cert_sign(
 	struct exten *ep,	/* extension field pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	X509	*req;		/* X509 certificate request */
 	X509	*cert;		/* X509 certificate */
 	X509_EXTENSION *ext;	/* certificate extension */
 	ASN1_INTEGER *serial;	/* serial number */
 	X509_NAME *subj;	/* distinguished (common) name */
 	EVP_PKEY *pkey;		/* public key */
 	EVP_MD_CTX ctx;		/* message digest context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int	len;
 	u_char	*ptr;
 	int	i, temp;
 
 	/*
 	 * Decode ASN.1 objects and construct certificate structure.
 	 * Make sure the system clock is synchronized to a proventic
 	 * source.
 	 */
 	tstamp = crypto_time();
 	if (tstamp == 0)
 		return (XEVNT_TSP);
 
 	/*
 	 * Refuse to sign outside the valid interval of the certificate
 	 * at the head of the certificate list.
 	 */
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	ptr = (u_char *)ep->pkt;
 	if ((req = d2i_X509(NULL, &ptr, ntohl(ep->vallen))) == NULL) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_CRT);
 	}
 	/*
 	 * Extract public key and check for errors. X509_get_pubkey()
 	 * hands back a counted reference which must be released with
 	 * EVP_PKEY_free() once we are done with it.
 	 */
 	if ((pkey = X509_get_pubkey(req)) == NULL) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		X509_free(req);
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Generate X509 certificate signed by this server. For this
 	 * purpose the issuer name is the server name. Also copy any
 	 * extensions that might be present.
 	 */
 	cert = X509_new();
 	X509_set_version(cert, X509_get_version(req));
 	serial = ASN1_INTEGER_new();
 	ASN1_INTEGER_set(serial, tstamp);
 	X509_set_serialNumber(cert, serial);
 	/* the certificate keeps its own copy of the serial number */
 	ASN1_INTEGER_free(serial);
 	X509_gmtime_adj(X509_get_notBefore(cert), 0L);
 	X509_gmtime_adj(X509_get_notAfter(cert), YEAR);
 	subj = X509_get_issuer_name(cert);
 	X509_NAME_add_entry_by_txt(subj, "commonName", MBSTRING_ASC,
 	    (u_char *)sys_hostname, strlen(sys_hostname), -1, 0);
 	subj = X509_get_subject_name(req);
 	X509_set_subject_name(cert, subj);
 	X509_set_pubkey(cert, pkey);
 	/* drop the reference taken by X509_get_pubkey() above */
 	EVP_PKEY_free(pkey);
 	temp = X509_get_ext_count(req);
 	for (i = 0; i < temp; i++) {
 		ext = X509_get_ext(req, i);
 		X509_add_ext(cert, ext, -1);
 	}
 	X509_free(req);
 
 	/*
 	 * Sign and verify the certificate. X509_verify() returns 1 for
 	 * a good signature, 0 for a bad one and a negative value on
 	 * internal error, so anything that is not positive is a
 	 * failure.
 	 */
 	X509_sign(cert, sign_pkey, sign_digest);
 	if (X509_verify(cert, sign_pkey) <= 0) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		X509_free(cert);
 		return (XEVNT_VFY);
 	}
 	len = i2d_X509(cert, NULL);
 
 	/*
 	 * Build and sign the value structure. We have to sign it here,
 	 * since the response has to be returned right away. This is a
 	 * clogging hazard.
 	 */
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = ep->fstamp;
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	ptr = vp->ptr;
 	i2d_X509(cert, &ptr);
 	vp->siglen = 0;
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	/* the signature covers the first 12 octets of the value header */
 	EVP_SignUpdate(&ctx, (u_char *)vp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 #ifdef DEBUG
 	if (debug > 1)
 		X509_print_fp(stdout, cert);
 #endif
 	X509_free(cert);
 	return (XEVNT_OK);
 }
 
 
 /*
  * cert_valid - verify certificate with given public key
  *
  * This is pretty ugly, as the certificate has to be verified in the
  * OpenSSL X509 structure, not in the DER format in the info/value
  * structure. A certificate already flagged CERT_SIGN is accepted
  * without being decoded again. The temporary X509 structure is
  * released on every path.
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_VFY	certificate not verified
  */
 int
 cert_valid(
 	struct cert_info *cinf,	/* certificate information structure */
 	EVP_PKEY *pkey		/* public key */
 	)
 {
 	X509	*cert;		/* X509 certificate */
 	u_char	*ptr;
 	int	rval;
 
 	if (cinf->flags & CERT_SIGN)
 		return (XEVNT_OK);
 
 	ptr = (u_char *)cinf->cert.ptr;
 	cert = d2i_X509(NULL, &ptr, ntohl(cinf->cert.vallen));
 	if (cert == NULL)
 		return (XEVNT_VFY);
 
 	/*
 	 * X509_verify() returns 1 for a good signature, 0 for a bad
 	 * one and a negative value on internal error; only a positive
 	 * result counts as verified.
 	 */
 	rval = (X509_verify(cert, pkey) > 0) ? XEVNT_OK : XEVNT_VFY;
 	X509_free(cert);
 	return (rval);
 }
 
 
 /*
  * cert_install - install certificate in certificate list
  *
  * This routine encodes an extension field into a certificate info/value
  * structure. It searches the certificate list for duplicates and
  * expunges whichever is older. It then searches the list for other
  * certificates that might be verified by this latest one. Finally, it
  * inserts this certificate first on the list.
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_FSP	bad or missing filestamp (a newer copy is already cached)
  * XEVNT_CRT	bad or missing certificate
  */
 int
 cert_install(
 	struct exten *ep,	/* cert info/value */
 	struct peer *peer	/* peer structure */
 	)
 {
 	struct cert_info *cp, *xp, *yp, **zp;
 
 	/*
 	 * Parse and validate the signed certificate. If valid,
 	 * construct the info/value structure; otherwise, scamper home.
 	 */
 	if ((cp = cert_parse((u_char *)ep->pkt, ntohl(ep->vallen),
 	    ntohl(ep->fstamp))) == NULL)
 		return (XEVNT_CRT);
 
 	/*
 	 * Scan certificate list looking for another certificate with
 	 * the same subject and issuer. If another is found with the
 	 * same or older filestamp, unlink it and return the goodies to
 	 * the heap. If another is found with a later filestamp, discard
 	 * the new one and leave the building.
 	 *
 	 * Make a note to study this issue again. An earlier certificate
 	 * with a long lifetime might be overtaken by a later
 	 * certificate with a short lifetime, thus invalidating the
 	 * earlier signature. However, we gotta find a way to leak old
 	 * stuff from the cache, so we do it anyway.
 	 */
 	zp = &cinfo;
 	for (xp = cinfo; xp != NULL; xp = xp->link) {
 		if (strcmp(cp->subject, xp->subject) == 0 &&
 		    strcmp(cp->issuer, xp->issuer) == 0) {
 			if (ntohl(xp->cert.fstamp) <=
 			    ntohl(cp->cert.fstamp)) {
 				/* cached copy is not newer; replace it */
 				*zp = xp->link;
 				cert_free(xp);
 			} else {
 				/* cached copy is newer; keep it */
 				cert_free(cp);
 				return (XEVNT_FSP);
 			}
 			break;
 		}
 		zp = &xp->link;
 	}
 
 	/*
 	 * Insert the new certificate first on the list.
 	 */
 	cp->link = cinfo;
 	cinfo = cp;
 
 	/*
 	 * Scan the certificate list to see if Y is signed by X. This is
 	 * independent of order.
 	 */
 	for (yp = cinfo; yp != NULL; yp = yp->link) {
 		for (xp = cinfo; xp != NULL; xp = xp->link) {
 
 			/*
 			 * If the issuer of certificate Y matches the
 			 * subject of certificate X, verify the
 			 * signature of Y using the public key of X. If
 			 * so, X signs Y.
 			 */
 			if (strcmp(yp->issuer, xp->subject) != 0 ||
 			    (xp->flags & CERT_ERROR))
 				continue;
 
 			if (cert_valid(yp, xp->pkey) != XEVNT_OK) {
 				yp->flags |= CERT_ERROR;
 				continue;
 			}
 
 			/*
 			 * The signature Y is valid only if it begins
 			 * during the lifetime of X; however, it is not
 			 * necessarily an error, since some other
 			 * certificate might sign Y.
 			 */
 			if (yp->first < xp->first || yp->first >
 			    xp->last)
 				continue;
 
 			yp->flags |= CERT_SIGN;
 
 			/*
 			 * If X is trusted, then Y is trusted. Note that
 			 * we might stumble over a self-signed
 			 * certificate that is not trusted, at least
 			 * temporarily. This can happen when a dude
 			 * first comes up, but has not synchronized the
 			 * clock and had its certificate signed by its
 			 * server. In case of broken certificate trail,
 			 * this might result in a loop that could
 			 * persist until timeout.
 			 */
 			if (!(xp->flags & (CERT_TRUST | CERT_VALID)))
 				continue;
 
 			yp->flags |= CERT_VALID;
 
 			/*
 			 * If subject Y matches the server subject name,
 			 * then Y has completed the certificate trail.
 			 * Save the group key and light the valid bit.
 			 */
 			if (strcmp(yp->subject, peer->subject) != 0)
 				continue;
 
 			if (yp->grpkey != NULL) {
 				if (peer->grpkey != NULL)
 					BN_free(peer->grpkey);
 				peer->grpkey = BN_bin2bn(yp->grpkey,
 				     yp->grplen, NULL);
 			}
 			peer->crypto |= CRYPTO_FLAG_VALID;
 
 			/*
 			 * If the server has an identity scheme,
 			 * fetch the identity credentials. If not, the
 			 * identity is verified only by the trusted
 			 * certificate. The next signature will set the
 			 * server proventic.
 			 */
 			if (peer->crypto & (CRYPTO_FLAG_GQ |
 			    CRYPTO_FLAG_IFF | CRYPTO_FLAG_MV))
 				continue;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY;
 		}
 	}
 
 	/*
 	 * That was awesome. Now update the timestamps and signatures.
 	 */
 	crypto_update();
 	return (XEVNT_OK);
 }
 
 
 /*
  * cert_free - free certificate information structure
  *
  * Releases the public key, the subject and issuer strings, the group
  * key buffer and the embedded certificate value, then the structure
  * itself. The pointer must not be used after this call.
  */
 void
 cert_free(
 	struct cert_info *cinf	/* certificate info/value structure */
 	)
 {
 	if (cinf->pkey != NULL)
 		EVP_PKEY_free(cinf->pkey);
 
 	/* free(NULL) is a no-op, so unset members need no guard */
 	free(cinf->subject);
 	free(cinf->issuer);
 	free(cinf->grpkey);
 
 	value_free(&cinf->cert);
 	free(cinf);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines are used only at initialization time         *
  *								       *
  ***********************************************************************
  */
 /*
  * crypto_key - load cryptographic parameters and keys from files
  *
  * This routine loads a PEM-encoded public/private key pair and extracts
  * the filestamp from the first line of the file, where it follows the
  * last '.' on that line.
  *
  * Returns key pointer if valid, NULL if not. Side effect: *fstamp is
  * written as soon as the filestamp line parses, which is before the
  * key itself is read, so it may be updated even when NULL is returned
  * because the key could not be decoded.
  */
 static EVP_PKEY *
 crypto_key(
 	char	*cp,		/* file name */
 	tstamp_t *fstamp	/* filestamp */
 	)
 {
 	FILE	*str;		/* file handle */
 	EVP_PKEY *pkey = NULL;	/* public/private key */
 	char	filename[MAXFILENAME]; /* name of key file */
 	char	linkname[MAXFILENAME]; /* filestamp buffer */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	char	*ptr;
 
 	/*
 	 * Open the key file. If the first character of the file name is
 	 * not '/', prepend the keys directory string. If something goes
 	 * wrong, abandon ship. Both forms are bounded by the size of
 	 * the buffer so an overlong name cannot overrun it.
 	 */
 	if (*cp == '/')
 		snprintf(filename, MAXFILENAME, "%s", cp);
 	else
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp);
 	str = fopen(filename, "r");
 	if (str == NULL)
 		return (NULL);
 
 	/*
 	 * Read the filestamp, which is contained in the first line.
 	 */
 	if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) {
 		msyslog(LOG_ERR, "crypto_key: no data %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if ((ptr = strrchr(ptr, '.')) == NULL) {
 		msyslog(LOG_ERR, "crypto_key: no filestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if (sscanf(++ptr, "%u", fstamp) != 1) {
 		msyslog(LOG_ERR, "crypto_key: invalid timestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 
 	/*
 	 * Read and decrypt PEM-encoded private key.
 	 */
 	pkey = PEM_read_PrivateKey(str, NULL, NULL, passwd);
 	fclose(str);
 	if (pkey == NULL) {
 		msyslog(LOG_ERR, "crypto_key %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (NULL);
 	}
 
 	/*
 	 * Leave tracks in the cryptostats. The first two characters of
 	 * the filestamp line are skipped (presumably a "# " comment
 	 * prefix -- confirm against the key generator).
 	 */
 	if ((ptr = strrchr(linkname, '\n')) != NULL)
 		*ptr = '\0';
 	snprintf(statstr, NTP_MAXSTRLEN, "%s mod %d", &linkname[2],
 	    EVP_PKEY_size(pkey) * 8);
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_key: %s\n", statstr);
 	if (debug > 1) {
 		/* test the key type itself, not a digest type */
 		if (pkey->type == EVP_PKEY_DSA)
 			DSA_print_fp(stdout, pkey->pkey.dsa, 0);
 		else
 			RSA_print_fp(stdout, pkey->pkey.rsa, 0);
 	}
 #endif
 	return (pkey);
 }
 
 
 /*
  * crypto_cert - load certificate from file
  *
  * This routine loads a X.509 RSA or DSA certificate from a file and
  * constructs a info/cert value structure for this machine. The
  * structure includes a filestamp extracted from the file name. Later
  * the certificate can be sent to another machine by request.
  *
  * Returns certificate info/value pointer if valid, NULL if not.
  */
 static struct cert_info *	/* certificate information */
 crypto_cert(
 	char	*cp		/* file name */
 	)
 {
 	struct cert_info *ret; /* certificate information */
 	FILE	*str;		/* file handle */
 	char	filename[MAXFILENAME]; /* name of certificate file */
 	char	linkname[MAXFILENAME]; /* filestamp buffer */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t fstamp;	/* filestamp */
 	long	len;
 	char	*ptr;
 	char	*name, *header;
 	u_char	*data;
 
 	/*
 	 * Open the certificate file. If the first character of the file
 	 * name is not '/', prepend the keys directory string. If
 	 * something goes wrong, abandon ship.
 	 */
 	if (*cp == '/')
 		strcpy(filename, cp);
 	else
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp);
 	str = fopen(filename, "r");
 	if (str == NULL)
 		return (NULL);
 
 	/*
 	 * Read the filestamp, which is contained in the first line.
 	 */
 	if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) {
 		msyslog(LOG_ERR, "crypto_cert: no data %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if ((ptr = strrchr(ptr, '.')) == NULL) {
 		msyslog(LOG_ERR, "crypto_cert: no filestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if (sscanf(++ptr, "%u", &fstamp) != 1) {
 		msyslog(LOG_ERR, "crypto_cert: invalid filestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 
 	/*
 	 * Read PEM-encoded certificate and install.
 	 */
 	if (!PEM_read(str, &name, &header, &data, &len)) {
 		msyslog(LOG_ERR, "crypto_cert %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		(void)fclose(str);
 		return (NULL);
 	}
 	free(header);
 	if (strcmp(name, "CERTIFICATE") !=0) {
 		msyslog(LOG_INFO, "crypto_cert: wrong PEM type %s",
 		    name);
 		free(name);
 		free(data);
 		(void)fclose(str);
 		return (NULL);
 	}
 	free(name);
 
 	/*
 	 * Parse certificate and generate info/value structure.
 	 */
 	ret = cert_parse(data, len, fstamp);
 	free(data);
 	(void)fclose(str);
 	if (ret == NULL)
 		return (NULL);
 
 	if ((ptr = strrchr(linkname, '\n')) != NULL)
 		*ptr = '\0'; 
 	snprintf(statstr, NTP_MAXSTRLEN,
 	    "%s 0x%x len %lu", &linkname[2], ret->flags, len);
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_cert: %s\n", statstr);
 #endif
 	return (ret);
 }
 
 
 /*
  * crypto_tai - load leapseconds table from file
  *
  * This routine loads the ERTS leapsecond file in NIST text format,
  * converts to a value structure and extracts a filestamp from the file
  * name. The data are used to establish the TAI offset from UTC, which
  * is provided to the kernel if supported. Later the data can be sent to
  * another machine on request.
  */
 static void
 crypto_tai(
 	char	*cp		/* file name */
 	)
 {
 	FILE	*str;		/* file handle */
 	char	buf[NTP_MAXSTRLEN];	/* file line buffer */
 	u_int32	leapsec[MAX_LEAP]; /* NTP time at leaps */
 	int	offset;		/* offset at leap (s) */
 	char	filename[MAXFILENAME]; /* name of leapseconds file */
 	char	linkname[MAXFILENAME]; /* file link (for filestamp) */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t fstamp;	/* filestamp */
 	u_int	len;
 	u_int32	*ptr;
 	char	*dp;
 	int	rval, i, j;
 
 	/*
 	 * Open the file and discard comment lines. If the first
 	 * character of the file name is not '/', prepend the keys
 	 * directory string. If the file is not found, not to worry; it
 	 * can be retrieved over the net. But, if it is found with
 	 * errors, we crash and burn.
 	 */
 	if (*cp == '/')
 		strcpy(filename, cp);
 	else
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp);
 	if ((str = fopen(filename, "r")) == NULL)
 		return;
 
 	/*
 	 * Extract filestamp if present.
 	 */
 	rval = readlink(filename, linkname, MAXFILENAME - 1);
 	if (rval > 0) {
 		linkname[rval] = '\0';
 		dp = strrchr(linkname, '.');
 	} else {
 		dp = strrchr(filename, '.');
 	}
 	if (dp != NULL)
 		sscanf(++dp, "%u", &fstamp);
 	else
 		fstamp = 0;
 	tai_leap.fstamp = htonl(fstamp);
 
 	/*
 	 * We are rather paranoid here, since an intruder might cause a
 	 * coredump by infiltrating naughty values. Empty lines and
 	 * comments are ignored. Other lines must begin with two
 	 * integers followed by junk or comments. The first integer is
 	 * the NTP seconds of leap insertion, the second is the offset
 	 * of TAI relative to UTC after that insertion. The second word
 	 * must equal the initial insertion of ten seconds on 1 January
 	 * 1972 plus one second for each succeeding insertion.
 	 */
 	i = 0;
 	while (i < MAX_LEAP) {
 		dp = fgets(buf, NTP_MAXSTRLEN - 1, str);
 		if (dp == NULL)
 			break;
 
 		if (strlen(buf) < 1)
 			continue;
 
 		if (*buf == '#')
 			continue;
 
 		if (sscanf(buf, "%u %d", &leapsec[i], &offset) != 2)
 			continue;
 
 		if (i != offset - TAI_1972) 
 			break;
 
 		i++;
 	}
 	fclose(str);
 	if (dp != NULL) {
 		msyslog(LOG_INFO,
 		    "crypto_tai: leapseconds file %s error %d", cp,
 		    rval);
 		exit (-1);
 	}
 
 	/*
 	 * The extension field table entries consists of the NTP seconds
 	 * of leap insertion in network byte order.
 	 */
 	len = i * sizeof(u_int32);
 	tai_leap.vallen = htonl(len);
 	ptr = emalloc(len);
 	tai_leap.ptr = (u_char *)ptr;
 	for (j = 0; j < i; j++)
 		*ptr++ = htonl(leapsec[j]);
 	crypto_flags |= CRYPTO_FLAG_TAI;
 	snprintf(statstr, NTP_MAXSTRLEN, "%s fs %u leap %u len %u", cp, fstamp,
 	   leapsec[--j], len);
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_tai: %s\n", statstr);
 #endif
 }
 
 
 /*
  * crypto_setup - load keys, certificate and leapseconds table
  *
  * This routine loads the public/private host key and certificate. If
  * available, it loads the public/private sign key, which defaults to
  * the host key, and leapseconds table. The host key must be RSA, but
  * the sign key can be either RSA or DSA. In either case, the public key
  * on the certificate must agree with the sign key.
  */
 void
 crypto_setup(void)
 {
 	EVP_PKEY *pkey;		/* private/public key pair */
 	char	filename[MAXFILENAME]; /* file name buffer */
 	l_fp	seed;		/* crypto PRNG seed as NTP timestamp */
 	tstamp_t fstamp;	/* filestamp */
 	tstamp_t sstamp;	/* sign filestamp */
 	u_int	len, bytes;
 	u_char	*ptr;
 
 	/*
 	 * Initialize structures.
 	 */
 	if (!crypto_flags)
 		return;
 
 	gethostname(filename, MAXFILENAME);
 	bytes = strlen(filename) + 1;
 	sys_hostname = emalloc(bytes);
 	memcpy(sys_hostname, filename, bytes);
 	if (passwd == NULL)
 		passwd = sys_hostname;
 	memset(&hostval, 0, sizeof(hostval));
 	memset(&pubkey, 0, sizeof(pubkey));
 	memset(&tai_leap, 0, sizeof(tai_leap));
 
 	/*
 	 * Load required random seed file and seed the random number
 	 * generator. Be default, it is found in the user home
 	 * directory. The root home directory may be / or /root,
 	 * depending on the system. Wiggle the contents a bit and write
 	 * it back so the sequence does not repeat when we next restart.
 	 */
 	ERR_load_crypto_strings();
 	if (rand_file == NULL) {
 		if ((RAND_file_name(filename, MAXFILENAME)) != NULL) {
 			rand_file = emalloc(strlen(filename) + 1);
 			strcpy(rand_file, filename);
 		}
 	} else if (*rand_file != '/') {
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir,
 		    rand_file);
 		free(rand_file);
 		rand_file = emalloc(strlen(filename) + 1);
 		strcpy(rand_file, filename);
 	}
 	if (rand_file == NULL) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: random seed file not specified");
 		exit (-1);
 	}
 	if ((bytes = RAND_load_file(rand_file, -1)) == 0) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: random seed file %s not found\n",
 		    rand_file);
 		exit (-1);
 	}
 	arc4random_buf(&seed, sizeof(l_fp));
 	RAND_seed(&seed, sizeof(l_fp));
 	RAND_write_file(rand_file);
 	OpenSSL_add_all_algorithms();
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "crypto_setup: OpenSSL version %lx random seed file %s bytes read %d\n",
 		    SSLeay(), rand_file, bytes);
 #endif
 
 	/*
 	 * Load required host key from file "ntpkey_host_<hostname>". It
 	 * also becomes the default sign key.
 	 */
 	if (host_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_host_%s",
 		    sys_hostname);
 		host_file = emalloc(strlen(filename) + 1);
 		strcpy(host_file, filename);
 	}
 	pkey = crypto_key(host_file, &fstamp);
 	if (pkey == NULL) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: host key file %s not found or corrupt",
 		    host_file);
 		exit (-1);
 	}
 	host_pkey = pkey;
 	sign_pkey = pkey;
 	sstamp = fstamp;
 	hostval.fstamp = htonl(fstamp);
 	if (EVP_MD_type(host_pkey) != EVP_PKEY_RSA) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: host key is not RSA key type");
 		exit (-1);
 	}
 	hostval.vallen = htonl(strlen(sys_hostname));
 	hostval.ptr = (u_char *)sys_hostname;
 	
 	/*
 	 * Construct public key extension field for agreement scheme.
 	 */
 	len = i2d_PublicKey(host_pkey, NULL);
 	ptr = emalloc(len);
 	pubkey.ptr = ptr;
 	i2d_PublicKey(host_pkey, &ptr);
 	pubkey.vallen = htonl(len);
 	pubkey.fstamp = hostval.fstamp;
 
 	/*
 	 * Load optional sign key from file "ntpkey_sign_<hostname>". If
 	 * loaded, it becomes the sign key.
 	 */
 	if (sign_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_sign_%s",
 		    sys_hostname);
 		sign_file = emalloc(strlen(filename) + 1);
 		strcpy(sign_file, filename);
 	}
 	pkey = crypto_key(sign_file, &fstamp);
 	if (pkey != NULL) {
 		sign_pkey = pkey;
 		sstamp = fstamp;
 	}
 	sign_siglen = EVP_PKEY_size(sign_pkey);
 
 	/*
 	 * Load optional IFF parameters from file
 	 * "ntpkey_iff_<hostname>".
 	 */
 	if (iffpar_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_iff_%s",
 		    sys_hostname);
 		iffpar_file = emalloc(strlen(filename) + 1);
 		strcpy(iffpar_file, filename);
 	}
 	iffpar_pkey = crypto_key(iffpar_file, &if_fstamp);
 	if (iffpar_pkey != NULL)
 		crypto_flags |= CRYPTO_FLAG_IFF;
 
 	/*
 	 * Load optional GQ parameters from file "ntpkey_gq_<hostname>".
 	 */
 	if (gqpar_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_gq_%s",
 		    sys_hostname);
 		gqpar_file = emalloc(strlen(filename) + 1);
 		strcpy(gqpar_file, filename);
 	}
 	gqpar_pkey = crypto_key(gqpar_file, &gq_fstamp);
 	if (gqpar_pkey != NULL)
 		crypto_flags |= CRYPTO_FLAG_GQ;
 
 	/*
 	 * Load optional MV parameters from file "ntpkey_mv_<hostname>".
 	 */
 	if (mvpar_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_mv_%s",
 		    sys_hostname);
 		mvpar_file = emalloc(strlen(filename) + 1);
 		strcpy(mvpar_file, filename);
 	}
 	mvpar_pkey = crypto_key(mvpar_file, &mv_fstamp);
 	if (mvpar_pkey != NULL)
 		crypto_flags |= CRYPTO_FLAG_MV;
 
 	/*
 	 * Load required certificate from file "ntpkey_cert_<hostname>".
 	 */
 	if (cert_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_cert_%s",
 		    sys_hostname);
 		cert_file = emalloc(strlen(filename) + 1);
 		strcpy(cert_file, filename);
 	}
 	if ((cinfo = crypto_cert(cert_file)) == NULL) {
 		msyslog(LOG_ERR,
 		    "certificate file %s not found or corrupt",
 		    cert_file);
 		exit (-1);
 	}
 
 	/*
 	 * The subject name must be the same as the host name, unless
 	 * the certificate is private, in which case it may have come
 	 * from another host.
 	 */
 	if (!(cinfo->flags & CERT_PRIV) && strcmp(cinfo->subject,
 	    sys_hostname) != 0) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: certificate %s not for this host",
 		    cert_file);
 		cert_free(cinfo);
 		exit (-1);
 	}
 
 	/*
 	 * It the certificate is trusted, the subject must be the same
 	 * as the issuer, in other words it must be self signed.
 	 */
 	if (cinfo->flags & CERT_TRUST && strcmp(cinfo->subject,
 	    cinfo->issuer) != 0) {
 		if (cert_valid(cinfo, sign_pkey) != XEVNT_OK) {
 			msyslog(LOG_ERR,
 			    "crypto_setup: certificate %s is trusted, but not self signed.",
 			    cert_file);
 			cert_free(cinfo);
 			exit (-1);
 		}
 	}
 	sign_digest = cinfo->digest;
 	if (cinfo->flags & CERT_PRIV)
 		crypto_flags |= CRYPTO_FLAG_PRIV;
 	crypto_flags |= cinfo->nid << 16;
 
 	/*
 	 * Load optional leapseconds table from file "ntpkey_leap". If
 	 * the file is missing or defective, the values can later be
 	 * retrieved from a server.
 	 */
 	if (leap_file == NULL)
 		leap_file = "ntpkey_leap";
 	crypto_tai(leap_file);
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "crypto_setup: flags 0x%x host %s signature %s\n",
 		    crypto_flags, sys_hostname, OBJ_nid2ln(cinfo->nid));
 #endif
 }
 
 
 /*
  * crypto_config_copy - return a freshly allocated copy of a
  * configuration string. The copy is obtained from emalloc().
  */
 static char *
 crypto_config_copy(
 	char	*src		/* string to duplicate */
 	)
 {
 	char	*dup;
 
 	dup = emalloc(strlen(src) + 1);
 	strcpy(dup, src);
 	return (dup);
 }
 
 
 /*
  * crypto_config - configure data from crypto configuration command.
  *
  * Each file name or password item stores a private copy of the
  * argument in the matching configuration variable; the identity
  * scheme item sets a bit in ident_scheme instead. Unrecognized items
  * store nothing. In every case the crypto enable flag is set.
  */
 void
 crypto_config(
 	int	item,		/* configuration item */
 	char	*cp		/* file name */
 	)
 {
 	switch (item) {
 
 	case CRYPTO_CONF_RAND:		/* random seed file name */
 		rand_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_PW:		/* private key password */
 		passwd = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_PRIV:		/* host file name */
 		host_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_SIGN:		/* sign key file name */
 		sign_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_IFFPAR:	/* iff parameters file name */
 		iffpar_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_GQPAR:		/* gq parameters file name */
 		gqpar_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_MVPAR:		/* mv parameters file name */
 		mvpar_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_IDENT:		/* identity scheme */
 		if (strcasecmp(cp, "iff") == 0)
 			ident_scheme |= CRYPTO_FLAG_IFF;
 		else if (strcasecmp(cp, "gq") == 0)
 			ident_scheme |= CRYPTO_FLAG_GQ;
 		else if (strcasecmp(cp, "mv") == 0)
 			ident_scheme |= CRYPTO_FLAG_MV;
 		break;
 
 	case CRYPTO_CONF_CERT:		/* certificate file name */
 		cert_file = crypto_config_copy(cp);
 		break;
 
 	case CRYPTO_CONF_LEAP:		/* leapseconds file name */
 		leap_file = crypto_config_copy(cp);
 		break;
 	}
 
 	/*
 	 * Any crypto configuration command switches the machinery on.
 	 */
 	crypto_flags |= CRYPTO_FLAG_ENAB;
 }
 # else
 /*
  * The only definition this file provides when OPENSSL is not defined.
  * NOTE(review): presumably a placeholder so the translation unit is
  * not empty in builds without OpenSSL -- confirm.
  */
 int ntp_crypto_bs_pubkey;
 # endif /* OPENSSL */
Index: stable/8/contrib/ntp/ntpd/ntp_proto.c
===================================================================
--- stable/8/contrib/ntp/ntpd/ntp_proto.c	(revision 281230)
+++ stable/8/contrib/ntp/ntpd/ntp_proto.c	(revision 281231)
@@ -1,3451 +1,3461 @@
 /*
  * ntp_proto.c - NTP version 4 protocol machinery
  *
  * ATTENTION: Get approval from Dave Mills on all changes to this file!
  *
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include "ntpd.h"
 #include "ntp_stdlib.h"
 #include "ntp_unixtime.h"
 #include "ntp_control.h"
 #include "ntp_string.h"
 
 #include <stdio.h>
 
 #if defined(VMS) && defined(VMS_LOCALUNIT)	/*wjm*/
 #include "ntp_refclock.h"
 #endif
 
 #if defined(__FreeBSD__) && __FreeBSD__ >= 3
 #include <sys/sysctl.h>
 #endif
 
 /*
  * AUTH(x, y) checks authentication result y. If x is nonzero, y must be
  * AUTH_OK (required); otherwise AUTH_OK or AUTH_NONE passes (optional).
  */
 #define	AUTH(x, y)	((x) ? (y) == AUTH_OK : (y) == AUTH_OK || \
 			    (y) == AUTH_NONE)
 
 /*
  * System variables are declared here. See Section 3.2 of the
  * specification.
  */
 u_char	sys_leap;		/* system leap indicator */
 u_char	sys_stratum;		/* stratum of system */
 s_char	sys_precision;		/* local clock precision (log2 s) */
 double	sys_rootdelay;		/* roundtrip delay to primary source */
 double	sys_rootdispersion;	/* dispersion to primary source */
 u_int32 sys_refid;		/* source/loop in network byte order */
 static	double sys_offset;	/* current local clock offset */
 l_fp	sys_reftime;		/* time we were last updated */
 struct	peer *sys_peer;		/* our current peer */
 struct	peer *sys_pps;		/* our PPS peer */
 struct	peer *sys_prefer;	/* our cherished peer */
 int	sys_kod;		/* kod credit */
 int	sys_kod_rate = 2;	/* max kod packets per second */
 #ifdef OPENSSL
 u_long	sys_automax;		/* maximum session key lifetime */
 #endif /* OPENSSL */
 
 /*
  * Nonspecified system state variables.
  */
 int	sys_bclient;		/* bcast client: 0 off, 1 volley, 2 no volley */
 double	sys_bdelay;		/* broadcast client default delay */
 int	sys_calldelay;		/* modem callup delay (s) */
 int	sys_authenticate;	/* require authentication for config */
 l_fp	sys_authdelay;		/* authentication delay */
 static	u_long sys_authdly[2];	/* authentication delay shift reg */
 static	double sys_mindisp = MINDISPERSE; /* min disp increment (s) */
 static	double sys_maxdist = MAXDISTANCE; /* selection threshold (s) */
 double	sys_jitter;		/* system jitter (s) */
 static	int sys_hopper;		/* anticlockhop counter */
 static	int sys_maxhop = MAXHOP; /* anticlockhop counter threshold */
 int	leap_next;		/* leap consensus */
 keyid_t	sys_private;		/* private value for session seed */
 int	sys_manycastserver;	/* respond to manycast client pkts */
 int	peer_ntpdate;		/* active peers in ntpdate mode */
 int	sys_survivors;		/* truest of the truechimers */
 #ifdef OPENSSL
 char	*sys_hostname;		/* gethostname() name */
 #endif /* OPENSSL */
 
 /*
  * TOS and multicast mapping stuff
  */
 int	sys_floor = 0;		/* cluster stratum floor */
 int	sys_ceiling = STRATUM_UNSPEC; /* cluster stratum ceiling */
 int	sys_minsane = 1;	/* minimum candidates */
 int	sys_minclock = NTP_MINCLOCK; /* minimum survivors */
 int	sys_maxclock = NTP_MAXCLOCK; /* maximum candidates */
 int	sys_cohort = 0;		/* cohort switch */
 int	sys_orphan = STRATUM_UNSPEC + 1; /* orphan stratum; default is off */
 double	sys_orphandelay = 0;	/* orphan root delay */
 int	sys_beacon = BEACON;	/* manycast beacon interval */
 int	sys_ttlmax;		/* max ttl mapping vector index */
 u_char	sys_ttl[MAX_TTL];	/* ttl mapping vector */
 
 /*
  * Statistics counters
  */
 u_long	sys_stattime;		/* time since reset */
 u_long	sys_received;		/* packets received */
 u_long	sys_processed;		/* packets processed */
 u_long	sys_newversionpkt;	/* current version */
 u_long	sys_oldversionpkt;	/* recent version */
 u_long	sys_unknownversion;	/* invalid version */
 u_long	sys_restricted;		/* access denied */
 u_long	sys_badlength;		/* bad length or format */
 u_long	sys_badauth;		/* bad authentication */
 u_long	sys_limitrejected;	/* rate exceeded */
 
 static	double	root_distance	P((struct peer *));
 static	void	clock_combine	P((struct peer **, int));
 static	void	peer_xmit	P((struct peer *));
 static	void	fast_xmit	P((struct recvbuf *, int, keyid_t,
 				    int));
 static	void	clock_update	P((void));
 static	int	default_get_precision	P((void));
 static	int	peer_unfit	P((struct peer *));
 
 
 /*
  * transmit - Transmit Procedure (Section 3.4.2 of the specification).
  *	Updates the peer's poll state and calls peer_xmit() where permitted.
  */
 void
 transmit(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	int	hpoll;		/* poll value handed to poll_update() */
 
 	/*
 	 * The polling state machine. There are two kinds of machines,
 	 * those that never expect a reply (broadcast and manycast
 	 * server modes) and those that do (all other modes). The dance
 	 * is intricate...
 	 */
 	/*
 	 * Orphan mode is active when enabled and when no servers less
 	 * than the orphan stratum are available. In this mode packets
 	 * are sent at the orphan stratum. An orphan with no other
 	 * synchronization source is an orphan parent. It assumes root
 	 * delay zero and reference ID the loopback address. All others
 	 * are orphan children with root delay randomized over a 1-s
 	 * range. The root delay is used by the election algorithm to
 	 * select the order of synchronization.
 	 */
 	hpoll = peer->hpoll;
 	if (sys_orphan < STRATUM_UNSPEC && sys_peer == NULL) {
 		sys_leap = LEAP_NOWARNING;
 		sys_stratum = sys_orphan;
 		sys_refid = htonl(LOOPBACKADR);
 		sys_rootdelay = 0;
 		sys_rootdispersion = 0;
 	}
 
 	/*
 	 * In broadcast mode the poll interval is never changed from
 	 * minpoll.
 	 */
 	if (peer->cast_flags & (MDF_BCAST | MDF_MCAST)) {
 		peer->outdate = current_time;
 		peer_xmit(peer);
 		poll_update(peer, hpoll);
 		return;
 	}
 
 	/*
 	 * In manycast mode we start with unity ttl. The ttl is
 	 * increased by one for each poll until either sys_maxclock
 	 * servers have been found or the maximum ttl is reached. When
 	 * sys_maxclock servers are found we stop polling until one or
 	 * more servers have timed out or until fewer than sys_minclock
 	 * survivors remain. In this case additional better servers
 	 * are dragged in and preempt the existing ones.
 	 */
 	if (peer->cast_flags & MDF_ACAST) {
 		peer->outdate = current_time;
 		if (peer->unreach > sys_beacon) {
 			peer->unreach = 0;
 			peer->ttl = 0;
 			peer_xmit(peer);
 		} else if (sys_survivors < sys_minclock ||
 		    peer_preempt < sys_maxclock) {
 			if (peer->ttl < sys_ttlmax)
 				peer->ttl++;
 			peer_xmit(peer);
 		}
 		peer->unreach++;
 		poll_update(peer, hpoll);
 		return;
 	}
 
 	/*
 	 * In unicast modes the dance is much more intricate. It is
 	 * designed to back off whenever possible to minimize network
 	 * traffic.
 	 */
 	if (peer->burst == 0) {
 		u_char oreach;
 
 		/*
 		 * Update the reachability status. If not heard for
 		 * three consecutive polls, stuff infinity in the clock
 		 * filter.
 		 */
 		oreach = peer->reach;
 		peer->outdate = current_time;
 		if (peer == sys_peer)
 			sys_hopper++;
 		peer->reach <<= 1;
 		if (!(peer->reach & 0x07))
 			clock_filter(peer, 0., 0., MAXDISPERSE);
 		if (!peer->reach) {
 
 			/*
 			 * Here the peer is unreachable. If it was
 			 * previously reachable, raise a trap.
 			 */
 			if (oreach) {
 				report_event(EVNT_UNREACH, peer);
 				peer->timereachable = current_time;
 			}
 
 			/*
 			 * Send a burst if enabled, but only once after
 			 * a peer becomes unreachable. If the preempt
 			 * flag is dim, bump the unreach counter by one;
 			 * otherwise, bump it by three.
 			 */
 			if (peer->flags & FLAG_IBURST &&
 			    peer->unreach == 0) {
 				peer->burst = NTP_BURST;
 			}
 			if (!(peer->flags & FLAG_PREEMPT))	
 				peer->unreach++;
 			else
 				peer->unreach += 3;
 		} else {
 
 			/*
 			 * Here the peer is reachable. Set the poll
 			 * interval to the system poll interval. Send a
 			 * burst only if enabled and the peer is fit.
 			 *
 			 * Respond to the peer evaluation produced by
 			 * the selection algorithm. If less than the
 			 * outlyer level, up the unreach by three. If
 			 * there are excess associations, up the unreach
 			 * by two if not a candidate and by one if so.
 			 */
 			if (!(peer->flags & FLAG_PREEMPT)) {
 				peer->unreach = 0;
 			} else if (peer->status < CTL_PST_SEL_SELCAND) {
 				peer->unreach += 3;
 			} else if (peer_preempt > sys_maxclock) {
 				if (peer->status < CTL_PST_SEL_SYNCCAND)
 					peer->unreach += 2;
 				else
 					peer->unreach++;
 			} else {
 				peer->unreach = 0;
 			}
 			hpoll = sys_poll;
 			if (peer->flags & FLAG_BURST &&
 			    !peer_unfit(peer))
 				peer->burst = NTP_BURST;
 		}
 
 		/*
 		 * Watch for timeout. If ephemeral or preemptable, toss
 		 * the rascal; otherwise, bump the poll interval.
 		 */
 		if (peer->unreach >= NTP_UNREACH) {
 			if (peer->flags & FLAG_PREEMPT ||
 			    !(peer->flags & FLAG_CONFIG)) {
 				peer_clear(peer, "TIME");
 				unpeer(peer);
 				return;
 			} else {
 				hpoll++;
 			}
 		}
 	} else {
 		peer->burst--;
 
 		/*
 		 * If a broadcast client at this point, the burst has
 		 * concluded, so we switch to client mode and purge the
 		 * keylist, since no further transmissions will be made.
 		 */
 		if (peer->burst == 0) {
 			if (peer->cast_flags & MDF_BCLNT) {
 				peer->hmode = MODE_BCLIENT;
 #ifdef OPENSSL
 				key_expire(peer);
 #endif /* OPENSSL */
 			}
 
 			/*
 			 * If ntpdate mode and the clock has not been
 			 * set and all peers have completed the burst,
 			 * we declare a successful failure.
 			 */
 			if (mode_ntpdate) {
 				peer_ntpdate--;
 				if (peer_ntpdate == 0) {
 					msyslog(LOG_NOTICE,
 					    "no reply; clock not set");
 					exit (0);
 				}
 			}
 		}
 	}
 
 	/*
 	 * Do not transmit if in broadcast client mode.
 	 */
 	if (peer->hmode != MODE_BCLIENT)
 		peer_xmit(peer);
 	poll_update(peer, hpoll);
 }
 
 
 /*
  * receive - Receive Procedure.  See section 3.4.3 in the specification.
  */
 void
 receive(
 	struct recvbuf *rbufp
 	)
 {
 	register struct peer *peer;	/* peer structure pointer */
 	register struct pkt *pkt;	/* receive packet pointer */
 	int	hisversion;		/* packet version */
 	int	hisleap;		/* packet leap indicator */
 	int	hismode;		/* packet mode */
 	int	hisstratum;		/* packet stratum */
 	int	restrict_mask;		/* restrict bits */
 	int	has_mac;		/* length of MAC field */
 	int	authlen;		/* offset of MAC field */
 	int	is_authentic = 0;	/* cryptosum ok */
 	keyid_t	skeyid = 0;		/* key ID */
 	struct sockaddr_storage *dstadr_sin; /* active runway */
 	struct peer *peer2;		/* aux peer structure pointer */
 	l_fp	p_org;			/* origin timestamp */
 	l_fp	p_rec;			/* receive timestamp */
 	l_fp	p_xmt;			/* transmit timestamp */
 #ifdef OPENSSL
 	keyid_t tkeyid = 0;		/* temporary key ID */
 	keyid_t	pkeyid = 0;		/* previous key ID */
 	struct autokey *ap;		/* autokey structure pointer */
 	int	rval;			/* cookie snatcher */
 #endif /* OPENSSL */
 	int retcode = AM_NOMATCH;
 	int	at_listhead;
 
 	/*
 	 * Monitor the packet and get restrictions. Note that the packet
 	 * length for control and private mode packets must be checked
 	 * by the service routines. Note that no statistics counters are
 	 * recorded for restrict violations, since these counters are in
 	 * the restriction routine. Note the careful distinctions here
 	 * between a packet with a format error and a packet that is
 	 * simply discarded without prejudice. Some restrictions have to
 	 * be handled later in order to generate a kiss-of-death packet.
 	 */
 	/*
 	 * Bogus port check is before anything, since it probably
 	 * reveals a clogging attack.
 	 */
 	sys_received++;
 	if (SRCPORT(&rbufp->recv_srcadr) == 0) {
 		sys_badlength++;
 		return;				/* bogus port */
 	}
 	at_listhead = ntp_monitor(rbufp);
 	restrict_mask = restrictions(&rbufp->recv_srcadr, at_listhead);
 #ifdef DEBUG
 	if (debug > 1)
 		printf("receive: at %ld %s<-%s flags %x restrict %03x\n",
 		    current_time, stoa(&rbufp->dstadr->sin),
 		    stoa(&rbufp->recv_srcadr),
 		    rbufp->dstadr->flags, restrict_mask);
 #endif
 	if (restrict_mask & RES_IGNORE) {
 		sys_restricted++;
 		return;				/* ignore everything */
 	}
 	pkt = &rbufp->recv_pkt;
 	hisversion = PKT_VERSION(pkt->li_vn_mode);
 	hisleap = PKT_LEAP(pkt->li_vn_mode);
 	hismode = (int)PKT_MODE(pkt->li_vn_mode);
 	hisstratum = PKT_TO_STRATUM(pkt->stratum);
 	if (hismode == MODE_PRIVATE) {
 		if (restrict_mask & RES_NOQUERY) {
 			sys_restricted++;
 			return;			/* no query private */
 		}
 		process_private(rbufp, ((restrict_mask &
 		    RES_NOMODIFY) == 0));
 		return;
 	}
 	if (hismode == MODE_CONTROL) {
 		if (restrict_mask & RES_NOQUERY) {
 			sys_restricted++;
 			return;			/* no query control */
 		}
 		process_control(rbufp, restrict_mask);
 		return;
 	}
 	if (restrict_mask & RES_DONTSERVE) {
 		sys_restricted++;
 		return;				/* no time */
 	}
 	if (rbufp->recv_length < LEN_PKT_NOMAC) {
 		sys_badlength++;
 		return;				/* runt packet */
 	}
 	
 	/*
 	 * Version check must be after the query packets, since they
 	 * intentionally use early version.
 	 */
 	if (hisversion == NTP_VERSION) {
 		sys_newversionpkt++;		/* new version */
 	} else if (!(restrict_mask & RES_VERSION) && hisversion >=
 	    NTP_OLDVERSION) {
 		sys_oldversionpkt++;		/* previous version */
 	} else {
 		sys_unknownversion++;
 		return;				/* old version */
 	}
 
 	/*
 	 * Figure out his mode and validate the packet. This has some
 	 * legacy raunch that probably should be removed. In very early
 	 * NTP versions mode 0 was equivalent to what later versions
 	 * would interpret as client mode.
 	 */
 	if (hismode == MODE_UNSPEC) {
 		if (hisversion == NTP_OLDVERSION) {
 			hismode = MODE_CLIENT;
 		} else {
 			sys_badlength++;
 			return;                 /* invalid mode */
 		}
 	}
 
 	/*
 	 * Parse the extension field if present. We figure out whether
 	 * an extension field is present by measuring the MAC size. If
 	 * the number of words following the packet header is 0, no MAC
 	 * is present and the packet is not authenticated. If 1, the
 	 * packet is a crypto-NAK; if 3, the packet is authenticated
 	 * with DES; if 5, the packet is authenticated with MD5. If 2 or
 	 * 4, the packet is a runt and discarded forthwith. If greater
 	 * than 5, an extension field is present, so we subtract the
 	 * length of the field and go around again.
 	 */
 	authlen = LEN_PKT_NOMAC;
 	has_mac = rbufp->recv_length - authlen;
 	while (has_mac > 0) {
 		int temp;
 
-		if (has_mac % 4 != 0 || has_mac < 0) {
+		if (has_mac % 4 != 0 || has_mac < MIN_MAC_LEN) {
 			sys_badlength++;
 			return;			/* bad MAC length */
 		}
 		if (has_mac == 1 * 4 || has_mac == 3 * 4 || has_mac ==
 		    MAX_MAC_LEN) {
 			skeyid = ntohl(((u_int32 *)pkt)[authlen / 4]);
 			break;
 
 		} else if (has_mac > MAX_MAC_LEN) {
 			temp = ntohl(((u_int32 *)pkt)[authlen / 4]) &
 			    0xffff;
 			if (temp < 4 || temp > NTP_MAXEXTEN || temp % 4
 			    != 0) {
 				sys_badlength++;
 				return;		/* bad MAC length */
 			}
 			authlen += temp;
 			has_mac -= temp;
 		} else {
 			sys_badlength++;
 			return;			/* bad MAC length */
 		}
 	}
+	/*
+	 * If has_mac is < 0 we had a malformed packet.
+	 */
+	if (has_mac < 0) {
+		sys_badlength++;
+		return;		/* bad length */
+	}
 #ifdef OPENSSL
 	pkeyid = tkeyid = 0;
 #endif /* OPENSSL */
 
 	/*
 	 * We have tossed out as many buggy packets as possible early in
 	 * the game to reduce the exposure to a clogging attack. Now we
 	 * have to burn some cycles to find the association and
 	 * authenticate the packet if required. Note that we burn only
 	 * MD5 cycles, again to reduce exposure. There may be no
 	 * matching association and that's okay.
 	 *
 	 * More on the autokey mambo. Normally the local interface is
 	 * found when the association was mobilized with respect to a
 	 * designated remote address. We assume packets arriving from
 	 * the remote address arrive via this interface and the local
 	 * address used to construct the autokey is the unicast address
 	 * of the interface. However, if the sender is a broadcaster,
 	 * the interface broadcast address is used instead.
 	 & Notwithstanding this technobabble, if the sender is a
 	 * multicaster, the broadcast address is null, so we use the
 	 * unicast address anyway. Don't ask.
 	 */
 	peer = findpeer(&rbufp->recv_srcadr, rbufp->dstadr,  hismode,
 	    &retcode);
 	dstadr_sin = &rbufp->dstadr->sin;
 	NTOHL_FP(&pkt->org, &p_org);
 	NTOHL_FP(&pkt->rec, &p_rec);
 	NTOHL_FP(&pkt->xmt, &p_xmt);
 
 	/*
 	 * Authentication is conditioned by three switches:
 	 *
 	 * NOPEER  (RES_NOPEER) do not mobilize an association unless
 	 *         authenticated
 	 * NOTRUST (RES_DONTTRUST) do not allow access unless
 	 *         authenticated (implies NOPEER)
 	 * enable  (sys_authenticate) master NOPEER switch, by default
 	 *         on
 	 *
 	 * The NOPEER and NOTRUST can be specified on a per-client basis
 	 * using the restrict command. The enable switch if on implies
 	 * NOPEER for all clients. There are four outcomes:
 	 *
 	 * NONE    The packet has no MAC.
 	 * OK      the packet has a MAC and authentication succeeds
 	 * ERROR   the packet has a MAC and authentication fails
 	 * CRYPTO  crypto-NAK. The MAC has four octets only.
 	 *
 	 * Note: The AUTH(x, y) macro is used to filter outcomes. If x
 	 * is zero, acceptable outcomes of y are NONE and OK. If x is
 	 * one, the only acceptable outcome of y is OK.
 	 */
 	if (has_mac == 0) {
 		is_authentic = AUTH_NONE; /* not required */
 #ifdef DEBUG
 		if (debug)
 			printf("receive: at %ld %s<-%s mode %d code %d auth %d\n",
 			    current_time, stoa(dstadr_sin),
 			    stoa(&rbufp->recv_srcadr), hismode, retcode,
 			    is_authentic);
 #endif
 	} else if (has_mac == 4) {
 			is_authentic = AUTH_CRYPTO; /* crypto-NAK */
 #ifdef DEBUG
 		if (debug)
 			printf(
 			    "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n",
 			    current_time, stoa(dstadr_sin),
 			    stoa(&rbufp->recv_srcadr), hismode, retcode,
 			    skeyid, authlen, has_mac, is_authentic);
 #endif
 	} else {
 #ifdef OPENSSL
 		/*
 		 * For autokey modes, generate the session key
 		 * and install in the key cache. Use the socket
 		 * broadcast or unicast address as appropriate.
 		 */
 		if (skeyid > NTP_MAXKEY) {
 		
 			/*
 			 * More on the autokey dance (AKD). A cookie is
 			 * constructed from public and private values.
 			 * For broadcast packets, the cookie is public
 			 * (zero). For packets that match no
 			 * association, the cookie is hashed from the
 			 * addresses and private value. For server
 			 * packets, the cookie was previously obtained
 			 * from the server. For symmetric modes, the
 			 * cookie was previously constructed using an
 			 * agreement protocol; however, should PKI be
 			 * unavailable, we construct a fake agreement as
 			 * the EXOR of the peer and host cookies.
 			 *
 			 * hismode	ephemeral	persistent
 			 * =======================================
 			 * active	0		cookie#
 			 * passive	0%		cookie#
 			 * client	sys cookie	0%
 			 * server	0%		sys cookie
 			 * broadcast	0		0
 			 *
 			 * # if unsync, 0
 			 * % can't happen
 			 */
 			if (hismode == MODE_BROADCAST) {
 
 				/*
 				 * For broadcaster, use the interface
 				 * broadcast address when available;
 				 * otherwise, use the unicast address
 				 * found when the association was
 				 * mobilized. However, if this is from
 				 * the wildcard interface, game over.
 				 */
 				if (crypto_flags && rbufp->dstadr ==
 				    any_interface) {
 					sys_restricted++;
 					return;	     /* no wildcard */
 				}
 				pkeyid = 0;
 				if (!SOCKNUL(&rbufp->dstadr->bcast))
 					dstadr_sin =
 					    &rbufp->dstadr->bcast;
 			} else if (peer == NULL) {
 				pkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin, 0,
 				    sys_private, 0);
 			} else {
 				pkeyid = peer->pcookie;
 			}
 
 			/*
 			 * The session key includes both the public
 			 * values and cookie. In case of an extension
 			 * field, the cookie used for authentication
 			 * purposes is zero. Note the hash is saved for
 			 * use later in the autokey mambo.
 			 */
 			if (authlen > LEN_PKT_NOMAC && pkeyid != 0) {
 				session_key(&rbufp->recv_srcadr,
 				    dstadr_sin, skeyid, 0, 2);
 				tkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin,
 				    skeyid, pkeyid, 0);
 			} else {
 				tkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin,
 				    skeyid, pkeyid, 2);
 			}
 
 		}
 #endif /* OPENSSL */
 
 		/*
 		 * Compute the cryptosum. Note a clogging attack may
 		 * succeed in bloating the key cache. If an autokey,
 		 * purge it immediately, since we won't be needing it
 		 * again. If the packet is authentic, it can mobilize an
 		 * association. Note that there is no key zero.
 		 */
 		if (!authdecrypt(skeyid, (u_int32 *)pkt, authlen,
 		    has_mac)) {
 			is_authentic = AUTH_ERROR;
 			sys_badauth++;
 			return;
 		} else {
 			is_authentic = AUTH_OK;
 		}
 #ifdef OPENSSL
 		if (skeyid > NTP_MAXKEY)
 			authtrust(skeyid, 0);
 #endif /* OPENSSL */
 #ifdef DEBUG
 		if (debug)
 			printf(
 			    "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n",
 			    current_time, stoa(dstadr_sin),
 			    stoa(&rbufp->recv_srcadr), hismode, retcode,
 			    skeyid, authlen, has_mac, is_authentic);
 #endif
 	}
 
 	/*
 	 * The association matching rules are implemented by a set of
 	 * routines and an association table. A packet matching an
 	 * association is processed by the peer process for that
 	 * association. If there are no errors, an ephemeral association
 	 * is mobilized: a broadcast packet mobilizes a broadcast client
 	 * aassociation; a manycast server packet mobilizes a manycast
 	 * client association; a symmetric active packet mobilizes a
 	 * symmetric passive association.
 	 */
 	switch (retcode) {
 
 	/*
 	 * This is a client mode packet not matching any association. If
 	 * an ordinary client, simply toss a server mode packet back
 	 * over the fence. If a manycast client, we have to work a
 	 * little harder.
 	 */
 	case AM_FXMIT:
 
 		/*
 		 * The vanilla case is when this is not a multicast
 		 * interface. If authentication succeeds, return a
 		 * server mode packet; if not and the key ID is nonzero,
 		 * return a crypto-NAK.
 		 */
 		if (!(rbufp->dstadr->flags & INT_MCASTOPEN)) {
 			if (AUTH(restrict_mask & RES_DONTTRUST,
 			   is_authentic))
 				fast_xmit(rbufp, MODE_SERVER, skeyid,
 				    restrict_mask);
 			else if (is_authentic == AUTH_ERROR)
 				fast_xmit(rbufp, MODE_SERVER, 0,
 				    restrict_mask);
 			return;			/* hooray */
 		}
 
 		/*
 		 * This must be manycast. Do not respond if not
 		 * configured as a manycast server.
 		 */
 		if (!sys_manycastserver) {
 			sys_restricted++;
 			return;			/* not enabled */
 		}
 
 		/*
 		 * Do not respond if unsynchronized or stratum is below
 		 * the floor or at or above the ceiling.
 		 */
 		if (sys_leap == LEAP_NOTINSYNC || sys_stratum <
 		    sys_floor || sys_stratum >= sys_ceiling)
 			return;			/* bad stratum */
 
 		/*
 		 * Do not respond if our stratum is greater than the
 		 * manycaster or it has already synchronized to us.
 		 */
 		if (sys_peer == NULL || hisstratum < sys_stratum ||
 		    (sys_cohort && hisstratum == sys_stratum) ||
 		    rbufp->dstadr->addr_refid == pkt->refid)
 			return;			/* no help */
 
 		/*
 		 * Respond only if authentication succeeds. Don't do a
 		 * crypto-NAK, as that would not be useful.
 		 */
 		if (AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
 			fast_xmit(rbufp, MODE_SERVER, skeyid,
 			    restrict_mask);
 
 		return;				/* hooray */
 
 	/*
 	 * This is a server mode packet returned in response to a client
 	 * mode packet sent to a multicast group address. The origin
 	 * timestamp is a good nonce to reliably associate the reply
 	 * with what was sent. If there is no match, that's curious and
 	 * could be an intruder attempting to clog, so we just ignore
 	 * it.
 	 *
 	 * If the packet is authentic and the manycast association is
 	 * found, we mobilize a client association and copy pertinent
 	 * variables from the manycast association to the new client
 	 * association. If not, just ignore the packet.
 	 *
 	 * There is an implosion hazard at the manycast client, since
 	 * the manycast servers send the server packet immediately. If
 	 * the guy is already here, don't fire up a duplicate.
 	 */
 	case AM_MANYCAST:
 		if (!AUTH(sys_authenticate | (restrict_mask &
 		    (RES_NOPEER | RES_DONTTRUST)), is_authentic))
 			return;			/* bad auth */
 
 		if ((peer2 = findmanycastpeer(rbufp)) == NULL) {
 			sys_restricted++;
 			return;			/* not enabled */
 		}
 		if ((peer = newpeer(&rbufp->recv_srcadr,
 		    rbufp->dstadr, MODE_CLIENT,
 		    hisversion, NTP_MINDPOLL, NTP_MAXDPOLL,
 		    FLAG_IBURST | FLAG_PREEMPT, MDF_UCAST | MDF_ACLNT,
 		    0, skeyid)) == NULL)
 			return;			/* system error */
 
 		/*
 		 * We don't need these, but it warms the billboards.
 		 */
 		peer->ttl = peer2->ttl;
 		break;
 
 	/*
 	 * This is the first packet received from a broadcast server. If
 	 * the packet is authentic and we are enabled as broadcast
 	 * client, mobilize a broadcast client association. We don't
 	 * kiss any frogs here.
 	 */
 	case AM_NEWBCL:
 		if (!AUTH(sys_authenticate | (restrict_mask &
 		    (RES_NOPEER | RES_DONTTRUST)), is_authentic))
 			return;			/* bad auth */
 
 		/*
 		 * Do not respond if unsynchronized or stratum is below
 		 * the floor or at or above the ceiling.
 		 */
 		if (hisleap == LEAP_NOTINSYNC || hisstratum <
 		    sys_floor || hisstratum >= sys_ceiling)
 			return;			/* bad stratum */
 
 		switch (sys_bclient) {
 
 		/*
 		 * If not enabled, just skedaddle.
 		 */
 		case 0:
 			sys_restricted++;
 			return;			/* not enabled */
 
 		/*
 		 * Execute the initial volley in order to calibrate the
 		 * propagation delay and run the Autokey protocol, if
 		 * enabled.
 		 */
 		case 1:
 			if ((peer = newpeer(&rbufp->recv_srcadr,
 			    rbufp->dstadr, MODE_CLIENT, hisversion,
 			    NTP_MINDPOLL, NTP_MAXDPOLL, FLAG_MCAST |
 			    FLAG_IBURST, MDF_BCLNT, 0, skeyid)) ==
 			    NULL)
 				return;		/* system error */
 #ifdef OPENSSL
 			if (skeyid > NTP_MAXKEY)
 				crypto_recv(peer, rbufp);
 #endif /* OPENSSL */
 			return;			/* hooray */
 
 
 		/*
 		 * Do not execute the initial volley.
 		 */
 		case 2:
 #ifdef OPENSSL
 			/*
 			 * If a two-way exchange is not possible,
 			 * neither is Autokey.
 			 */
 			if (skeyid > NTP_MAXKEY) {
 				msyslog(LOG_INFO,
 				    "receive: autokey requires two-way communication");
 				return;		/* no autokey */
 			}
 #endif /* OPENSSL */
 			if ((peer = newpeer(&rbufp->recv_srcadr,
 			    rbufp->dstadr, MODE_BCLIENT, hisversion,
 			    NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_BCLNT, 0,
 			    skeyid)) == NULL)
 				return;		/* system error */
 		}
 		break;
 
 	/*
 	 * This is the first packet received from a symmetric active
 	 * peer. If the packet is authentic and the first he sent,
 	 * mobilize a passive association. If not, kiss the frog.
 	 */
 	case AM_NEWPASS:
 
 		/*
 		 * If the inbound packet is correctly authenticated and
 		 * enabled, a symmetric passive association is
 		 * mobilized. If not but correctly authenticated, a
 		 * symmetric active response is sent. If authentication
 		 * fails, send a crypto-NAK packet. 
 		 */
 		if (!AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
 		    {
 			if (is_authentic == AUTH_ERROR)
 				fast_xmit(rbufp, MODE_ACTIVE, 0,
 				    restrict_mask);
 			return;			/* bad auth */
 		}
 		if (!AUTH(sys_authenticate | (restrict_mask &
 		    RES_NOPEER), is_authentic)) {
 			fast_xmit(rbufp, MODE_ACTIVE, skeyid,
 			    restrict_mask);
 			return;			/* hooray */
 		}
 
 		/*
 		 * Do not respond if stratum is below the floor.
 		 */
 		if (hisstratum < sys_floor)
 			return;			/* bad stratum */
 
 		if ((peer = newpeer(&rbufp->recv_srcadr,
 		    rbufp->dstadr, MODE_PASSIVE, hisversion,
 		    NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_UCAST, 0,
 		    skeyid)) == NULL)
 			return;			/* system error */
 		break;
 
 	/*
 	 * Process regular packet. Nothing special.
 	 */
 	case AM_PROCPKT:
 		break;
 
 	/*
 	 * A passive packet matches a passive association. This is
 	 * usually the result of reconfiguring a client on the fly. As
 	 * this association might be legitamate and this packet an
 	 * attempt to deny service, just ignore it.
 	 */
 	case AM_ERR:
 		return;
 
 	/*
 	 * For everything else there is the bit bucket.
 	 */
 	default:
 		return;
 	}
 	peer->flash &= ~PKT_TEST_MASK;
 
 	/*
 	 * Next comes a rigorous schedule of timestamp checking. If the
 	 * transmit timestamp is zero, the server is horribly broken.
 	 */
 	if (L_ISZERO(&p_xmt)) {
 		return;				/* read rfc1305 */
 
 	/*
 	 * If the transmit timestamp duplicates a previous one, the
 	 * packet is a replay. This prevents the bad guys from replaying
 	 * the most recent packet, authenticated or not.
 	 */
 	} else if (L_ISEQU(&peer->org, &p_xmt)) {
 		peer->flash |= TEST1;
 		peer->oldpkt++;
 		return;				/* duplicate packet */
 	
 
 	/*
 	 * If this is a broadcast mode packet, skip further checking.
 	 */
 	} else if (hismode != MODE_BROADCAST) {
 		if (L_ISZERO(&p_org))
 			peer->flash |= TEST3;	/* protocol unsynch */
 		else if (!L_ISEQU(&p_org, &peer->xmt))
 			peer->flash |= TEST2;	/* bogus packet */
 	}
 
 	/*
-	 * Update the origin and destination timestamps. If
-	 * unsynchronized or bogus abandon ship. If the crypto machine
+	 * If unsynchronized or bogus abandon ship. If the crypto machine
 	 * breaks, light the crypto bit and plaint the log.
 	 */
-	peer->org = p_xmt;
-	peer->rec = rbufp->recv_time;
 	if (peer->flash & PKT_TEST_MASK) {
 #ifdef OPENSSL
 		if (crypto_flags && (peer->flags & FLAG_SKEY)) {
 			rval = crypto_recv(peer, rbufp);
 			if (rval != XEVNT_OK) {
 				peer_clear(peer, "CRYP");
 				peer->flash |= TEST9; /* crypto error */
 			}
 		}
 #endif /* OPENSSL */
 		return;				/* unsynch */
 	}
 
 	/*
 	 * The timestamps are valid and the receive packet matches the
 	 * last one sent. If the packet is a crypto-NAK, the server
 	 * might have just changed keys. We reset the association
 	 * and restart the protocol.
 	 */
 	if (is_authentic == AUTH_CRYPTO) {
 		peer_clear(peer, "AUTH");
 		return;				/* crypto-NAK */
 
 	/* 
 	 * If the association is authenticated, the key ID is nonzero
 	 * and received packets must be authenticated. This is designed
 	 * to avoid a bait-and-switch attack, which was possible in past
 	 * versions. If symmetric modes, return a crypto-NAK. The peer
 	 * should restart the protocol.
 	 */
-	} else if (!AUTH(peer->keyid || (restrict_mask & RES_DONTTRUST),
-	    is_authentic)) {
+	} else if (!AUTH(peer->keyid || has_mac ||
+	    (restrict_mask & RES_DONTTRUST), is_authentic)) {
 		peer->flash |= TEST5;
-		if (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE)
+		if (has_mac &&
+		    (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE))
 			fast_xmit(rbufp, MODE_ACTIVE, 0, restrict_mask);
 		return;				/* bad auth */
 	}
 
 	/*
 	 * That was hard and I am sweaty, but the packet is squeaky
 	 * clean. Get on with real work.
+	 *
+	 * Update the origin and destination timestamps.
 	 */
+	peer->org = p_xmt;
+	peer->rec = rbufp->recv_time;
+
 	peer->received++;
 	peer->timereceived = current_time;
 	if (is_authentic == AUTH_OK)
 		peer->flags |= FLAG_AUTHENTIC;
 	else
 		peer->flags &= ~FLAG_AUTHENTIC;
 #ifdef OPENSSL
 	/*
 	 * More autokey dance. The rules of the cha-cha are as follows:
 	 *
 	 * 1. If there is no key or the key is not auto, do nothing.
 	 *
 	 * 2. If this packet is in response to the one just previously
 	 *    sent or from a broadcast server, do the extension fields.
 	 *    Otherwise, assume bogosity and bail out.
 	 *
 	 * 3. If an extension field contains a verified signature, it is
 	 *    self-authenticated and we sit the dance.
 	 *
 	 * 4. If this is a server reply, check only to see that the
 	 *    transmitted key ID matches the received key ID.
 	 *
 	 * 5. Check to see that one or more hashes of the current key ID
 	 *    matches the previous key ID or ultimate original key ID
 	 *    obtained from the broadcaster or symmetric peer. If no
 	 *    match, sit the dance and wait for timeout.
 	 *
 	 * In case of crypto error, fire the orchestra and stop dancing.
 	 * This is considered a permanent error, so light the crypto bit
 	 * to suppress further requests. If preemptable or ephemeral,
 	 * scuttle the ship.
 	 */
 	if (crypto_flags && (peer->flags & FLAG_SKEY)) {
 		peer->flash |= TEST8;
 		rval = crypto_recv(peer, rbufp);
 		if (rval != XEVNT_OK) {
 			peer_clear(peer, "CRYP");
 			peer->flash |= TEST9;	/* crypto error */
 			if (peer->flags & FLAG_PREEMPT ||
 			    !(peer->flags & FLAG_CONFIG))
 				unpeer(peer);
 			return;
 
 		} else if (hismode == MODE_SERVER) {
 			if (skeyid == peer->keyid)
 				peer->flash &= ~TEST8;
 		} else if (!(peer->flash & TEST8)) {
 			peer->pkeyid = skeyid;
 		} else if ((ap = (struct autokey *)peer->recval.ptr) !=
 		    NULL) {
 			int i;
 
 			for (i = 0; ; i++) {
 				if (tkeyid == peer->pkeyid ||
 				    tkeyid == ap->key) {
 					peer->flash &= ~TEST8;
 					peer->pkeyid = skeyid;
 					break;
 				}
 				if (i > ap->seq)
 					break;
 				tkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin,
 				    tkeyid, pkeyid, 0);
 			}
 		}
 		if (!(peer->crypto & CRYPTO_FLAG_PROV)) /* test 9 */
 			peer->flash |= TEST8;	/* not proventic */
 
 		/*
 		 * If the transmit queue is nonempty, clamp the host
 		 * poll interval to the packet poll interval.
 		 */
 		if (peer->cmmd != 0) {
 			peer->ppoll = pkt->ppoll;
 			poll_update(peer, peer->hpoll);
 		}
 	}
 #endif /* OPENSSL */
 
 	/*
 	 * The dance is complete and the flash bits have been lit. Toss
 	 * the packet over the fence for processing, which may light up
 	 * more flashers.
 	 */
 	process_packet(peer, pkt);
 
 	/*
 	 * Well, that was nice. If TEST4 is lit, either the crypto
 	 * machine jammed or a kiss-o'-death packet flew in, either of
 	 * which is fatal.
 	 */
 	if (peer->flash & TEST4) {
 		msyslog(LOG_INFO, "receive: fatal error %04x for %s",
 		    peer->flash, stoa(&peer->srcadr));
 		return;
 	}
 }
 
 
 /*
  * process_packet - Packet Procedure, a la Section 3.4.4 of the
  *	specification. Or almost, at least. If we're in here we have a
  *	reasonable expectation that we will be having a long term
  *	relationship with this host.
  *
  * peer - association the packet belongs to. Its header variables,
  *	flash bits and reach register are updated here.
  * pkt  - received packet header; multi-byte fields are converted from
  *	network to host byte order before use.
  *
  * Returns nothing. If any flash bit in PKT_TEST_MASK is lit after the
  * header checks, the packet is dropped before any offset/delay
  * computation. Otherwise the computed offset, delay and dispersion
  * are handed to clock_filter() and the peer statistics are recorded.
  */
 void
 process_packet(
 	register struct peer *peer,
 	register struct pkt *pkt
 	)
 {
 	double	t34, t21;
 	double	p_offset, p_del, p_disp;
 	l_fp	p_rec, p_xmt, p_org, p_reftime;
 	l_fp	ci;
 	u_char	pmode, pleap, pstratum;
 
 	sys_processed++;
 	peer->processed++;
 	p_del = FPTOD(NTOHS_FP(pkt->rootdelay));
 	p_disp = FPTOD(NTOHS_FP(pkt->rootdispersion));
 	NTOHL_FP(&pkt->reftime, &p_reftime);
 	NTOHL_FP(&pkt->rec, &p_rec);
 	NTOHL_FP(&pkt->xmt, &p_xmt);
 	pmode = PKT_MODE(pkt->li_vn_mode);
 	pleap = PKT_LEAP(pkt->li_vn_mode);
 	/*
 	 * For broadcast packets the origin field of the packet is not
 	 * used; peer->rec stands in for it.
 	 */
 	if (pmode != MODE_BROADCAST)
 		NTOHL_FP(&pkt->org, &p_org);
 	else
 		p_org = peer->rec;
 	pstratum = PKT_TO_STRATUM(pkt->stratum);
 
 	/*
 	 * Test for a kiss-o'-death packet. Only the "DENY" kiss code is
 	 * acted on here: the association is cleared and TEST4 is lit.
 	 * Processing continues so the header values below are still
 	 * captured; TEST4 is examined by the caller afterwards.
 	 */
 	if (pleap == LEAP_NOTINSYNC && pstratum == STRATUM_UNSPEC) {
 		if (memcmp(&pkt->refid, "DENY", 4) == 0) {
 			peer_clear(peer, "DENY");
 			peer->flash |= TEST4;	/* access denied */
 		}
 	}
 
 	/*
 	 * Capture the header values.
 	 */
 	record_raw_stats(&peer->srcadr, peer->dstadr ? &peer->dstadr->sin : NULL, &p_org,
 	    &p_rec, &p_xmt, &peer->rec);
 	peer->leap = pleap;
 	peer->stratum = min(pstratum, STRATUM_UNSPEC);	/* clamp */
 	peer->pmode = pmode;
 	peer->ppoll = pkt->ppoll;
 	peer->precision = pkt->precision;
 	peer->rootdelay = p_del;
 	peer->rootdispersion = p_disp;
 	peer->refid = pkt->refid;		/* network byte order */
 	peer->reftime = p_reftime;
 
 	/*
 	 * Verify the server is synchronized; that is, the leap bits and
 	 * stratum are valid, the root delay and root dispersion are
 	 * valid and the reference timestamp is not later than the
 	 * transmit timestamp.
 	 */
 	if (pleap == LEAP_NOTINSYNC ||		/* test 6 */
 	    pstratum < sys_floor || pstratum >= sys_ceiling)
 		peer->flash |= TEST6;		/* peer not synch */
 	if (p_del < 0 || p_disp < 0 || p_del /	/* test 7 */
 	    2 + p_disp >= MAXDISPERSE || !L_ISHIS(&p_xmt, &p_reftime))
 		peer->flash |= TEST7;		/* bad header */
 
 	/*
 	 * If any tests fail at this point, the packet is discarded.
 	 * Note that some flashers may have already been set in the
 	 * receive() routine.
 	 */
 	if (peer->flash & PKT_TEST_MASK) {
 #ifdef DEBUG
 		if (debug)
 			printf("packet: flash header %04x\n",
 			    peer->flash);
 #endif
 		return;
 	}
 	/*
 	 * The packet is acceptable. If the reach register was empty,
 	 * this is the transition to reachable: report it and note the
 	 * time. Then shift a 1 into the low-order reach bit.
 	 */
 	if (!(peer->reach)) {
 		report_event(EVNT_REACH, peer);
 		peer->timereachable = current_time;
 	}
 	poll_update(peer, peer->hpoll);
 	peer->reach |= 1;
 
 	/*
 	 * For a client/server association, calculate the clock offset,
 	 * roundtrip delay and dispersion. The equations are reordered
 	 * from the spec for more efficient use of temporaries. For a
 	 * broadcast association, offset the last measurement by the
 	 * computed delay during the client/server volley. Note that
 	 * org has been set to the time of last reception. Note the
 	 * computation of dispersion includes the system precision plus
 	 * that due to the frequency error since the origin time.
 	 *
 	 * It is very important to respect the hazards of overflow. The
 	 * only permitted operation on raw timestamps is subtraction,
 	 * where the result is a signed quantity spanning from 68 years
 	 * in the past to 68 years in the future. To avoid loss of
 	 * precision, these calculations are done using 64-bit integer
 	 * arithmetic. However, the offset and delay calculations are
 	 * sums and differences of these first-order differences, which
 	 * if done using 64-bit integer arithmetic, would be valid over
 	 * only half that span. Since the typical first-order
 	 * differences are usually very small, they are converted to 64-
 	 * bit doubles and all remaining calculations done in floating-
 	 * point arithmetic. This preserves the accuracy while retaining
 	 * the 68-year span.
 	 *
 	 * Let t1 = p_org, t2 = p_rec, t3 = p_xmt, t4 = peer->rec:
 	 */
 	ci = p_xmt;			/* t3 - t4 */
 	L_SUB(&ci, &peer->rec);
 	LFPTOD(&ci, t34);
 	ci = p_rec;			/* t2 - t1 */
 	L_SUB(&ci, &p_org);
 	LFPTOD(&ci, t21);
 	/*
 	 * ci is left holding t4 - t1. It is converted to a double only
 	 * in the client/server branch below, where it scales the
 	 * frequency-error term of the dispersion.
 	 */
 	ci = peer->rec;			/* t4 - t1 */
 	L_SUB(&ci, &p_org);
 
 	/*
 	 * If running in a broadcast association, the clock offset is
 	 * (t1 - t0) corrected by the one-way delay, but we can't
 	 * measure that directly. Therefore, we start up in MODE_CLIENT
 	 * mode, set FLAG_MCAST and exchange eight messages to determine
 	 * the clock offset. When the last message is sent, we switch to
 	 * MODE_BCLIENT mode. The next broadcast message after that
 	 * computes the broadcast offset and clears FLAG_MCAST.
 	 */
 	if (pmode == MODE_BROADCAST) {
 		p_offset = t34;
 		if (peer->flags & FLAG_MCAST) {
 			peer->estbdelay = peer->offset - p_offset;
 			/*
 			 * Still in the client/server volley: keep the
 			 * delay estimate but do not feed this sample
 			 * to the clock filter.
 			 */
 			if (peer->hmode == MODE_CLIENT)
 				return;
 
 			peer->flags &= ~(FLAG_MCAST | FLAG_BURST);
 		}
 		p_offset += peer->estbdelay;
 		p_del = peer->delay;
 		p_disp = 0;
 	} else {
 		p_offset = (t21 + t34) / 2.;
 		p_del = t21 - t34;
 		LFPTOD(&ci, p_disp);
 		p_disp = LOGTOD(sys_precision) +
 		    LOGTOD(peer->precision) + clock_phi * p_disp;
 	}
 	/* The delay handed to the filter is never below the precision. */
 	p_del = max(p_del, LOGTOD(sys_precision));
 	clock_filter(peer, p_offset, p_del, p_disp);
 	record_peer_stats(&peer->srcadr, ctlpeerstatus(peer),
 	    peer->offset, peer->delay, peer->disp, peer->jitter);
 }
 
 
 /*
  * clock_update - Called at system process update intervals.
  *
  * Clamps sys_poll to the system peer's [minpoll, maxpoll] range,
  * reschedules the system peer, and, if the peer has a sample newer
  * than sys_clocktime, hands sys_offset to local_clock(). The system
  * variables are then updated according to local_clock()'s return
  * code:
  *	-1	panic threshold exceeded; report and exit
  *	 2	clock was stepped; flush all peers and unsynchronize
  *	 1	clock was slewed; update stratum, leap, root delay,
  *		root dispersion and reference time
  *	other	ignored
  *
  * Takes no arguments and returns nothing. sys_peer is dereferenced
  * unconditionally, so the caller must guarantee it is not NULL.
  */
 static void
 clock_update(void)
 {
 	u_char	oleap;
 	u_char	ostratum;
 	double	dtemp;
 
 	/*
 	 * There must be a system peer at this point. If we just changed
 	 * the system peer, but have a newer sample from the old one,
 	 * wait until newer data are available.
 	 */
 	if (sys_poll < sys_peer->minpoll)
 		sys_poll = sys_peer->minpoll;
 	if (sys_poll > sys_peer->maxpoll)
 		sys_poll = sys_peer->maxpoll;
 	poll_update(sys_peer, sys_poll);
 	if (sys_peer->epoch <= sys_clocktime)
 		return;
 
 #ifdef DEBUG
 	/*
 	 * current_time is printed with %lu, matching how poll_update()
 	 * formats the same variable.
 	 */
 	if (debug)
 		printf("clock_update: at %lu assoc %d \n", current_time,
 		    peer_associations);
 #endif
 	/* Remember the old values so changes can be reported below. */
 	oleap = sys_leap;
 	ostratum = sys_stratum;
 	switch (local_clock(sys_peer, sys_offset)) {
 
 	/*
 	 * Clock exceeds panic threshold. Life as we know it ends.
 	 */
 	case -1:
 		report_event(EVNT_SYSFAULT, NULL);
 		exit (-1);
 		/* not reached */
 
 	/*
 	 * Clock was stepped. Flush all time values of all peers.
 	 */
 	case 2:
 		clear_all();
 		sys_leap = LEAP_NOTINSYNC;
 		sys_stratum = STRATUM_UNSPEC;
 		sys_peer = NULL;
 		sys_rootdelay = 0;
 		sys_rootdispersion = 0;
 		memcpy(&sys_refid, "STEP", 4);
 		report_event(EVNT_CLOCKRESET, NULL);
 		break;
 
 	/*
 	 * Clock was slewed. Update the system stratum, leap bits, root
 	 * delay, root dispersion, reference ID and reference time. If
 	 * the leap changes, we gotta reroll the keys. Except for
 	 * reference clocks, the minimum dispersion increment is not
 	 * less than sys_mindisp.
 	 */
 	case 1:
 		sys_leap = leap_next;
 		sys_stratum = min(sys_peer->stratum + 1,
 		    STRATUM_UNSPEC);
 		sys_reftime = sys_peer->rec;
 
 		/*
 		 * In orphan mode the stratum defaults to the orphan
 		 * stratum. The root delay is set to a random value
 		 * generated at startup. The root dispersion is set from
 		 * the peer dispersion; the peer root dispersion is
 		 * ignored.
 		 *
 		 * NOTE(review): the code below takes the orphan root
 		 * delay from sys_peer->delay; whether that field holds
 		 * the startup random value is not visible here.
 		 */
 		dtemp = sys_peer->disp + clock_phi * (current_time -
 		    sys_peer->update) + sys_jitter +
 		    fabs(sys_peer->offset);
 #ifdef REFCLOCK
 		if (!(sys_peer->flags & FLAG_REFCLOCK) && dtemp <
 		    sys_mindisp)
 			dtemp = sys_mindisp;
 #else
 		if (dtemp < sys_mindisp)
 			dtemp = sys_mindisp;
 #endif /* REFCLOCK */
 		if (sys_stratum >= sys_orphan) {
 			sys_stratum = sys_orphan;
 			sys_rootdelay = sys_peer->delay;
 			sys_rootdispersion = dtemp;
 		} else {
 			sys_rootdelay = sys_peer->delay +
 			    sys_peer->rootdelay;
 			sys_rootdispersion = dtemp +
 			    sys_peer->rootdispersion;
 		}
 		/*
 		 * First synchronization since being unsynchronized:
 		 * report it and, with OPENSSL, expire and refresh the
 		 * crypto state.
 		 */
 		if (oleap == LEAP_NOTINSYNC) {
 			report_event(EVNT_SYNCCHG, NULL);
 #ifdef OPENSSL
 			expire_all();
 			crypto_update();
 #endif /* OPENSSL */
 		}
 		break;
 	/*
 	 * Popcorn spike or step threshold exceeded. Pretend it never
 	 * happened.
 	 */
 	default:
 		break;
 	}
 	if (ostratum != sys_stratum)
 		report_event(EVNT_PEERSTCHG, NULL);
 }
 
 
 /*
  * poll_update - update peer poll interval
  *
  * peer  - association whose hpoll and nextdate are recomputed.
  * mpoll - requested poll interval, in the same units as
  *	  peer->minpoll and peer->maxpoll; it is clamped to that
  *	  range unless FLAG_FIXPOLL forces minpoll.
  *
  * Returns nothing. On return peer->hpoll holds the clamped interval
  * and, except when a burst is pending and the call did not come from
  * the poll routine, peer->nextdate holds the time of the next poll,
  * which is always later than current_time.
  */
 void
 poll_update(
 	struct peer *peer,
 	int	mpoll
 	)
 {
 	int	hpoll;
 
 	/*
 	 * This routine figures out when the next poll should be sent.
 	 * That turns out to be wickedly complicated. The big problem is
 	 * that sometimes the time for the next poll is in the past.
 	 * Watch out for races here between the receive process and the
 	 * poll process. The key assertion is that, if nextdate equals
 	 * current_time, the call is from the poll process; otherwise,
 	 * it is from the receive process.
 	 *
 	 * First, bracket the poll interval according to the type of
 	 * association and options. If a fixed interval is configured,
 	 * use minpoll. This primarily is for reference clocks, but
 	 * works for any association.
 	 */
 	if (peer->flags & FLAG_FIXPOLL) {
 		hpoll = peer->minpoll;
 
 	/*
 	 * The ordinary case; clamp the poll interval between minpoll
 	 * and maxpoll.
 	 */
 	} else {
 		hpoll = max(min(peer->maxpoll, mpoll), peer->minpoll);
 	}
 #ifdef OPENSSL
 	/*
 	 * Bit of crass arrogance at this point. If the poll interval
 	 * has changed and we have a keylist, the lifetimes in the
 	 * keylist are probably bogus. In this case purge the keylist
 	 * and regenerate it later.
 	 */
 	if (hpoll != peer->hpoll)
 		key_expire(peer);
 #endif /* OPENSSL */
 	peer->hpoll = hpoll;
 
 	/*
 	 * Now we figure out if there is an override. If during the
 	 * crypto protocol and a message is pending, make it wait not
 	 * more than two seconds.
 	 *
 	 * Note the if/else chain below is stitched together across the
 	 * OPENSSL conditional: with OPENSSL the burst test is an
 	 * "else if" of the crypto test; without it, the burst test
 	 * opens the chain.
 	 */
 #ifdef OPENSSL
 	if (peer->cmmd != NULL && (sys_leap != LEAP_NOTINSYNC ||
 	    peer->crypto)) {
 		peer->nextdate = current_time + RESP_DELAY;
 
 	/*
 	 * If we get called from the receive routine while a burst is
 	 * pending, just slink away. If from the poll routine and a
 	 * reference clock or a pending crypto response, delay for one
 	 * second. If this is the first sent in a burst, wait for the
 	 * modem to come up. For others in the burst, delay two seconds.
 	 */
 	} else if (peer->burst > 0) {
 #else /* OPENSSL */
 	if (peer->burst > 0) {
 #endif /* OPENSSL */
 		/* nextdate != current_time means the receive routine. */
 		if (peer->nextdate != current_time)
 			return;
 #ifdef REFCLOCK
 		else if (peer->flags & FLAG_REFCLOCK)
 			peer->nextdate += RESP_DELAY;
 #endif /* REFCLOCK */
 		else if (peer->flags & (FLAG_IBURST | FLAG_BURST) &&
 		    peer->burst == NTP_BURST)
 			peer->nextdate += sys_calldelay;
 		else
 			peer->nextdate += BURST_DELAY;
 	/*
 	 * The ordinary case; use the minimum of the host and peer
 	 * intervals, but not less than minpoll. In other words,
 	 * oversampling is okay but undersampling is evil.
 	 */
 	} else {
 		peer->nextdate = peer->outdate +
 		    RANDPOLL(max(min(peer->ppoll, hpoll),
 		    peer->minpoll));
 	}
 
 	/*
 	 * If the time for the next poll has already happened, bring it
 	 * up to the next second after this one. This way the only way
 	 * to get nextdate == current_time is from the poll routine.
 	 */
 	if (peer->nextdate <= current_time)
 		peer->nextdate = current_time + 1;
 #ifdef DEBUG
 	if (debug > 1)
 		printf("poll_update: at %lu %s flags %04x poll %d burst %d last %lu next %lu\n",
 		    current_time, ntoa(&peer->srcadr), peer->flags,
 		    peer->hpoll, peer->burst, peer->outdate,
 		    peer->nextdate);
 #endif
 }
 
 /*
  * peer_crypto_clear - discard crypto information
  *
  * peer - association whose cryptographic state is released.
  *
  * Returns nothing. With OPENSSL defined, every crypto-related pointer
  * held by the association is freed and set to NULL, the scalar crypto
  * fields are zeroed and the key list is expired. Without OPENSSL only
  * the debug trace is emitted.
  */
 void
 peer_crypto_clear(
 		  struct peer *peer
 		  )
 {
 	/*
 	 * If cryptographic credentials have been acquired, toss them to
 	 * Valhalla. Note that autokeys are ephemeral, in that they are
 	 * tossed immediately upon use. Therefore, the keylist can be
 	 * purged anytime without needing to preserve random keys. Note
 	 * that, if the peer is purged, the cryptographic variables are
 	 * purged, too. This makes it much harder to sneak in some
 	 * unauthenticated data in the clock filter.
 	 *
 	 * current_time and peer->nextdate are printed with %lu, matching
 	 * how poll_update() formats the same variables.
 	 */
 	DPRINTF(1, ("peer_crypto_clear: at %lu next %lu assoc ID %d\n",
 		    current_time, peer->nextdate, peer->associd));
 
 #ifdef OPENSSL
 	peer->assoc = 0;
 	peer->crypto = 0;
 
 	if (peer->pkey != NULL)
 		EVP_PKEY_free(peer->pkey);
 	peer->pkey = NULL;
 
 	peer->digest = NULL;	/* XXX MEMLEAK? check whether this needs to be freed in any way - never was freed */
 
 	/* free(NULL) is a no-op, so no guard is needed. */
 	free(peer->subject);
 	peer->subject = NULL;
 
 	free(peer->issuer);
 	peer->issuer = NULL;
 
 	peer->pkeyid = 0;
 
 	peer->pcookie = 0;
 
 	if (peer->ident_pkey != NULL)
 		EVP_PKEY_free(peer->ident_pkey);
 	peer->ident_pkey = NULL;
 
 	memset(&peer->fstamp, 0, sizeof(peer->fstamp));
 
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = NULL;
 
 	if (peer->grpkey != NULL)
 		BN_free(peer->grpkey);
 	peer->grpkey = NULL;
 
 	value_free(&peer->cookval);
 	value_free(&peer->recval);
 
 	/* Drop any pending crypto request message. */
 	free(peer->cmmd);
 	peer->cmmd = NULL;
 
 	key_expire(peer);
 
 	value_free(&peer->encrypt);
 #endif /* OPENSSL */
 }
 
 /*
  * peer_clear - clear peer filter registers.  See Section 3.4.8 of the spec.
  *
  * peer  - association to reset.
  * ident - tally-light code copied into peer->refid; exactly 4 bytes
  *	  are read from it, so it must point to at least 4 bytes
  *	  (callers in this file pass literals such as "DENY").
  *
  * Returns nothing. The crypto state is discarded, the clearable part
  * of the structure is zeroed, the nonzero defaults are restored and
  * the next poll is rescheduled. If the peer was the system peer,
  * sys_peer is set to NULL.
  */
 void
 peer_clear(
 	struct peer *peer,		/* peer structure */
 	char	*ident			/* tally lights */
 	)
 {
 	int	i;
 
 	peer_crypto_clear(peer);
 
 	if (peer == sys_peer)
 		sys_peer = NULL;
 
 	/*
 	 * Wipe the association clean and initialize the nonzero values.
 	 */
 	memset(CLEAR_TO_ZERO(peer), 0, LEN_CLEAR_TO_ZERO);
 	peer->estbdelay = sys_bdelay;
 	peer->ppoll = peer->maxpoll;
 	peer->hpoll = peer->minpoll;
 	peer->disp = MAXDISPERSE;
 	peer->jitter = LOGTOD(sys_precision);
 	for (i = 0; i < NTP_SHIFT; i++) {
 		peer->filter_order[i] = i;
 		peer->filter_disp[i] = MAXDISPERSE;
 	}
 	/*
 	 * Reference clocks keep their leap, stratum and refid; every
 	 * other association is marked unsynchronized and tagged with
 	 * the caller's code.
 	 */
 #ifdef REFCLOCK
 	if (!(peer->flags & FLAG_REFCLOCK)) {
 		peer->leap = LEAP_NOTINSYNC;
 		peer->stratum = STRATUM_UNSPEC;
 		memcpy(&peer->refid, ident, 4);
 	}
 #else
 	peer->leap = LEAP_NOTINSYNC;
 	peer->stratum = STRATUM_UNSPEC;
 	memcpy(&peer->refid, ident, 4);
 #endif /* REFCLOCK */
 
 	/*
 	 * During initialization use the association count to spread out
 	 * the polls at one-second intervals. Otherwise, randomize over
 	 * the minimum poll interval in order to avoid broadcast
 	 * implosion.
 	 */
 	peer->nextdate = peer->update = peer->outdate = current_time;
 	if (initializing)
 		peer->nextdate += peer_associations;
 	else if (peer->hmode == MODE_PASSIVE)
 		peer->nextdate += RESP_DELAY;
 	else
 		peer->nextdate += (ntp_random() & ((1 << NTP_MINDPOLL) -
 		    1));
 
 	/*
 	 * current_time and peer->nextdate are printed with %lu, matching
 	 * how poll_update() formats the same variables.
 	 */
 	DPRINTF(1, ("peer_clear: at %lu next %lu assoc ID %d refid %s\n",
 		    current_time, peer->nextdate, peer->associd, ident));
 }
 
 
 /*
  * clock_filter - add incoming clock sample to filter register and run
  *		  the filter procedure to find the best sample.
  *
  * Returns nothing. The sample is always stored and peer->disp is
  * always recomputed. peer->offset, peer->delay and peer->jitter are
  * updated only when at least one acceptable sample remains.
  * peer->epoch is advanced and clock_select() may be called only when
  * the chosen sample is newer than the last one used and is not
  * rejected as a popcorn spike.
  */
 void
 clock_filter(
 	struct peer *peer,		/* peer structure pointer */
 	double	sample_offset,		/* clock offset */
 	double	sample_delay,		/* roundtrip delay */
 	double	sample_disp		/* dispersion */
 	)
 {
 	double	dst[NTP_SHIFT];		/* distance vector */
 	int	ord[NTP_SHIFT];		/* index vector */
 	int	i, j, k, m;
 	double	dtemp, etemp;
 
 	/*
 	 * Shift the new sample into the register and discard the oldest
 	 * one. The new offset and delay come directly from the
 	 * timestamp calculations. The dispersion grows from the last
 	 * outbound packet or reference clock update to the present time
 	 * and increased by the sum of the peer precision and the system
 	 * precision. The delay can sometimes swing negative due to
 	 * frequency skew, so it is clamped non-negative.
 	 */
 	j = peer->filter_nextpt;
 	peer->filter_offset[j] = sample_offset;
 	peer->filter_delay[j] = max(0, sample_delay);
 	peer->filter_disp[j] = sample_disp;
 	peer->filter_epoch[j] = current_time;
 	j = (j + 1) % NTP_SHIFT;
 	peer->filter_nextpt = j;
 
 	/*
 	 * Update dispersions since the last update and at the same
 	 * time initialize the distance and index lists. The distance
 	 * list uses a compound metric. If the sample is valid and
 	 * younger than the minimum Allan intercept, use delay;
 	 * otherwise, use biased dispersion.
 	 *
 	 * j starts at the slot following the one just written and
 	 * advances once per iteration while i counts down, so i == 0
 	 * lands on the sample just stored. That newest sample is the
 	 * only one not aged by dtemp.
 	 */
 	dtemp = clock_phi * (current_time - peer->update);
 	peer->update = current_time;
 	for (i = NTP_SHIFT - 1; i >= 0; i--) {
 		if (i != 0)
 			peer->filter_disp[j] += dtemp;
 		if (peer->filter_disp[j] >= MAXDISPERSE) 
 			peer->filter_disp[j] = MAXDISPERSE;
 		if (peer->filter_disp[j] >= MAXDISPERSE)
 			dst[i] = MAXDISPERSE;
 		else if (peer->update - peer->filter_epoch[j] >
 		    allan_xpt)
 			dst[i] = sys_maxdist + peer->filter_disp[j];
 		else
 			dst[i] = peer->filter_delay[j];
 		ord[i] = j;
 		j++; j %= NTP_SHIFT;
 	}
 
 	/*
 	 * If the clock discipline has stabilized, sort the samples in
 	 * both lists by distance. Note, we do not displace a higher
 	 * distance sample by a lower distance one unless lower by at
 	 * least the precision.
 	 *
 	 * NOTE(review): "state" is defined outside this function; the
 	 * value 4 is taken to mean "stabilized" per the text above.
 	 */
 	if (state == 4) {
 		for (i = 1; i < NTP_SHIFT; i++) {
 			for (j = 0; j < i; j++) {
 				if (dst[j] > dst[i] +
 				    LOGTOD(sys_precision)) {
 					k = ord[j];
 					ord[j] = ord[i];
 					ord[i] = k;
 					etemp = dst[j];
 					dst[j] = dst[i];
 					dst[i] = etemp;
 				}
 			}
 		}
 	}
 
 	/*
 	 * Copy the index list to the association structure so ntpq
 	 * can see it later. Prune the distance list to samples less
 	 * than max distance, but keep at least two valid samples for
 	 * jitter calculation.
 	 *
 	 * m ends up as the number of samples counted as acceptable.
 	 */
 	m = 0;
 	for (i = 0; i < NTP_SHIFT; i++) {
 		peer->filter_order[i] = (u_char) ord[i];
 		if (dst[i] >= MAXDISPERSE || (m >= 2 && dst[i] >=
 		    sys_maxdist))
 			continue;
 		m++;
 	}
 	
 	/*
 	 * Compute the dispersion and jitter. The dispersion is weighted
 	 * exponentially by NTP_FWEIGHT (0.5) so it is normalized close
 	 * to 1.0. The jitter is the RMS differences relative to the
 	 * lowest delay sample. If no acceptable samples remain in the
 	 * shift register, quietly tiptoe home leaving only the
 	 * dispersion.
 	 *
 	 * k is the register slot at the head of the ordered list; only
 	 * list positions below m contribute to the jitter sum.
 	 */
 	peer->disp = peer->jitter = 0;
 	k = ord[0];
 	for (i = NTP_SHIFT - 1; i >= 0; i--) {
 		j = ord[i];
 		peer->disp = NTP_FWEIGHT * (peer->disp +
 		    peer->filter_disp[j]);
 		if (i < m)
 			peer->jitter += DIFF(peer->filter_offset[j],
 			    peer->filter_offset[k]);
 	}
 
 	/*
 	 * If no acceptable samples remain in the shift register,
 	 * quietly tiptoe home leaving only the dispersion. Otherwise,
 	 * save the offset, delay and jitter. Note the jitter must not
 	 * be less than the precision.
 	 */
 	if (m == 0)
 		return;
 
 	/* etemp: how far the new best offset moved from the old one. */
 	etemp = fabs(peer->offset - peer->filter_offset[k]);
 	peer->offset = peer->filter_offset[k];
 	peer->delay = peer->filter_delay[k];
 	if (m > 1)
 		peer->jitter /= m - 1;
 	peer->jitter = max(SQRT(peer->jitter), LOGTOD(sys_precision));
 
 	/*
 	 * A new sample is useful only if it is younger than the last
 	 * one used. Note the order is FIFO if the clock discipline has
 	 * not stabilized.
 	 */
 	if (peer->filter_epoch[k] <= peer->epoch) {
 #ifdef DEBUG
 		if (debug)
 			printf("clock_filter: discard %lu\n",
 			    peer->epoch - peer->filter_epoch[k]);
 #endif
 		return;
 	}
 
 	/*
 	 * If the difference between the last offset and the current one
 	 * exceeds the jitter by CLOCK_SGATE and the interval since the
 	 * last update is less than twice the system poll interval,
 	 * consider the update a popcorn spike and ignore it.
 	 */
 	if (etemp > CLOCK_SGATE * peer->jitter && m > 1 &&
 	    peer->filter_epoch[k] - peer->epoch < 2. *
 	    ULOGTOD(sys_poll)) {
 #ifdef DEBUG
 		if (debug)
 			printf("clock_filter: popcorn %.6f %.6f\n",
 			    etemp, dtemp);
 #endif
 		return;
 	}
 
 	/*
 	 * The mitigated sample statistics are saved for later
 	 * processing. If not in a burst, tickle the select.
 	 */
 	peer->epoch = peer->filter_epoch[k];
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "clock_filter: n %d off %.6f del %.6f dsp %.6f jit %.6f, age %lu\n",
 		    m, peer->offset, peer->delay, peer->disp,
 		    peer->jitter, current_time - peer->epoch);
 #endif
 	if (peer->burst == 0 || sys_leap == LEAP_NOTINSYNC)
 		clock_select();
 }
 
 
 /*
  * clock_select - find the pick-of-the-litter clock
  *
  * LOCKCLOCK: If the local clock is the prefer peer, it will always be
  * enabled, even if declared falseticker, (2) only the prefer peer can
  * be selected as the system peer, (3) if the external source is down,
  * the system leap bits are set to 11 and the stratum set to infinity.
  */
 void
 clock_select(void)
 {
 	struct peer *peer;
 	int	i, j, k, n;
 	int	nlist, nl3;
 
 	int	allow, osurv;
 	double	d, e, f, g;
 	double	high, low;
 	double	synch[NTP_MAXASSOC], error[NTP_MAXASSOC];
 	struct peer *osys_peer;
 	struct peer *typeacts = NULL;
 	struct peer *typelocal = NULL;
 	struct peer *typesystem = NULL;
 
 	static int list_alloc = 0;
 	static struct endpoint *endpoint = NULL;
 	static int *indx = NULL;
 	static struct peer **peer_list = NULL;
 	static u_int endpoint_size = 0;
 	static u_int indx_size = 0;
 	static u_int peer_list_size = 0;
 
 	/*
 	 * Initialize and create endpoint, index and peer lists big
 	 * enough to handle all associations.
 	 */
 	osys_peer = sys_peer;
 	sys_peer = NULL;
 	sys_pps = NULL;
 	sys_prefer = NULL;
 	osurv = sys_survivors;
 	sys_survivors = 0;
 #ifdef LOCKCLOCK
 	sys_leap = LEAP_NOTINSYNC;
 	sys_stratum = STRATUM_UNSPEC;
 	memcpy(&sys_refid, "DOWN", 4);
 #endif /* LOCKCLOCK */
 	nlist = 0;
 	for (n = 0; n < NTP_HASH_SIZE; n++)
 		nlist += peer_hash_count[n];
 	if (nlist > list_alloc) {
 		if (list_alloc > 0) {
 			free(endpoint);
 			free(indx);
 			free(peer_list);
 		}
 		while (list_alloc < nlist) {
 			list_alloc += 5;
 			endpoint_size += 5 * 3 * sizeof(*endpoint);
 			indx_size += 5 * 3 * sizeof(*indx);
 			peer_list_size += 5 * sizeof(*peer_list);
 		}
 		endpoint = (struct endpoint *)emalloc(endpoint_size);
 		indx = (int *)emalloc(indx_size);
 		peer_list = (struct peer **)emalloc(peer_list_size);
 	}
 
 	/*
 	 * Initially, we populate the island with all the rifraff peers
 	 * that happen to be lying around. Those with seriously
 	 * defective clocks are immediately booted off the island. Then,
 	 * the falsetickers are culled and put to sea. The truechimers
 	 * remaining are subject to repeated rounds where the most
 	 * unpopular at each round is kicked off. When the population
 	 * has dwindled to sys_minclock, the survivors split a million
 	 * bucks and collectively crank the chimes.
 	 */
 	nlist = nl3 = 0;	/* none yet */
 	for (n = 0; n < NTP_HASH_SIZE; n++) {
 		for (peer = peer_hash[n]; peer != NULL; peer =
 		    peer->next) {
 			peer->flags &= ~FLAG_SYSPEER;
 			peer->status = CTL_PST_SEL_REJECT;
 
 			/*
 			 * Leave the island immediately if the peer is
 			 * unfit to synchronize.
 			 */
 			if (peer_unfit(peer))
 				continue;
 
 			/*
 			 * Don't allow the local clock or modem drivers
 			 * in the kitchen at this point, unless the
 			 * prefer peer. Do that later, but only if
 			 * nobody else is around. These guys are all
 			 * configured, so we never throw them away.
 			 */
 #ifdef REFCLOCK
 			if (peer->refclktype == REFCLK_LOCALCLOCK
 #if defined(VMS) && defined(VMS_LOCALUNIT)
 			/* wjm: VMS_LOCALUNIT taken seriously */
 			    && REFCLOCKUNIT(&peer->srcadr) !=
 			    VMS_LOCALUNIT
 #endif	/* VMS && VMS_LOCALUNIT */
 				) {
 				typelocal = peer;
 #ifndef LOCKCLOCK
 				if (!(peer->flags & FLAG_PREFER))
 					continue; /* no local clock */
 #endif /* LOCKCLOCK */
 			}
 			if (peer->sstclktype == CTL_SST_TS_TELEPHONE) {
 				typeacts = peer;
 				if (!(peer->flags & FLAG_PREFER))
 					continue; /* no acts */
 			}
 #endif /* REFCLOCK */
 
 			/*
 			 * If we get this far, the peer can stay on the
 			 * island, but does not yet have the immunity
 			 * idol.
 			 */
 			peer->status = CTL_PST_SEL_SANE;
 			peer_list[nlist++] = peer;
 
 			/*
 			 * Insert each interval endpoint on the sorted
 			 * list.
 			 */
 			e = peer->offset;	 /* Upper end */
 			f = root_distance(peer);
 			e = e + f;
 			for (i = nl3 - 1; i >= 0; i--) {
 				if (e >= endpoint[indx[i]].val)
 					break;
 
 				indx[i + 3] = indx[i];
 			}
 			indx[i + 3] = nl3;
 			endpoint[nl3].type = 1;
 			endpoint[nl3++].val = e;
 
 			e = e - f;		/* Center point */
 			for (; i >= 0; i--) {
 				if (e >= endpoint[indx[i]].val)
 					break;
 
 				indx[i + 2] = indx[i];
 			}
 			indx[i + 2] = nl3;
 			endpoint[nl3].type = 0;
 			endpoint[nl3++].val = e;
 
 			e = e - f;		/* Lower end */
 			for (; i >= 0; i--) {
 				if (e >= endpoint[indx[i]].val)
 					break;
 
 				indx[i + 1] = indx[i];
 			}
 			indx[i + 1] = nl3;
 			endpoint[nl3].type = -1;
 			endpoint[nl3++].val = e;
 		}
 	}
 #ifdef DEBUG
 	if (debug > 2)
 		for (i = 0; i < nl3; i++)
 			printf("select: endpoint %2d %.6f\n",
 			   endpoint[indx[i]].type,
 			   endpoint[indx[i]].val);
 #endif
 	/*
 	 * This is the actual algorithm that cleaves the truechimers
 	 * from the falsetickers. The original algorithm was described
 	 * in Keith Marzullo's dissertation, but has been modified for
 	 * better accuracy.
 	 *
 	 * Briefly put, we first assume there are no falsetickers, then
 	 * scan the candidate list first from the low end upwards and
 	 * then from the high end downwards. The scans stop when the
 	 * number of intersections equals the number of candidates less
 	 * the number of falsetickers. If this doesn't happen for a
 	 * given number of falsetickers, we bump the number of
 	 * falsetickers and try again. If the number of falsetickers
 	 * becomes equal to or greater than half the number of
 	 * candidates, the Albanians have won the Byzantine wars and
 	 * correct synchronization is not possible.
 	 *
 	 * Here, nlist is the number of candidates and allow is the
 	 * number of falsetickers. Upon exit, the truechimers are the
 	 * survivors with offsets not less than low and not greater than
 	 * high. There may be none of them.
 	 */
 	low = 1e9;
 	high = -1e9;
 	for (allow = 0; 2 * allow < nlist; allow++) {
 		int	found;
 
 		/*
 		 * Bound the interval (low, high) as the largest
 		 * interval containing points from presumed truechimers.
 		 */
 		found = 0;
 		n = 0;
 		for (i = 0; i < nl3; i++) {
 			low = endpoint[indx[i]].val;
 			n -= endpoint[indx[i]].type;
 			if (n >= nlist - allow)
 				break;
 			if (endpoint[indx[i]].type == 0)
 				found++;
 		}
 		n = 0;
 		for (j = nl3 - 1; j >= 0; j--) {
 			high = endpoint[indx[j]].val;
 			n += endpoint[indx[j]].type;
 			if (n >= nlist - allow)
 				break;
 			if (endpoint[indx[j]].type == 0)
 				found++;
 		}
 
 		/*
 		 * If the number of candidates found outside the
 		 * interval is greater than the number of falsetickers,
 		 * then at least one truechimer is outside the interval,
 		 * so go around again. This is what makes this algorithm
 		 * different than Marzullo's.
 		 */
 		if (found > allow)
 			continue;
 
 		/*
 		 * If an interval containing truechimers is found, stop.
 		 * If not, increase the number of falsetickers and go
 		 * around again.
 		 */
 		if (high > low)
 			break;
 	}
 
 	/*
 	 * Clustering algorithm. Construct candidate list in order first
 	 * by stratum then by root distance, but keep only the best
 	 * NTP_MAXASSOC of them. Scan the list to find falsetickers, who
 	 * leave the island immediately. The TRUE peer is always a
 	 * truechimer. We must leave at least one peer to collect the
 	 * million bucks. If in orphan mode, rascals found with lower
 	 * stratum are guaranteed a seat on the bus.
 	 */
 	j = 0;
 	for (i = 0; i < nlist; i++) {
 		peer = peer_list[i];
 		if (nlist > 1 && (peer->offset <= low || peer->offset >=
 		    high) && !(peer->flags & FLAG_TRUE) &&
 		    !(sys_stratum >= sys_orphan && peer->stratum <
 		    sys_orphan))
 			continue;
 
 		peer->status = CTL_PST_SEL_DISTSYSPEER;
 
 		/*
 		 * The order metric is formed from the stratum times
 		 * max distance (1.) plus the root distance. It strongly
 		 * favors the lowest stratum, but a higher stratum peer
 		 * can capture the clock if the low stratum dominant
 		 * hasn't been heard for awhile.
 		 */
 		d = root_distance(peer) + peer->stratum * sys_maxdist;
 		if (j >= NTP_MAXASSOC) {
 			if (d >= synch[j - 1])
 				continue;
 			else
 				j--;
 		}
 		for (k = j; k > 0; k--) {
 			if (d >= synch[k - 1])
 				break;
 
 			peer_list[k] = peer_list[k - 1];
 			error[k] = error[k - 1];
 			synch[k] = synch[k - 1];
 		}
 		peer_list[k] = peer;
 		error[k] = peer->jitter;
 		synch[k] = d;
 		j++;
 	}
 	nlist = j;
 
 	/*
 	 * If no survivors remain at this point, check if the local
 	 * clock or modem drivers have been found. If so, nominate one
 	 * of them as the only survivor. Otherwise, give up and leave
 	 * the island to the rats.
 	 */
 	if (nlist == 0) {
 		if (typeacts != 0) {
 			typeacts->status = CTL_PST_SEL_DISTSYSPEER;
 			peer_list[0] = typeacts;
 			nlist = 1;
 		} else if (typelocal != 0) {
 			typelocal->status = CTL_PST_SEL_DISTSYSPEER;
 			peer_list[0] = typelocal;
 			nlist = 1;
 		} else {
 			if (osys_peer != NULL) {
 				NLOG(NLOG_SYNCSTATUS)
 				    msyslog(LOG_INFO,
 				    "no servers reachable");
 				report_event(EVNT_PEERSTCHG, NULL);
 			}
 		}
 	}
 
 	/*
 	 * We can only trust the survivors if the number of candidates
 	 * sys_minsane is at least the number required to detect and
 	 * cast out one falseticker. For the Byzantine agreement
 	 * algorithm used here, that number is 4; however, the default
 	 * sys_minsane is 1 to speed initial synchronization. Careful
 	 * operators will tinker a higher value and use at least that
 	 * number of synchronization sources.
 	 */
 	if (nlist < sys_minsane)
 		return;
 
 	for (i = 0; i < nlist; i++)
 		peer_list[i]->status = CTL_PST_SEL_SELCAND;
 
 	/*
 	 * Now, vote outlyers off the island by select jitter weighted
 	 * by root distance. Continue voting as long as there are more
 	 * than sys_minclock survivors and the minimum select jitter is
 	 * greater than the maximum peer jitter. Stop if we are about to
 	 * discard a TRUE or PREFER  peer, who of course has the
 	 * immunity idol.
 	 */
 	while (1) {
 		d = 1e9;
 		e = -1e9;
 		f = g = 0;
 		k = 0;
 		for (i = 0; i < nlist; i++) {
 			if (error[i] < d)
 				d = error[i];
 			f = 0;
 			if (nlist > 1) {
 				for (j = 0; j < nlist; j++)
 					f += DIFF(peer_list[j]->offset,
 					    peer_list[i]->offset);
 				f = SQRT(f / (nlist - 1));
 			}
 			if (f * synch[i] > e) {
 				g = f;
 				e = f * synch[i];
 				k = i;
 			}
 		}
 		f = max(f, LOGTOD(sys_precision));
 		if (nlist <= sys_minclock || f <= d ||
 		    peer_list[k]->flags & (FLAG_TRUE | FLAG_PREFER))
 			break;
 #ifdef DEBUG
 		if (debug > 2)
 			printf(
 			    "select: drop %s select %.6f jitter %.6f\n",
 			    ntoa(&peer_list[k]->srcadr), g, d);
 #endif
 		for (j = k + 1; j < nlist; j++) {
 			peer_list[j - 1] = peer_list[j];
 			error[j - 1] = error[j];
 		}
 		nlist--;
 	}
 
 	/*
 	 * What remains is a list usually not greater than sys_minclock
 	 * peers. We want only a peer at the lowest stratum to become
 	 * the system peer, although all survivors are eligible for the
 	 * combining algorithm. Consider each peer in turn and OR the
 	 * leap bits on the assumption that, if some of them honk
 	 * nonzero bits, they must know what they are doing. Check for
 	 * prefer and pps peers at any stratum. Note that the head of
 	 * the list is at the lowest stratum and that unsynchronized
 	 * peers cannot survive this far.
 	 */
 	leap_next = 0;
 	for (i = 0; i < nlist; i++) {
 		peer = peer_list[i];
 		sys_survivors++;
 		leap_next |= peer->leap;
 		peer->status = CTL_PST_SEL_SYNCCAND;
 		if (peer->flags & FLAG_PREFER)
 			sys_prefer = peer;
 		if (peer == osys_peer)
 			typesystem = peer;
 #ifdef REFCLOCK
 		if (peer->refclktype == REFCLK_ATOM_PPS)
 			sys_pps = peer;
 #endif /* REFCLOCK */
 #if DEBUG
 		if (debug > 1)
 			printf("cluster: survivor %s metric %.6f\n",
 			    ntoa(&peer_list[i]->srcadr), synch[i]);
 #endif
 	}
 
 	/*
 	 * Anticlockhop provision. Keep the current system peer if it is
 	 * a survivor but not first in the list. But do that only HOPPER
 	 * times.
 	 */
 	if (osys_peer == NULL || typesystem == NULL || typesystem ==
 	    peer_list[0] || sys_hopper > sys_maxhop) {
 		typesystem = peer_list[0];
 		sys_hopper = 0;
 	} else {
 		peer->selbroken++;
 	}
 
 	/*
 	 * Mitigation rules of the game. There are several types of
 	 * peers that can be selected here: (1) orphan, (2) prefer peer
 	 * (flag FLAG_PREFER) (3) pps peers (type REFCLK_ATOM_PPS), (4)
 	 * the existing system peer, if any, and (5) the head of the
 	 * survivor list.
 	 */
 	if (typesystem->stratum >= sys_orphan) {
 
 		/*
 		 * If in orphan mode, choose the system peer. If the
 		 * lowest distance, we are the orphan parent and the
 		 * offset is zero.
 		 */
 		sys_peer = typesystem;
 		sys_peer->status = CTL_PST_SEL_SYSPEER;
 		if (sys_orphandelay < sys_peer->rootdelay) {
 			sys_offset = 0;
 			sys_refid = htonl(LOOPBACKADR);
 		} else {
 			sys_offset = sys_peer->offset;
 			sys_refid = addr2refid(&sys_peer->srcadr);
 		}
 		sys_jitter = LOGTOD(sys_precision);
 #ifdef DEBUG
 		if (debug > 1)
 			printf("select: orphan offset %.6f\n",
 			    sys_offset);
 #endif
 	} else if (sys_prefer) {
 
 		/*
 		 * If a pps peer is present, choose it; otherwise,
 		 * choose the prefer peer.
 		 */
 		if (sys_pps) {
 			sys_peer = sys_pps;
 			sys_peer->status = CTL_PST_SEL_PPS;
 			sys_offset = sys_peer->offset;
 			if (!pps_control)
 				NLOG(NLOG_SYSEVENT)
 				    msyslog(LOG_INFO,
 				    "pps sync enabled");
 			pps_control = current_time;
 #ifdef DEBUG
 			if (debug > 1)
 				printf("select: pps offset %.6f\n",
 				    sys_offset);
 #endif
 		} else {
 			sys_peer = sys_prefer;
 			sys_peer->status = CTL_PST_SEL_SYSPEER;
 			sys_offset = sys_peer->offset;
 #ifdef DEBUG
 			if (debug > 1)
 				printf("select: prefer offset %.6f\n",
 				    sys_offset);
 #endif
 		}
 		if (sys_peer->stratum == STRATUM_REFCLOCK ||
 		    sys_peer->stratum == STRATUM_UNSPEC)
 			sys_refid = sys_peer->refid;
 		else
 			sys_refid = addr2refid(&sys_peer->srcadr);
 		sys_jitter = sys_peer->jitter;
 	} else {
 
 		/*
 		 * Otherwise, choose the anticlockhopper.
 		 */ 
 		sys_peer = typesystem;
 		sys_peer->status = CTL_PST_SEL_SYSPEER;
 		clock_combine(peer_list, nlist);
 		if (sys_peer->stratum == STRATUM_REFCLOCK ||
 		    sys_peer->stratum == STRATUM_UNSPEC)
 			sys_refid = sys_peer->refid;
 		else
 			sys_refid = addr2refid(&sys_peer->srcadr);
 		sys_jitter = SQRT(SQUARE(sys_peer->jitter) +
 		    SQUARE(sys_jitter));
 #ifdef DEBUG
 		if (debug > 1)
 			printf("select: combine offset %.6f\n",
 			   sys_offset);
 #endif
 	}
 
 	/*
 	 * We have found the alpha male.
 	 */
 	sys_peer->flags |= FLAG_SYSPEER;
 	if (osys_peer != sys_peer) {
 		char *src;
 
 		report_event(EVNT_PEERSTCHG, NULL);
 
 #ifdef REFCLOCK
                 if (sys_peer->flags & FLAG_REFCLOCK)
                         src = refnumtoa(&sys_peer->srcadr);
                 else
 #endif /* REFCLOCK */
                         src = ntoa(&sys_peer->srcadr);
 		NLOG(NLOG_SYNCSTATUS)
 		    msyslog(LOG_INFO, "synchronized to %s, stratum %d",
 			src, sys_peer->stratum);
 	}
 	clock_update();
 }
 
 
 /*
  * clock_combine - derive the system offset and jitter from the
  * survivor list
  *
  * Each survivor is weighted by the reciprocal of its root distance.
  * sys_offset is set to the weighted mean of the survivor offsets and
  * sys_jitter to the weighted RMS of the offset differences taken
  * relative to the first survivor in the list.
  */
 static void
 clock_combine(
 	struct peer **peers,		/* survivor list */
 	int	npeers			/* number of survivors */
 	)
 {
 	double	wsum = 0;	/* sum of the weights */
 	double	osum = 0;	/* weighted sum of offsets */
 	double	jsum = 0;	/* weighted sum of squared differences */
 	int	n;
 
 	for (n = 0; n < npeers; n++) {
 		struct peer *pp = peers[n];
 		double	dist = root_distance(pp);
 
 		wsum += 1. / dist;
 		osum += pp->offset / dist;
 		jsum += SQUARE(pp->offset - peers[0]->offset) / dist;
 	}
 	sys_offset = osum / wsum;
 	sys_jitter = SQRT(jsum / wsum);
 }
 
 /*
  * root_distance - compute synchronization distance from peer to root
  *
  * The result is the sum of a delay term, the peer's root dispersion,
  * its dispersion, the dispersion accumulated at rate clock_phi since
  * the peer's last update, and its jitter.
  */
 static double
 root_distance(
 	struct peer *peer
 	)
 {
 	double	base;		/* root delay, or zero in orphan mode */
 
 	/*
 	 * The value returned must exceed the minimum root dispersion so
 	 * that highly precise reference clocks do not cause clockhop.
 	 * A peer at or above the orphan stratum contributes no root
 	 * delay here, since that quantity is used by the orphan
 	 * election instead.
 	 */
 	base = (peer->stratum >= sys_orphan) ? 0 : peer->rootdelay;
 
 	return (base + (max(sys_mindisp, base + peer->delay) / 2 +
 	    peer->rootdispersion + peer->disp + clock_phi *
 	    (current_time - peer->update) + peer->jitter));
 }
 
 /*
  * peer_xmit - send packet for persistent association.
  *
  * Builds the NTP header from the system variables (or the orphan
  * substitutes), then sends it either unauthenticated, with a symmetric
  * key MAC, or - when autokey is active for the peer - with at most one
  * response and one request extension field followed by the MAC.
  * Nothing is sent if the association has no local interface, if a
  * request extension field cannot be built, or if the key is missing;
  * the latter two also set TEST9 in peer->flash.
  */
 static void
 peer_xmit(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	struct pkt xpkt;	/* transmit packet */
 	int	sendlen, authlen;
 	keyid_t	xkeyid = 0;	/* transmit key ID */
 	l_fp	xmt_tx;
 
 	if (!peer->dstadr)	/* don't bother with peers without interface */
 		return;
 
 	/*
 	 * This is deliciously complicated. There are three cases.
 	 *
 	 * case		leap	stratum	refid	delay	dispersion
 	 *
 	 * normal	system	system	system	system	system
 	 * orphan child	00	orphan	system	orphan	system
 	 * orphan parent 00	orphan	loopbk	0	0
 	 */
 	/*
 	 * This is a normal packet. Use the system variables.
 	 */
 	if (sys_stratum < sys_orphan) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap,
 		    peer->version, peer->hmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_stratum);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan child packet. The host is synchronized to an
 	 * orphan parent. Show leap synchronized, orphan stratum, system
 	 * reference ID, orphan root delay and system root dispersion.
 	 */
 	} else if (sys_peer != NULL) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    peer->version, peer->hmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = sys_refid;	/* per the table above */
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan parent. Show leap synchronized, orphan
 	 * stratum, loopback reference ID and zero root delay and root
 	 * dispersion.
 	 */
 	} else {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    peer->version, peer->hmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = htonl(LOOPBACKADR);	/* per the table above */
 		xpkt.rootdelay = 0;
 		xpkt.rootdispersion = 0;
 	}
 	xpkt.ppoll = peer->hpoll;
 	xpkt.precision = sys_precision;
 	HTONL_FP(&sys_reftime, &xpkt.reftime);
 	HTONL_FP(&peer->org, &xpkt.org);
 	HTONL_FP(&peer->rec, &xpkt.rec);
 
 	/*
 	 * If the received packet contains a MAC, the transmitted packet
 	 * is authenticated and contains a MAC. If not, the transmitted
 	 * packet is not authenticated.
 	 *
 	 * It is most important when autokey is in use that the local
 	 * interface IP address be known before the first packet is
 	 * sent. Otherwise, it is not possible to compute a correct MAC
 	 * the recipient will accept. Thus, the I/O semantics have to do
 	 * a little more work. In particular, the wildcard interface
 	 * might not be usable.
 	 */
 	sendlen = LEN_PKT_NOMAC;
 	if (!(peer->flags & FLAG_AUTHENABLE)) {
 		get_systime(&peer->xmt);
 		HTONL_FP(&peer->xmt, &xpkt.xmt);
 		sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl],
 			&xpkt, sendlen);
 		peer->sent++;
 #ifdef DEBUG
 		if (debug)
 			printf("transmit: at %ld %s->%s mode %d\n",
 			       current_time, peer->dstadr ? stoa(&peer->dstadr->sin) : "-",
 			       stoa(&peer->srcadr), peer->hmode);
 #endif
 		return;
 	}
 
 	/*
 	 * The received packet contains a MAC, so the transmitted packet
 	 * must be authenticated. If autokey is enabled, fuss with the
 	 * various modes; otherwise, symmetric key cryptography is used.
 	 */
 #ifdef OPENSSL
 	if (crypto_flags && (peer->flags & FLAG_SKEY)) {
 		struct exten *exten;	/* extension field */
 
 		/*
 		 * The Public Key Dance (PKD): Cryptographic credentials
 		 * are contained in extension fields, each including a
 		 * 4-octet length/code word followed by a 4-octet
 		 * association ID and optional additional data. Optional
 		 * data includes a 4-octet data length field followed by
 		 * the data itself. Request messages are sent from a
 		 * configured association; response messages can be sent
 		 * from a configured association or can take the fast
 		 * path without ever matching an association. Response
 		 * messages have the same code as the request, but have
 		 * a response bit and possibly an error bit set. In this
 		 * implementation, a message may contain no more than
 		 * one command and no more than one response.
 		 *
 		 * Cryptographic session keys include both a public and
 		 * a private component. Request and response messages
 		 * using extension fields are always sent with the
 		 * private component set to zero. Packets without
 		 * extension fields include the private component when
 		 * the session key is generated.
 		 */
 		while (1) {
 		
 			/*
 			 * Allocate and initialize a keylist if not
 			 * already done. Then, use the list in inverse
 			 * order, discarding keys once used. Keep the
 			 * latest key around until the next one, so
 			 * clients can use client/server packets to
 			 * compute propagation delay.
 			 *
 			 * Note that once a key is used from the list,
 			 * it is retained in the key cache until the
 			 * next key is used. This is to allow a client
 			 * to retrieve the encrypted session key
 			 * identifier to verify authenticity.
 			 *
 			 * If for some reason a key is no longer in the
 			 * key cache, a birthday has happened and the
 			 * pseudo-random sequence is probably broken. In
 			 * that case, purge the keylist and regenerate
 			 * it.
 			 */
 			if (peer->keynumber == 0)
 				make_keylist(peer, peer->dstadr);
 			else
 				peer->keynumber--;
 			xkeyid = peer->keylist[peer->keynumber];
 			if (authistrusted(xkeyid))
 				break;
 			else
 				key_expire(peer);
 		}
 		peer->keyid = xkeyid;
 		exten = NULL;
 		switch (peer->hmode) {
 
 			/*
 			 * In broadcast server mode the autokey values are
 			 * required by the broadcast clients. Push them when a
 			 * new keylist is generated; otherwise, push the
 			 * association message so the client can request them at
 			 * other times.
 			 */
 		case MODE_BROADCAST:
 			if (peer->flags & FLAG_ASSOC)
 				exten = crypto_args(peer, CRYPTO_AUTO |
 						    CRYPTO_RESP, NULL);
 			else
 				exten = crypto_args(peer, CRYPTO_ASSOC |
 						    CRYPTO_RESP, NULL);
 			break;
 
 		/*
 		 * In symmetric modes the digest, certificate, agreement
 		 * parameters, cookie and autokey values are required.
 		 * The leapsecond table is optional. But, a passive peer
 		 * will not believe the active peer until the latter has
 		 * synchronized, so the agreement must be postponed
 		 * until then. In any case, if a new keylist is
 		 * generated, the autokey values are pushed.
 		 *
 		 * If the crypto bit is lit, don't send requests.
 		 */
 		case MODE_ACTIVE:
 		case MODE_PASSIVE:
 			if (peer->flash & TEST9)
 				break;
 			/*
 			 * Parameter and certificate.
 			 */
 			if (!peer->crypto)
 				exten = crypto_args(peer, CRYPTO_ASSOC,
 						    sys_hostname);
 			else if (!(peer->crypto & CRYPTO_FLAG_VALID))
 				exten = crypto_args(peer, CRYPTO_CERT,
 						    peer->issuer);
 
 			/*
 			 * Identity. Note we have to sign the
 			 * certificate before the cookie to avoid a
 			 * deadlock when the passive peer is walking the
 			 * certificate trail. Awesome.
 			 */
 			else if (!(peer->crypto & CRYPTO_FLAG_VRFY))
 				exten = crypto_args(peer,
 						    crypto_ident(peer), NULL);
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 !(peer->crypto & CRYPTO_FLAG_SIGN))
 				exten = crypto_args(peer, CRYPTO_SIGN,
 						    sys_hostname);
 
 			/*
 			 * Autokey. We request the cookie only when the
 			 * server and client are synchronized and
 			 * signatures work both ways. On the other hand,
 			 * the active peer needs the autokey values
 			 * before then and when the passive peer is
 			 * waiting for the active peer to synchronize.
 			 * Any time we regenerate the key list, we offer
 			 * the autokey values without being asked.
 			 */
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 peer->leap != LEAP_NOTINSYNC &&
 				 !(peer->crypto & CRYPTO_FLAG_AGREE))
 				exten = crypto_args(peer, CRYPTO_COOK,
 						    NULL);
 			else if (peer->flags & FLAG_ASSOC)
 				exten = crypto_args(peer, CRYPTO_AUTO |
 						    CRYPTO_RESP, NULL);
 			else if (!(peer->crypto & CRYPTO_FLAG_AUTO))
 				exten = crypto_args(peer, CRYPTO_AUTO,
 						    NULL);
 
 			/*
 			 * Postamble. We trade leapseconds only when the
 			 * server and client are synchronized.
 			 */
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 peer->leap != LEAP_NOTINSYNC &&
 				 peer->crypto & CRYPTO_FLAG_TAI &&
 				 !(peer->crypto & CRYPTO_FLAG_LEAP))
 				exten = crypto_args(peer, CRYPTO_TAI,
 						    NULL);
 			break;
 
 		/*
 		 * In client mode the digest, certificate, agreement
 		 * parameters and cookie are required. The leapsecond
 		 * table is optional. If broadcast client mode, the
 		 * autokey values are required as well. In broadcast
 		 * client mode, these values must be acquired during the
 		 * client/server exchange to avoid having to wait until
 		 * the next key list regeneration. Otherwise, the poor
 		 * dude may die a lingering death until becoming
 		 * unreachable and attempting rebirth.
 		 *
 		 * If neither the server or client have the agreement
 		 * parameters, the protocol transmits the cookie in the
 		 * clear. If the server has the parameters, the client
 		 * requests them and the protocol blinds it using the
 		 * agreed key. It is a protocol error if the client has
 		 * the parameters but the server does not.
 		 *
 		 * If the crypto bit is lit, don't send requests.
 		 */
 		case MODE_CLIENT:
 			if (peer->flash & TEST9)
 				break;
 			/*
 			 * Parameter and certificate.
 			 */
 			if (!peer->crypto)
 				exten = crypto_args(peer, CRYPTO_ASSOC,
 						    sys_hostname);
 			else if (!(peer->crypto & CRYPTO_FLAG_VALID))
 				exten = crypto_args(peer, CRYPTO_CERT,
 						    peer->issuer);
 
 			/*
 			 * Identity
 			 */
 			else if (!(peer->crypto & CRYPTO_FLAG_VRFY))
 				exten = crypto_args(peer,
 						    crypto_ident(peer), NULL);
 
 			/*
 			 * Autokey
 			 */
 			else if (!(peer->crypto & CRYPTO_FLAG_AGREE))
 				exten = crypto_args(peer, CRYPTO_COOK,
 						    NULL);
 			else if (!(peer->crypto & CRYPTO_FLAG_AUTO) &&
 				 (peer->cast_flags & MDF_BCLNT))
 				exten = crypto_args(peer, CRYPTO_AUTO,
 						    NULL);
 
 			/*
 			 * Postamble. We can sign the certificate here,
 			 * since there is no chance of deadlock.
 			 */
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 !(peer->crypto & CRYPTO_FLAG_SIGN))
 				exten = crypto_args(peer, CRYPTO_SIGN,
 						    sys_hostname);
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 peer->crypto & CRYPTO_FLAG_TAI &&
 				 !(peer->crypto & CRYPTO_FLAG_LEAP))
 				exten = crypto_args(peer, CRYPTO_TAI,
 						    NULL);
 			break;
 		}
 
 		/*
 		 * Build the extension fields as directed. A response to
 		 * a request is always sent, even if an error. If an
 		 * error occurs when sending a request, the crypto
 		 * machinery broke or was misconfigured. In that case
 		 * light the crypto bit to suppress further requests.
 		 */
 		if (peer->cmmd != NULL) {
 			peer->cmmd->associd = htonl(peer->associd);
 			sendlen += crypto_xmit(&xpkt, &peer->srcadr,
 					       sendlen, peer->cmmd, 0);
 			free(peer->cmmd);
 			peer->cmmd = NULL;
 		}
 		if (exten != NULL) {
 			int ltemp = 0;
 
 			if (exten->opcode != 0) {
 				ltemp = crypto_xmit(&xpkt,
 						       &peer->srcadr, sendlen, exten, 0);
 				if (ltemp == 0) {
 					peer->flash |= TEST9; /* crypto error */
 					free(exten);
 					return;
 				}
 			}
 			sendlen += ltemp;
 			free(exten);
 		}
 
 		/*
 		 * If extension fields are present, we must use a
 		 * private cookie value of zero. Don't send if the
 		 * crypto bit is set and no extension field is present,
 		 * but in that case give back the key. Most intricate.
 		 */
 		if (sendlen > LEN_PKT_NOMAC) {
 			session_key(&peer->dstadr->sin, &peer->srcadr,
 			    xkeyid, 0, 2);
 		} else if (peer->flash & TEST9) {
 			authtrust(xkeyid, 0);
 			return;
 		}
 	} 
 #endif /* OPENSSL */
 
 	/*
 	 * Stash the transmit timestamp corrected for the encryption
 	 * delay. If autokey, give back the key, as we use keys only
 	 * once. Check for errors such as missing keys, buffer overflow,
 	 * etc.
 	 */
 	xkeyid = peer->keyid;
 	get_systime(&peer->xmt);
 	L_ADD(&peer->xmt, &sys_authdelay);
 	HTONL_FP(&peer->xmt, &xpkt.xmt);
 	authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen);
 	if (authlen == 0) {
 		msyslog(LOG_INFO, "transmit: %s key %u not found",
 		    stoa(&peer->srcadr), xkeyid);
 		peer->flash |= TEST9;		/* no key found */
 		return;
 	}
 	sendlen += authlen;
 #ifdef OPENSSL
 	if (xkeyid > NTP_MAXKEY)
 		authtrust(xkeyid, 0);
 #endif /* OPENSSL */
 	get_systime(&xmt_tx);
 	if (sendlen > sizeof(xpkt)) {
 		msyslog(LOG_ERR, "buffer overflow %u", sendlen);
 		exit (-1);
 	}
 	sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt,
 		sendlen);
 
 	/*
 	 * Calculate the encryption delay. Keep the minimum over
 	 * the latest two samples.
 	 */
 	L_SUB(&xmt_tx, &peer->xmt);
 	L_ADD(&xmt_tx, &sys_authdelay);
 	sys_authdly[1] = sys_authdly[0];
 	sys_authdly[0] = xmt_tx.l_uf;
 	if (sys_authdly[0] < sys_authdly[1])
 		sys_authdelay.l_uf = sys_authdly[0];
 	else
 		sys_authdelay.l_uf = sys_authdly[1];
 	peer->sent++;
 #ifdef OPENSSL
 #ifdef DEBUG
 	if (debug)
 		printf(
 			"transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d index %d\n",
 			current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-",
 			ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen -
 			authlen, authlen, peer->keynumber);
 #endif
 #else
 #ifdef DEBUG
 	if (debug)
 		printf(
 			"transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n",
 			current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-",
 			ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen -
 			authlen, authlen);
 #endif
 #endif /* OPENSSL */
 }
 
 
 /*
  * fast_xmit - Send packet for nonpersistent association. Note that
  * neither the source or destination can be a broadcast address.
  *
  * The reply header is built from the received packet and the system
  * variables. If the request was exactly LEN_PKT_NOMAC octets long the
  * reply is sent without a MAC; otherwise it is authenticated with
  * xkeyid, using an autokey session key when xkeyid is above
  * NTP_MAXKEY.
  */
 static void
 fast_xmit(
 	struct recvbuf *rbufp,	/* receive packet pointer */
 	int	xmode,		/* transmit mode */
 	keyid_t	xkeyid,		/* transmit key ID */
 	int	mask		/* restrict mask */
 	)
 {
 	struct pkt xpkt;		/* transmit packet structure */
 	struct pkt *rpkt;		/* receive packet structure */
 	l_fp	xmt_ts;			/* timestamp */
 	l_fp	xmt_tx;			/* timestamp after authent */
 	int	sendlen, authlen;
 #ifdef OPENSSL
 	u_int32	temp32;
 #endif
 
 	/*
 	 * Initialize transmit packet header fields from the receive
 	 * buffer provided. We leave some fields intact as received. If
 	 * the gazinta was from a multicast address, the gazoutta must
 	 * go out another way.
 	 *
 	 * The root delay field is special. If the system stratum is
 	 * less than the orphan stratum, send the real root delay.
 	 * Otherwise, if there is a system peer, we are an orphan child,
 	 * so send the orphan delay. Otherwise, we must be an orphan
 	 * parent, so send zero.
 	 */
 	rpkt = &rbufp->recv_pkt;
 	if (rbufp->dstadr->flags & INT_MCASTOPEN)
 		rbufp->dstadr = findinterface(&rbufp->recv_srcadr);
 
 	/*
 	 * This is deliciously complicated. There are four cases.
 	 *
 	 * case		leap	stratum	refid	delay	dispersion
 	 *
 	 * KoD		11	16	KISS	system	system
 	 * normal	system	system	system	system	system
 	 * orphan child	00	orphan	system	orphan	system
 	 * orphan parent 00	orphan	loopbk	0	0
 	 */
 	/*
 	 * This is a kiss-of-death (KoD) packet. Show leap
 	 * unsynchronized, unspecified stratum, reference ID the
 	 * four-character kiss code and system root delay. Note the rate
 	 * limit on these packets. Once a second initialize a bucket
 	 * counter. Every packet sent decrements the counter until
 	 * reaching zero. If the counter is zero, drop the kiss.
 	 */
 	if (mask & RES_LIMITED) {
 		sys_limitrejected++;
 		if (sys_kod == 0 || !(mask & RES_DEMOBILIZE))
 			return;
 
 		sys_kod--;
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOTINSYNC,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_UNSPEC;
 		memcpy(&xpkt.refid, "RATE", 4);
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is a normal packet. Use the system variables.
 	 */
 	} else if (sys_stratum < sys_orphan) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_stratum);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan child packet. The host is synchronized to an
 	 * orphan parent. Show leap synchronized, orphan stratum, system
 	 * reference ID, orphan root delay and system root dispersion.
 	 */
 	} else if (sys_peer != NULL) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan parent. Show leap synchronized, orphan
 	 * stratum, loopback reference ID and zero root delay and root
 	 * dispersion.
 	 */
 	} else {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = htonl(LOOPBACKADR);
 		xpkt.rootdelay = HTONS_FP(DTOFP(0));
 		xpkt.rootdispersion = HTONS_FP(DTOFP(0));
 	}
 
 	/*
 	 * The root dispersion is deliberately not touched again here:
 	 * each case above has already stored the value it wants, and
 	 * the orphan parent must keep its zero.
 	 */
 	xpkt.ppoll = rpkt->ppoll;
 	xpkt.precision = sys_precision;
 	HTONL_FP(&sys_reftime, &xpkt.reftime);
 	xpkt.org = rpkt->xmt;
 	HTONL_FP(&rbufp->recv_time, &xpkt.rec);
 
 	/*
 	 * If the received packet contains a MAC, the transmitted packet
 	 * is authenticated and contains a MAC. If not, the transmitted
 	 * packet is not authenticated.
 	 */
 	sendlen = LEN_PKT_NOMAC;
 	if (rbufp->recv_length == sendlen) {
 		get_systime(&xmt_ts);
 		HTONL_FP(&xmt_ts, &xpkt.xmt);
 		sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt,
 		    sendlen);
 #ifdef DEBUG
 		if (debug)
 			printf("transmit: at %ld %s->%s mode %d\n",
 			    current_time, stoa(&rbufp->dstadr->sin),
 			    stoa(&rbufp->recv_srcadr), xmode);
 #endif
 		return;
 	}
 
 	/*
 	 * The received packet contains a MAC, so the transmitted packet
 	 * must be authenticated. For symmetric key cryptography, use
 	 * the predefined and trusted symmetric keys to generate the
 	 * cryptosum. For autokey cryptography, use the server private
 	 * value to generate the cookie, which is unique for every
 	 * source-destination-key ID combination.
 	 */
 #ifdef OPENSSL
 	if (xkeyid > NTP_MAXKEY) {
 		keyid_t cookie;
 
 		/*
 		 * The only way to get here is a reply to a legitimate
 		 * client request message, so the mode must be
 		 * MODE_SERVER. If an extension field is present, there
 		 * can be only one and that must be a command. Do what
 		 * needs, but with private value of zero so the poor
 		 * jerk can decode it. If no extension field is present,
 		 * use the cookie to generate the session key.
 		 */
 		cookie = session_key(&rbufp->recv_srcadr,
 		    &rbufp->dstadr->sin, 0, sys_private, 0);
 		if (rbufp->recv_length >= (int)(sendlen + MAX_MAC_LEN +
 		    2 * sizeof(u_int32))) {
 			session_key(&rbufp->dstadr->sin,
 			    &rbufp->recv_srcadr, xkeyid, 0, 2);
 			temp32 = CRYPTO_RESP;
 			rpkt->exten[0] |= htonl(temp32);
 			sendlen += crypto_xmit(&xpkt,
 			    &rbufp->recv_srcadr, sendlen,
 			    (struct exten *)rpkt->exten, cookie);
 		} else {
 			session_key(&rbufp->dstadr->sin,
 			    &rbufp->recv_srcadr, xkeyid, cookie, 2);
 		}
 	}
 #endif /* OPENSSL */
 	get_systime(&xmt_ts);
 	L_ADD(&xmt_ts, &sys_authdelay);
 	HTONL_FP(&xmt_ts, &xpkt.xmt);
 	authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen);
 	sendlen += authlen;
 #ifdef OPENSSL
 	if (xkeyid > NTP_MAXKEY)
 		authtrust(xkeyid, 0);
 #endif /* OPENSSL */
 	get_systime(&xmt_tx);
 	if (sendlen > sizeof(xpkt)) {
 		msyslog(LOG_ERR, "buffer overflow %u", sendlen);
 		exit (-1);
 	}
 	sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen);
 
 	/*
 	 * Calculate the encryption delay. Keep the minimum over the
 	 * latest two samples.
 	 */
 	L_SUB(&xmt_tx, &xmt_ts);
 	L_ADD(&xmt_tx, &sys_authdelay);
 	sys_authdly[1] = sys_authdly[0];
 	sys_authdly[0] = xmt_tx.l_uf;
 	if (sys_authdly[0] < sys_authdly[1])
 		sys_authdelay.l_uf = sys_authdly[0];
 	else
 		sys_authdelay.l_uf = sys_authdly[1];
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n",
 		    current_time, ntoa(&rbufp->dstadr->sin),
 		    ntoa(&rbufp->recv_srcadr), xmode, xkeyid, sendlen -
 		    authlen, authlen);
 #endif
 }
 
 
 #ifdef OPENSSL
 /*
  * key_expire - purge the key list
  *
  * Marks every key in the peer's list, from index 0 through the current
  * key number, as untrusted, releases the list, frees the peer's send
  * value and resets the key number to zero.
  */
 void
 key_expire(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	int n;
 
 	if (peer->keylist != NULL) {
 		n = 0;
 		while (n <= peer->keynumber) {
 			authtrust(peer->keylist[n], 0);
 			n++;
 		}
 		free(peer->keylist);
 		peer->keylist = NULL;
 	}
 	value_free(&peer->sndval);
 	peer->keynumber = 0;
 #ifdef DEBUG
 	if (debug)
 		printf("key_expire: at %lu\n", current_time);
 #endif
 }
 #endif /* OPENSSL */
 
 
 /*
  * peer_unfit - determine if the peer is unfit for synchronization
  *
  * The peer is checked against four independent criteria; each one
  * that fails contributes its bit to the result:
  *
  *   TEST10  bad leap, or stratum below floor or at or above ceiling
  *   TEST11  root distance exceeded
  *   TEST12  a direct or indirect synchronization loop would form
  *   TEST13  unreachable or noselect
  *
  * The bits covered by PEER_TEST_MASK in peer->flash are replaced by
  * the result, which is also returned.
  */
 int				/* FALSE if fit, TRUE if unfit */
 peer_unfit(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	int	unfit = 0;	/* accumulated test bits */
 
 	/*
 	 * Stratum error: the server has never been synchronized, its
 	 * stratum is below the floor or at or above the ceiling, or
 	 * the system stratum is below the orphan stratum while the
 	 * server stratum is at or above it.
 	 */
 	if (peer->leap == LEAP_NOTINSYNC)
 		unfit |= TEST10;
 	else if (peer->stratum < sys_floor)
 		unfit |= TEST10;
 	else if (peer->stratum >= sys_ceiling)
 		unfit |= TEST10;
 	else if (sys_stratum < sys_orphan && peer->stratum >= sys_orphan)
 		unfit |= TEST10;
 
 	/*
 	 * Distance error: the root distance has reached the distance
 	 * threshold plus the increment due to one poll interval.
 	 */
 	if (root_distance(peer) >= sys_maxdist + clock_phi *
 	    ULOGTOD(sys_poll))
 		unfit |= TEST11;
 
 	/*
 	 * Loop error: the remote peer is synchronized to the local
 	 * peer, or to the same server as the local peer. Peers at
 	 * stratum 1 or lower, and peers showing the loopback reference
 	 * ID (the orphan parent), are exempt.
 	 */
 	if (peer->stratum > 1 && peer->refid != htonl(LOOPBACKADR)) {
 		if (!peer->dstadr)
 			unfit |= TEST12;
 		else if (peer->refid == peer->dstadr->addr_refid)
 			unfit |= TEST12;
 		else if (peer->refid == sys_refid)
 			unfit |= TEST12;
 	}
 
 	/*
 	 * Unreachable error: the server is unreachable or the noselect
 	 * bit is set.
 	 */
 	if (!peer->reach)
 		unfit |= TEST13;
 	else if (peer->flags & FLAG_NOSELECT)
 		unfit |= TEST13;
 
 	peer->flash = (peer->flash & ~PEER_TEST_MASK) | unfit;
 	return (unfit);
 }
 
 
 /*
  * Find the precision of this particular machine
  */
 #define MINSTEP 100e-9		/* minimum clock increment (s) */
 #define MAXSTEP 20e-3		/* maximum clock increment (s) */
 #define MINLOOPS 5		/* minimum number of step samples */
 
 /*
  * default_get_precision - estimate the system clock precision
  *
  * The precision is taken as the smallest difference observed between
  * successive readings of the system clock. If the clock can be read
  * more than once during a tick interval, a difference may be zero or
  * one LSB unit (one nanosecond or one microsecond); if another
  * process preempts this one and reads the clock, the difference may
  * be larger. Differences below MINSTEP are therefore discarded, and
  * MINLOOPS accepted samples are collected.
  *
  * For hardware clock frequencies of 10 MHz or less, we assume the
  * logical clock advances only at the hardware clock tick. For higher
  * frequencies, we assume the logical clock can advance no more than 100
  * nanoseconds between ticks.
  *
  * Returns the precision as a (negated) power-of-two exponent in
  * seconds.
  */
 int
 default_get_precision(void)
 {
 	l_fp	now;		/* current clock reading */
 	l_fp	prev;		/* previous clock reading */
 	l_fp	delta;		/* now - prev */
 	double	step;		/* delta as a double (s) */
 	double	tick;		/* smallest accepted step (s) */
 	int	n;		/* sample counter, then log2 exponent */
 
 	/*
 	 * Collect MINLOOPS samples no smaller than MINSTEP and keep
 	 * the minimum. If nothing smaller shows up, the result stays
 	 * at the default maximum, MAXSTEP.
 	 */
 	tick = MAXSTEP;
 	get_systime(&prev);
 	n = 0;
 	while (n < MINLOOPS) {
 		get_systime(&now);
 		delta = now;
 		L_SUB(&delta, &prev);
 		prev = now;
 		LFPTOD(&delta, step);
 		if (step < MINSTEP)
 			continue;	/* outlier, does not count */
 		n++;
 		if (step < tick)
 			tick = step;
 	}
 
 	NLOG(NLOG_SYSEVENT)
 	    msyslog(LOG_INFO, "precision = %.3f usec", tick * 1e6);
 
 	/*
 	 * Find the nearest power of two: double the tick until it
 	 * exceeds one second, counting the doublings, then back off by
 	 * one if the overshoot is larger than the previous undershoot.
 	 */
 	n = 0;
 	while (tick <= 1) {
 		tick *= 2;
 		n++;
 	}
 	if (tick - 1. > 1. - tick / 2)
 		n--;
 	return (-n);
 }
 
 
 /*
  * kod_proto - called once per second to limit kiss-of-death packets
  *
  * Reloads the kiss-of-death counter sys_kod from the configured rate
  * sys_kod_rate.
  */
 void
 kod_proto(void)
 {
 	sys_kod = sys_kod_rate;		/* refill the bucket */
 }
 
 
 /*
  * init_proto - initialize the protocol module's data
  *
  * Sets the system variables to their unsynchronized startup values,
  * measures the clock precision, picks a random orphan delay, clears
  * the authentication delay and statistics, fills in the TTL table and
  * applies the default enable switches.
  */
 void
 init_proto(void)
 {
 	l_fp	scratch;	/* receives a discarded clock reading */
 	int	n;
 
 	/*
 	 * System state: unsynchronized, no system peer. The default is
 	 * don't listen to broadcasting, authenticate.
 	 */
 	sys_leap = LEAP_NOTINSYNC;
 	sys_stratum = STRATUM_UNSPEC;
 	memcpy(&sys_refid, "INIT", 4);
 	sys_precision = (s_char)default_get_precision();
 	sys_jitter = LOGTOD(sys_precision);
 	sys_rootdelay = 0;
 	sys_orphandelay = (double)(ntp_random() & 0xffff) / 65536. *
 	    sys_maxdist;
 	sys_rootdispersion = 0;
 	L_CLR(&sys_reftime);
 	sys_peer = NULL;
 	sys_survivors = 0;
 	get_systime(&scratch);
 
 	/*
 	 * Association and authentication defaults.
 	 */
 	sys_manycastserver = 0;
 	sys_bclient = 0;
 	sys_bdelay = DEFBROADDELAY;
 	sys_calldelay = BURST_DELAY;
 	sys_authenticate = 1;
 	L_CLR(&sys_authdelay);
 	sys_authdly[0] = sys_authdly[1] = 0;
 
 	/*
 	 * Statistics.
 	 */
 	sys_stattime = 0;
 	proto_clr_stats();
 
 	/*
 	 * TTL table: MAX_TTL entries spread evenly over 0..255;
 	 * sys_ttlmax ends up as the last index filled.
 	 */
 	n = 0;
 	while (n < MAX_TTL) {
 		sys_ttl[n] = (u_char)((n * 256) / MAX_TTL);
 		sys_ttlmax = n;
 		n++;
 	}
 #ifdef OPENSSL
 	sys_automax = 1 << NTP_AUTOMAX;
 #endif /* OPENSSL */
 
 	/*
 	 * Enable switches: clock discipline, kernel discipline (unless
 	 * KERNEL_FLL_BUG is defined) and statistics on, PPS off.
 	 */
 	ntp_enable = 1;
 #ifndef KERNEL_FLL_BUG
 	kern_enable = 1;
 #endif
 	pps_enable = 0;
 	stats_control = 1;
 }
 
 
 /*
  * proto_config - configure the protocol module
  *
  * item    selects the setting to change (one of the PROTO_* codes)
  * value   integer argument, used by the on/off switches and by
  *         PROTO_CALLDELAY
  * dvalue  floating-point argument; several items truncate it to int
  * svalue  socket address, used only by the multicast add/delete items
  *         and ignored there when NULL
  *
  * An unrecognized item is logged and otherwise ignored.
  */
 void
 proto_config(
 	int	item,
 	u_long	value,
 	double	dvalue,
 	struct sockaddr_storage* svalue
 	)
 {
 	/*
 	 * Figure out what he wants to change, then do it
 	 */
 	switch (item) {
 
 	/*
 	 * Turn on/off kernel discipline.
 	 */
 	case PROTO_KERNEL:
 		kern_enable = (int)value;
 		break;
 
 	/*
 	 * Turn on/off clock discipline.
 	 */
 	case PROTO_NTP:
 		ntp_enable = (int)value;
 		break;
 
 	/*
 	 * Turn on/off monitoring.
 	 */
 	case PROTO_MONITOR:
 		if (value)
 			mon_start(MON_ON);
 		else
 			mon_stop(MON_ON);
 		break;
 
 	/*
 	 * Turn on/off statistics.
 	 */
 	case PROTO_FILEGEN:
 		stats_control = (int)value;
 		break;
 
 	/*
 	 * Turn on/off reception of broadcasts.
 	 */
 	case PROTO_BROADCLIENT:
 		sys_bclient = (int)value;
 		if (sys_bclient == 0)
 			io_unsetbclient();
 		else
 			io_setbclient();
 		break;
 
 	/*
 	 * Turn on/off PPS discipline.
 	 */
 	case PROTO_PPS:
 		pps_enable = (int)value;
 		break;
 
 	/*
 	 * Add multicast group address. The broadcast client flag is
 	 * set even when no address was supplied.
 	 */
 	case PROTO_MULTICAST_ADD:
 		if (svalue)
 		    io_multicast_add(*svalue);
 		sys_bclient = 1;
 		break;
 
 	/*
 	 * Delete multicast group address.
 	 */
 	case PROTO_MULTICAST_DEL:
 		if (svalue)
 		    io_multicast_del(*svalue);
 		break;
 
 	/*
 	 * Set default broadcast delay.
 	 */
 	case PROTO_BROADDELAY:
 		sys_bdelay = dvalue;
 		break;
 
 	/*
 	 * Set modem call delay.
 	 */
 	case PROTO_CALLDELAY:
 		sys_calldelay = (int)value;
 		break;
 
 	/*
 	 * Turn on/off authentication to mobilize ephemeral
 	 * associations.
 	 */
 	case PROTO_AUTHENTICATE:
 		sys_authenticate = (int)value;
 		break;
 
 	/*
 	 * Set minimum number of survivors.
 	 */
 	case PROTO_MINCLOCK:
 		sys_minclock = (int)dvalue;
 		break;
 
 	/*
 	 * Set maximum number of preemptable associations.
 	 */
 	case PROTO_MAXCLOCK:
 		sys_maxclock = (int)dvalue;
 		break;
 
 	/*
 	 * Set the minsane threshold (sys_minsane).
 	 */
 	case PROTO_MINSANE:
 		sys_minsane = (int)dvalue;
 		break;
 
 	/*
 	 * Set stratum floor.
 	 */
 	case PROTO_FLOOR:
 		sys_floor = (int)dvalue;
 		break;
 
 	/*
 	 * Set stratum ceiling.
 	 */
 	case PROTO_CEILING:
 		sys_ceiling = (int)dvalue;
 		break;
 
 	/*
 	 * Set orphan stratum.
 	 */
 	case PROTO_ORPHAN:
 		sys_orphan = (int)dvalue;
 		break;
 
 	/*
 	 * Set cohort switch.
 	 */
 	case PROTO_COHORT:
 		sys_cohort = (int)dvalue;
 		break;
 
 	/*
 	 * Set minimum dispersion increment.
 	 */
 	case PROTO_MINDISP:
 		sys_mindisp = dvalue;
 		break;
 
 	/*
 	 * Set maximum distance (select threshold).
 	 */
 	case PROTO_MAXDIST:
 		sys_maxdist = dvalue;
 		break;
 
 	/*
 	 * Set anticlockhop threshold.
 	 */
 	case PROTO_MAXHOP:
 		sys_maxhop = (int)dvalue;
 		break;
 
 	/*
 	 * Set adjtime() resolution (s).
 	 */
 	case PROTO_ADJ:
 		sys_tick = dvalue;
 		break;
 
 	/*
 	 * Set manycast beacon interval.
 	 */
 	case PROTO_BEACON:
 		sys_beacon = (int)dvalue;
 		break;
 
 #ifdef REFCLOCK
 	/*
 	 * Turn on/off refclock calibrate
 	 */
 	case PROTO_CAL:
 		cal_enable = (int)value;
 		break;
 #endif /* REFCLOCK */
 	default:
 
 		/*
 		 * Log this error. value is unsigned long, so it must
 		 * be printed with %lu rather than %ld.
 		 */
 		msyslog(LOG_INFO,
 		    "proto_config: illegal item %d, value %lu", item,
 		    value);
 		break;
 	}
 }
 
 
 /*
  * proto_clr_stats - clear protocol stat counters
  *
  * Records current_time in sys_stattime as the moment of the reset and
  * zeroes every packet counter kept by this module.
  */
 void
 proto_clr_stats(void)
 {
 	sys_stattime = current_time;	/* time of this reset */
 	sys_received = 0;
 	sys_processed = 0;
 	sys_newversionpkt = 0;
 	sys_oldversionpkt = 0;
 	sys_unknownversion = 0;
 	sys_restricted = 0;
 	sys_badlength = 0;
 	sys_badauth = 0;
 	sys_limitrejected = 0;
 }
Index: stable/8/sys/netinet/igmp.c
===================================================================
--- stable/8/sys/netinet/igmp.c	(revision 281230)
+++ stable/8/sys/netinet/igmp.c	(revision 281231)
@@ -1,3648 +1,3647 @@
 /*-
  * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 1988 Stephen Deering.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
  */
 
 /*
  * Internet Group Management Protocol (IGMP) routines.
  * [RFC1112, RFC2236, RFC3376]
  *
  * Written by Steve Deering, Stanford, May 1988.
  * Modified by Rosen Sharma, Stanford, Aug 1994.
  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
  *
  * MULTICAST Revision: 3.5.1.4
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/igmp.h>
 #include <netinet/igmp_var.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /* Trace class for the CTRn() calls in this file; falls back to KTR_INET. */
 #ifndef KTR_IGMPV3
 #define KTR_IGMPV3 KTR_INET
 #endif
 
 /*
  * Forward declarations of the file-local helpers, in roughly
  * alphabetical order. Parameters marked with a commented-out const
  * are declared non-const.
  */
 static struct igmp_ifinfo *
 		igi_alloc_locked(struct ifnet *);
 static void	igi_delete_locked(const struct ifnet *);
 static void	igmp_dispatch_queue(struct ifqueue *, int, const int);
 static void	igmp_fasttimo_vnet(void);
 static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
 static int	igmp_handle_state_change(struct in_multi *,
 		    struct igmp_ifinfo *);
 static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
 static int	igmp_input_v1_query(struct ifnet *, const struct ip *,
 		    const struct igmp *);
 static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
 		    const struct igmp *);
 static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
 		    /*const*/ struct igmpv3 *);
 static int	igmp_input_v3_group_query(struct in_multi *,
 		    struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
 static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
 		    /*const*/ struct igmp *);
 static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
 		    /*const*/ struct igmp *);
 static void	igmp_intr(struct mbuf *);
 static int	igmp_isgroupreported(const struct in_addr);
 static struct mbuf *
 		igmp_ra_alloc(void);
 #ifdef KTR
 static char *	igmp_rec_type_to_str(const int);
 #endif
 static void	igmp_set_version(struct igmp_ifinfo *, const int);
 static void	igmp_slowtimo_vnet(void);
 static int	igmp_v1v2_queue_report(struct in_multi *, const int);
 static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
 static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
 static void	igmp_v2_update_group(struct in_multi *, const int);
 static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
 static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
 static struct mbuf *
 		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
 static int	igmp_v3_enqueue_group_record(struct ifqueue *,
 		    struct in_multi *, const int, const int, const int);
 static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
 		    struct in_multi *);
 static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
 		    struct ifqueue *, struct ifqueue *, struct in_multi *,
 		    const int);
 static int	igmp_v3_merge_state_changes(struct in_multi *,
 		    struct ifqueue *);
 static void	igmp_v3_suppress_group_record(struct in_multi *);
 static int	sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
 static int	sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
 static int	sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
 
 /*
  * netisr handler description for IGMP: names igmp_intr() as the handler
  * for the NETISR_IGMP protocol (the one igmp_dispatch_queue() dispatches
  * to).
  */
 static const struct netisr_handler igmp_nh = {
 	.nh_name = "igmp",
 	.nh_handler = igmp_intr,
 	.nh_proto = NETISR_IGMP,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 /*
  * System-wide globals.
  *
  * Unlocked access to these is OK, except for the global IGMP output
  * queue. The IGMP subsystem lock ends up being system-wide for the moment,
  * because all VIMAGEs have to share a global output queue, as netisrs
  * themselves are not virtualized.
  *
  * Locking:
  *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  *    Any may be taken independently; if any are held at the same
  *    time, the above lock order must be followed.
  *  * All output is delegated to the netisr.
  *    Now that Giant has been eliminated, the netisr may be inlined.
  *  * IN_MULTI_LOCK covers in_multi.
  *  * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
  *    including the output queue.
  *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
  *    per-link state iterators.
  *  * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
  *    therefore it is not refcounted.
  *    We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
  *
  * Reference counting
  *  * IGMP acquires its own reference every time an in_multi is passed to
  *    it and the group is being joined for the first time.
  *  * IGMP releases its reference(s) on in_multi in a deferred way,
  *    because the operations which process the release run as part of
  *    a loop whose control variables are directly affected by the release
  *    (that, and not recursing on the IF_ADDR_LOCK).
  *
  * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
  * to a vnet in ifp->if_vnet.
  *
  * SMPng: XXX We may potentially race operations on ifma_protospec.
  * The problem is that we currently lack a clean way of taking the
  * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
  * as anything which modifies ifma needs to be covered by that lock.
  * So check for ifma_protospec being NULL before proceeding.
  */
 /* NOTE(review): presumably the mutex behind IGMP_LOCK() - confirm in igmp_var.h. */
 struct mtx		 igmp_mtx;
 
 struct mbuf		*m_raopt;		 /* Router Alert option */
 /* Malloc type used for the igmp_ifinfo allocations in this file. */
 MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
 
 /*
  * VIMAGE-wide globals.
  *
  * The IGMPv3 timers themselves need to run per-image, however,
  * protosw timers run globally (see tcp).
  * An ifnet can only be in one vimage at a time, and the loopback
  * ifnet, loif, is itself virtualized.
  * It would otherwise be possible to seriously hose IGMP state,
  * and create inconsistencies in upstream multicast routing, if you have
  * multiple VIMAGEs running on the same link joining different multicast
  * groups, UNLESS the "primary IP address" is different. This is because
  * IGMP for IPv4 does not force link-local addresses to be used for each
  * node, unlike MLD for IPv6.
  * Obviously the IGMPv3 per-interface state has per-vimage granularity
  * also as a result.
  *
  * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
  * policy to control the address used by IGMP on the link.
  */
 /*
  * Per-vnet "some timer of this kind is running" flags. The query
  * handlers in this file set current_state_timers_running to 1 whenever
  * they arm a group timer.
  */
 static VNET_DEFINE(int, interface_timers_running);	/* IGMPv3 general
 							 * query response */
 static VNET_DEFINE(int, state_change_timers_running);	/* IGMPv3 state-change
 							 * retransmit */
 static VNET_DEFINE(int, current_state_timers_running);	/* IGMPv1/v2 host
 							 * report; IGMPv3 g/sg
 							 * query response */
 
 #define	V_interface_timers_running	VNET(interface_timers_running)
 #define	V_state_change_timers_running	VNET(state_change_timers_running)
 #define	V_current_state_timers_running	VNET(current_state_timers_running)
 
 /* List of all igmp_ifinfo records in this vnet, linked via igi_link. */
 static VNET_DEFINE(LIST_HEAD(, igmp_ifinfo), igi_head);
 /* Statistics block exported through the "stats" sysctl. */
 static VNET_DEFINE(struct igmpstat, igmpstat) = {
 	.igps_version = IGPS_VERSION_3,
 	.igps_len = sizeof(struct igmpstat),
 };
 /* Group-and-Source query rate limit; tv_sec is the "gsrdelay" sysctl. */
 static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0};
 
 #define	V_igi_head			VNET(igi_head)
 #define	V_igmpstat			VNET(igmpstat)
 #define	V_igmp_gsrdelay			VNET(igmp_gsrdelay)
 
 /*
  * Tunables; each is exported under net.inet.igmp by the sysctl
  * declarations in this file, which also describe their meaning.
  * igmp_legacysupp has no initializer.
  */
 static VNET_DEFINE(int, igmp_recvifkludge) = 1;
 static VNET_DEFINE(int, igmp_sendra) = 1;
 static VNET_DEFINE(int, igmp_sendlocal) = 1;
 static VNET_DEFINE(int, igmp_v1enable) = 1;
 static VNET_DEFINE(int, igmp_v2enable) = 1;
 static VNET_DEFINE(int, igmp_legacysupp);
 static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;
 
 #define	V_igmp_recvifkludge		VNET(igmp_recvifkludge)
 #define	V_igmp_sendra			VNET(igmp_sendra)
 #define	V_igmp_sendlocal		VNET(igmp_sendlocal)
 #define	V_igmp_v1enable			VNET(igmp_v1enable)
 #define	V_igmp_v2enable			VNET(igmp_v2enable)
 #define	V_igmp_legacysupp		VNET(igmp_legacysupp)
 #define	V_igmp_default_version		VNET(igmp_default_version)
 
 /*
  * Virtualized sysctls.
  *
  * The plain integer knobs are read/write and map directly onto the
  * per-vnet variables. default_version and gsrdelay go through handler
  * functions, which range-check the new value before storing it.
  */
 SYSCTL_VNET_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW,
     &VNET_NAME(igmpstat), igmpstat, "");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW,
     &VNET_NAME(igmp_recvifkludge), 0,
     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW,
     &VNET_NAME(igmp_sendra), 0,
     "Send IP Router Alert option in IGMPv2/v3 messages");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW,
     &VNET_NAME(igmp_sendlocal), 0,
     "Send IGMP membership reports for 224.0.0.0/24 groups");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW,
     &VNET_NAME(igmp_v1enable), 0,
     "Enable backwards compatibility with IGMPv1");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW,
     &VNET_NAME(igmp_v2enable), 0,
     "Enable backwards compatibility with IGMPv2");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW,
     &VNET_NAME(igmp_legacysupp), 0,
     "Allow v1/v2 reports to suppress v3 group responses");
 SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, default_version,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
     "Default version of IGMP to run on each interface");
 SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
 
 /*
  * Non-virtualized sysctls.
  *
  * ifinfo is a read-only node; its handler takes the interface index as
  * the single trailing name component.
  */
 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
 
 /*
  * Stash the originating interface in the packet header of m: the
  * interface index goes into flowid and, on VIMAGE kernels, the
  * interface's vnet goes into header. igmp_restore_context() reads the
  * index back.
  */
 static __inline void
 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
 {
 
 	m->m_pkthdr.flowid = ifp->if_index;
 #ifdef VIMAGE
 	m->m_pkthdr.header = ifp->if_vnet;
 #endif /* VIMAGE */
 }
 
 /*
  * Clear the two packet-header fields that igmp_save_context() uses,
  * resetting flowid to 0 and header to NULL.
  */
 static __inline void
 igmp_scrub_context(struct mbuf *m)
 {
 
 	m->m_pkthdr.flowid = 0;
 	m->m_pkthdr.header = NULL;
 }
 
 #ifdef KTR
 /*
  * Format an IPv4 address given in host byte order, for trace output.
  * The string comes straight from inet_ntoa().
  */
 static __inline char *
 inet_ntoa_haddr(in_addr_t haddr)
 {
 	struct in_addr netaddr = { .s_addr = htonl(haddr) };
 
 	return (inet_ntoa(netaddr));
 }
 #endif
 
 /*
  * Recover the context saved on a queued IGMP output chain and return
  * the interface index that igmp_save_context() stored in flowid.
  *
  * VIMAGE: The (currently disabled) assertion is there to make sure
  * that CURVNET_SET() was actually called with what's in the mbuf chain.
  */
 static __inline uint32_t
 igmp_restore_context(struct mbuf *m)
 {
 	uint32_t ifindex;
 
 #if defined(notyet) && defined(VIMAGE) && defined(INVARIANTS)
 	KASSERT(curvnet == (m->m_pkthdr.header),
 	    ("%s: called when curvnet was not restored", __func__));
 #endif
 	ifindex = m->m_pkthdr.flowid;
 	return (ifindex);
 }
 
 /*
  * Retrieve or set default IGMP version.
  *
  * VIMAGE: Assume curvnet set by caller.
  * SMPng: NOTE: Serialized by IGMP lock.
  */
 static int
 sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
 {
 	int	 error;
 	int	 new;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error)
 		return (error);
 
 	IGMP_LOCK();
 
 	new = V_igmp_default_version;
 
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error || !req->newptr)
 		goto out_locked;
 
 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
 	     V_igmp_default_version, new);
 
 	V_igmp_default_version = new;
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Retrieve or set threshold between group-source queries in seconds.
  *
  * VIMAGE: Assume curvnet set by caller.
  * SMPng: NOTE: Serialized by IGMP lock.
  */
 static int
 sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error)
 		return (error);
 
 	IGMP_LOCK();
 
 	i = V_igmp_gsrdelay.tv_sec;
 
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		goto out_locked;
 
 	if (i < -1 || i >= 60) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
 	     V_igmp_gsrdelay.tv_sec, i);
 	V_igmp_gsrdelay.tv_sec = i;
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Sysctl handler: copy out the struct igmp_ifinfo of one interface,
  * keyed by ifindex. For use by ifmcstat(8).
  *
  * The node is read-only (a write fails with EPERM) and takes exactly
  * one name component, the interface index (otherwise EINVAL). ENOENT
  * is returned when the index is out of range, names no interface, or
  * the interface has no igmp_ifinfo on this vnet's list.
  *
  * SMPng: NOTE: Does an unlocked ifindex space read.
  * VIMAGE: Assume curvnet set by caller. The node handler itself
  * is not directly virtualized.
  */
 static int
 sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
 {
 	struct igmp_ifinfo	*igi;
 	struct ifnet		*ifp;
 	int			*name;
 	u_int			 namelen;
 	int			 error;
 
 	name = (int *)arg1;
 	namelen = arg2;
 
 	if (req->newptr != NULL)
 		return (EPERM);
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
 	if (error != 0)
 		return (error);
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	/* Assume failure until a matching record has been copied out. */
 	error = ENOENT;
 	if (name[0] > 0 && name[0] <= V_if_index) {
 		ifp = ifnet_byindex(name[0]);
 		if (ifp != NULL) {
 			LIST_FOREACH(igi, &V_igi_head, igi_link) {
 				if (igi->igi_ifp != ifp)
 					continue;
 				error = SYSCTL_OUT(req, igi,
 				    sizeof(struct igmp_ifinfo));
 				break;
 			}
 		}
 	}
 
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Dispatch an entire queue of pending packet chains
  * using the netisr.
  *
  * ifq    queue to drain
  * limit  stop after this many packets have been dispatched; since the
  *        counter is decremented and compared against zero, a limit
  *        of 0 (or less) in practice drains the whole queue
  * loop   when non-zero, each packet is flagged M_IGMP_LOOP before
  *        being dispatched
  *
  * VIMAGE: Assumes the vnet pointer has been set.
  */
 static void
 igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
 {
 	struct mbuf *m;
 
 	for (;;) {
 		_IF_DEQUEUE(ifq, m);
 		if (m == NULL)
 			break;
 		/* Trace arguments follow the text: the mbuf, then its queue. */
 		CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, m, ifq);
 		if (loop)
 			m->m_flags |= M_IGMP_LOOP;
 		netisr_dispatch(NETISR_IGMP, m);
 		if (--limit == 0)
 			break;
 	}
 }
 
 /*
  * Filter outgoing IGMP report state by group.
  *
  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
  * If the net.inet.igmp.sendlocal sysctl is 0, reports are also
  * suppressed for every group in the 224.0.0.0/24 link-local scope.
  * Be aware that this may break certain IGMP snooping switches which
  * rely on the old report behaviour.
  *
  * Returns 0 when reports for addr must be suppressed and 1 when they
  * should be issued.
  */
 static __inline int
 igmp_isgroupreported(const struct in_addr addr)
 {
 
 	/* The all-hosts group is never reported. */
 	if (in_allhosts(addr))
 		return (0);
 
 	/* Link-local groups are reported only while sendlocal is set. */
 	if (!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))
 		return (0);
 
 	return (1);
 }
 
 /*
  * Construct a Router Alert option to use in outgoing packets.
  */
 static struct mbuf *
 igmp_ra_alloc(void)
 {
 	struct mbuf	*m;
 	struct ipoption	*p;
 
 	MGET(m, M_DONTWAIT, MT_DATA);
 	p = mtod(m, struct ipoption *);
 	p->ipopt_dst.s_addr = INADDR_ANY;
 	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
 	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
 	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
 	p->ipopt_list[3] = 0x00;	/* pad byte */
 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
 
 	return (m);
 }
 
 /*
  * Attach IGMP when PF_INET is attached to an interface.
  */
 struct igmp_ifinfo *
 igmp_domifattach(struct ifnet *ifp)
 {
 	struct igmp_ifinfo *igi;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
 
 	igi = igi_alloc_locked(ifp);
 	if (!(ifp->if_flags & IFF_MULTICAST))
 		igi->igi_flags |= IGIF_SILENT;
 
 	IGMP_UNLOCK();
 
 	return (igi);
 }
 
 /*
  * Allocate and initialize the IGMP state for ifp and link it onto the
  * per-vnet list. The caller must hold the IGMP lock.
  *
  * Returns the new record, or NULL if the non-blocking allocation
  * failed.
  *
  * VIMAGE: assume curvnet set by caller.
  */
 static struct igmp_ifinfo *
 igi_alloc_locked(/*const*/ struct ifnet *ifp)
 {
 	struct igmp_ifinfo *igi;
 
 	IGMP_LOCK_ASSERT();
 
 	igi = malloc(sizeof(*igi), M_IGMP, M_NOWAIT|M_ZERO);
 	if (igi == NULL)
 		return (NULL);
 
 	/* Start from the default version and the initial protocol timers. */
 	igi->igi_ifp = ifp;
 	igi->igi_flags = 0;
 	igi->igi_version = V_igmp_default_version;
 	igi->igi_rv = IGMP_RV_INIT;
 	igi->igi_qi = IGMP_QI_INIT;
 	igi->igi_qri = IGMP_QRI_INIT;
 	igi->igi_uri = IGMP_URI_INIT;
 
 	/* No in_multi releases are pending yet. */
 	SLIST_INIT(&igi->igi_relinmhead);
 
 	/*
 	 * Responses to general queries are subject to bounds.
 	 */
 	IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
 
 	LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
 
 	CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
 	     ifp, ifp->if_xname);
 
 	return (igi);
 }
 
 /*
  * Hook for ifdetach.
  *
  * For an interface running IGMPv3, walks its AF_INET multicast
  * memberships, clears each group's recorded sources, queues the groups
  * that are in the LEAVING state for release, and then drops those
  * references. Interfaces running other versions are left alone.
  *
  * NOTE: Some finalization tasks need to run before the protocol domain
  * is detached, but also before the link layer does its cleanup.
  *
  * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
  * XXX This is also bitten by unlocked ifma_protospec access.
  */
 void
 igmp_ifdetach(struct ifnet *ifp)
 {
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*ifma;
 	struct in_multi		*inm, *tinm;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
 	    ifp->if_xname);
 
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	if (igi->igi_version == IGMP_VERSION_3) {
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			/* Skip non-IPv4 entries and ones without IGMP state. */
 			if (ifma->ifma_addr->sa_family != AF_INET ||
 			    ifma->ifma_protospec == NULL)
 				continue;
 #if 0
 			KASSERT(ifma->ifma_protospec != NULL,
 			    ("%s: ifma_protospec is NULL", __func__));
 #endif
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			/*
 			 * Only queue the release here; the actual release
 			 * happens after the address lock is dropped.
 			 */
 			if (inm->inm_state == IGMP_LEAVING_MEMBER) {
 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
 				    inm, inm_nrele);
 			}
 			inm_clear_recorded(inm);
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		/*
 		 * Free the in_multi reference(s) for this IGMP lifecycle.
 		 */
 		SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele,
 		    tinm) {
 			SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
 			inm_release_locked(inm);
 		}
 	}
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Hook for domifdetach.
  *
  * Frees the IGMP state that belongs to ifp. The record is looked up by
  * interface pointer inside igi_delete_locked(), so nothing needs to be
  * fetched from the interface's address-family data here.
  */
 void
 igmp_domifdetach(struct ifnet *ifp)
 {
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
 	igi_delete_locked(ifp);
 	IGMP_UNLOCK();
 }
 
 /*
  * Unlink and free the igmp_ifinfo that belongs to ifp. The caller must
  * hold the IGMP lock. If no record exists for ifp the function panics
  * on INVARIANTS kernels and returns silently otherwise.
  */
 static void
 igi_delete_locked(const struct ifnet *ifp)
 {
 	struct igmp_ifinfo *igi, *tigi;
 
 	CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK_ASSERT();
 
 	LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) {
 		if (igi->igi_ifp != ifp)
 			continue;
 
 		/*
 		 * Free deferred General Query responses.
 		 */
 		_IF_DRAIN(&igi->igi_gq);
 
 		LIST_REMOVE(igi, igi_link);
 
 		/* Every deferred in_multi release must be done by now. */
 		KASSERT(SLIST_EMPTY(&igi->igi_relinmhead),
 		    ("%s: there are dangling in_multi references",
 		    __func__));
 
 		free(igi, M_IGMP);
 		return;
 	}
 
 #ifdef INVARIANTS
 	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__,  ifp);
 #endif
 }
 
 /*
  * Process a received IGMPv1 query.
  * Return non-zero if the message should be dropped.
  * (Every path through this function returns 0.)
  *
  * ifp   interface the query arrived on
  * ip    IP header of the query; only ip_dst is examined
  * igmp  IGMP header of the query; only igmp_group is examined
  *
  * VIMAGE: The curvnet pointer is derived from the input ifp.
  */
 static int
 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
     const struct igmp *igmp)
 {
 	struct ifmultiaddr	*ifma;
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 
 	/*
 	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
 	 * 224.0.0.1. They are always treated as General Queries.
 	 * igmp_group is always ignored. Do not drop it as a userland
 	 * daemon may wish to see it.
 	 * XXX SMPng: unlocked increments in igmpstat assumed atomic.
 	 */
 	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
 		IGMPSTAT_INC(igps_rcv_badqueries);
 		return (0);
 	}
 	IGMPSTAT_INC(igps_rcv_gen_queries);
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	/* Queries are not acted upon for IGIF_LOOPBACK interfaces. */
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/*
 	 * Switch to IGMPv1 host compatibility mode.
 	 */
 	igmp_set_version(igi, IGMP_VERSION_1);
 
 	CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 	/*
 	 * Start the timers in all of our group records
 	 * for the interface on which the query arrived,
 	 * except those which are already running.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		/* Skip non-IPv4 entries and ones without IGMP state. */
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		/* A non-zero timer is already running; leave it alone. */
 		if (inm->inm_timer != 0)
 			continue;
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_REPORTING_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			/*
 			 * Become the reporter with a random delay bounded
 			 * by the v1/v2 maximum report interval.
 			 */
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			inm->inm_timer = IGMP_RANDOM_DELAY(
 			    IGMP_V1V2_MAX_RI * PR_FASTHZ);
 			V_current_state_timers_running = 1;
 			break;
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a received IGMPv2 general or group-specific query.
  *
  * ifp   interface the query arrived on
  * ip    IP header of the query; only ip_dst is examined
  * igmp  IGMP header; igmp_group selects general (null) versus
  *       group-specific handling and igmp_code supplies the timer
  *
  * Every path through this function returns 0.
  */
 static int
 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
     const struct igmp *igmp)
 {
 	struct ifmultiaddr	*ifma;
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 	int			 is_general_query;
 	uint16_t		 timer;
 
 	is_general_query = 0;
 
 	/*
 	 * Validate address fields upfront.
 	 * XXX SMPng: unlocked increments in igmpstat assumed atomic.
 	 */
 	if (in_nullhost(igmp->igmp_group)) {
 		/*
 		 * IGMPv2 General Query.
 		 * If this was not sent to the all-hosts group, ignore it.
 		 */
 		if (!in_allhosts(ip->ip_dst))
 			return (0);
 		IGMPSTAT_INC(igps_rcv_gen_queries);
 		is_general_query = 1;
 	} else {
 		/* IGMPv2 Group-Specific Query. */
 		IGMPSTAT_INC(igps_rcv_group_queries);
 	}
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	/* Queries are not acted upon for IGIF_LOOPBACK interfaces. */
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/*
 	 * Ignore v2 query if in v1 Compatibility Mode.
 	 */
 	if (igi->igi_version == IGMP_VERSION_1)
 		goto out_locked;
 
 	igmp_set_version(igi, IGMP_VERSION_2);
 
 	/*
 	 * Rescale igmp_code from IGMP_TIMER_SCALE units to PR_FASTHZ
 	 * ticks, using at least one tick so the timer is never zero.
 	 */
 	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
 	if (is_general_query) {
 		/*
 		 * For each reporting group joined on this
 		 * interface, kick the report timer.
 		 */
 		CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			/* Skip non-IPv4 entries and ones without IGMP state. */
 			if (ifma->ifma_addr->sa_family != AF_INET ||
 			    ifma->ifma_protospec == NULL)
 				continue;
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			igmp_v2_update_group(inm, timer);
 		}
 		IF_ADDR_RUNLOCK(ifp);
 	} else {
 		/*
 		 * Group-specific IGMPv2 query, we need only
 		 * look up the single group to process it.
 		 */
 		inm = inm_lookup(ifp, igmp->igmp_group);
 		if (inm != NULL) {
 			CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			igmp_v2_update_group(inm, timer);
 		}
 	}
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Update the report timer on a group in response to an IGMPv2 query.
  *
  * inm    group to update; the caller must hold IN_MULTI_LOCK
  * timer  upper bound, in fast-timeout ticks, for the new random delay
  *
  * If we are becoming the reporting member for this group, start the timer.
  * If we already are the reporting member for this group and a timer is
  * running that expires no later than the new bound, leave it alone;
  * otherwise reset it.
  *
  * We may be updating the group for the first time since we switched
  * to IGMPv3. If we are, then we must clear any recorded source lists,
  * and transition to REPORTING state; the group timer is overloaded
  * for group and group-source query responses.
  *
  * Unlike IGMPv3, the delay per group should be jittered
  * to avoid bursts of IGMPv2 reports.
  */
 static void
 igmp_v2_update_group(struct in_multi *inm, const int timer)
 {
 
 	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
 	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);
 
 	IN_MULTI_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 		/* Nothing is reported for these states. */
 		break;
 	case IGMP_REPORTING_MEMBER:
 		/* A running timer that already fires soon enough is kept. */
 		if (inm->inm_timer != 0 &&
 		    inm->inm_timer <= timer) {
 			CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
 			    "skipping.", __func__);
 			break;
 		}
 		/* FALLTHROUGH */
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		/* (Re)arm the report timer with a random delay. */
 		CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
 		inm->inm_state = IGMP_REPORTING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		break;
 	case IGMP_SLEEPING_MEMBER:
 		/* Only the state changes here; no timer is started. */
 		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
 		inm->inm_state = IGMP_AWAKENING_MEMBER;
 		break;
 	case IGMP_LEAVING_MEMBER:
 		break;
 	}
 }
 
 /*
  * Process a received IGMPv3 general, group-specific or
  * group-and-source-specific query.
  * Assumes m has already been pulled up to the full IGMP message length.
  * Return 0 if successful, otherwise an appropriate error code is returned.
  */
 static int
 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
     /*const*/ struct igmpv3 *igmpv3)
 {
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 	int			 is_general_query;
 	uint32_t		 maxresp, nsrc, qqi;
 	uint16_t		 timer;
 	uint8_t			 qrv;
 
 	is_general_query = 0;
 
 	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 	/*
 	 * Codes below 128 are used as-is; codes of 128 and above are
 	 * expanded as mantissa << (exponent + 3).
 	 */
 	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
 	if (maxresp >= 128) {
 		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
 			  (IGMP_EXP(igmpv3->igmp_code) + 3);
 	}
 
 	/*
 	 * Robustness must never be less than 2 for on-wire IGMPv3.
 	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
 	 * an exception for interfaces whose IGMPv3 state changes
 	 * are redirected to loopback (e.g. MANET).
 	 */
 	qrv = IGMP_QRV(igmpv3->igmp_misc);
 	if (qrv < 2) {
 		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
 		    qrv, IGMP_RV_INIT);
 		qrv = IGMP_RV_INIT;
 	}
 
 	/* The query interval code uses the same expansion as maxresp. */
 	qqi = igmpv3->igmp_qqi;
 	if (qqi >= 128) {
 		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
 		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
 	}
 
 	/*
 	 * Scale the response delay by PR_FASTHZ / IGMP_TIMER_SCALE;
 	 * a computed value of 0 is raised to 1.
 	 */
 	timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
 	nsrc = ntohs(igmpv3->igmp_numsrc);
 
 	/*
 	 * Validate address fields and versions upfront before
 	 * accepting v3 query.
 	 * XXX SMPng: Unlocked access to igmpstat counters here.
 	 */
 	if (in_nullhost(igmpv3->igmp_group)) {
 		/*
 		 * IGMPv3 General Query.
 		 *
 		 * General Queries SHOULD be directed to 224.0.0.1.
 		 * A general query with a source list has undefined
 		 * behaviour; discard it.
 		 */
 		IGMPSTAT_INC(igps_rcv_gen_queries);
 		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
 			IGMPSTAT_INC(igps_rcv_badqueries);
 			/* Discarded, but still reported to the caller as 0. */
 			return (0);
 		}
 		is_general_query = 1;
 	} else {
 		/* Group or group-source specific query. */
 		if (nsrc == 0)
 			IGMPSTAT_INC(igps_rcv_group_queries);
 		else
 			IGMPSTAT_INC(igps_rcv_gsr_queries);
 	}
 
 	/*
 	 * Both locks are held from here until out_locked, where they
 	 * are dropped in the reverse order.
 	 */
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/*
 	 * Discard the v3 query if we're in Compatibility Mode.
 	 * The RFC is not obviously worded that hosts need to stay in
 	 * compatibility mode until the Old Version Querier Present
 	 * timer expires.
 	 */
 	if (igi->igi_version != IGMP_VERSION_3) {
 		CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)",
 		    igi->igi_version, ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/* Record the querier's advertised parameters on the interface. */
 	igmp_set_version(igi, IGMP_VERSION_3);
 	igi->igi_rv = qrv;
 	igi->igi_qi = qqi;
 	igi->igi_qri = maxresp;
 
 	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
 	    maxresp);
 
 	if (is_general_query) {
 		/*
 		 * Schedule a current-state report on this ifp for
 		 * all groups, possibly containing source lists.
 		 * If there is a pending General Query response
 		 * scheduled earlier than the selected delay, do
 		 * not schedule any other reports.
 		 * Otherwise, reset the interface timer.
 		 */
 		CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
 			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
 			V_interface_timers_running = 1;
 		}
 	} else {
 		/*
 		 * Group-source-specific queries are throttled on
 		 * a per-group basis to defeat denial-of-service attempts.
 		 * Queries for groups we are not a member of on this
 		 * link are simply ignored.
 		 */
 		inm = inm_lookup(ifp, igmpv3->igmp_group);
 		if (inm == NULL)
 			goto out_locked;
 		if (nsrc > 0) {
 			if (!ratecheck(&inm->inm_lastgsrtv,
 			    &V_igmp_gsrdelay)) {
 				CTR1(KTR_IGMPV3, "%s: GS query throttled.",
 				    __func__);
 				IGMPSTAT_INC(igps_drop_gsr_queries);
 				goto out_locked;
 			}
 		}
 		CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)",
 		     inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname);
 		/*
 		 * If there is a pending General Query response
 		 * scheduled sooner than the selected delay, no
 		 * further report need be scheduled.
 		 * Otherwise, prepare to respond to the
 		 * group-specific or group-and-source query.
 		 */
 		/* The helper's return value is not examined here. */
 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
 			igmp_input_v3_group_query(inm, igi, timer, igmpv3);
 	}
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	/* Every path through this function, including discards, returns 0. */
 	return (0);
 }
 
 /*
  * Process a received IGMPv3 group-specific or group-and-source-specific
  * query.
  * Return <0 if any error occurred. Currently this is ignored.
  */
 static int
 igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
     int timer, /*const*/ struct igmpv3 *igmpv3)
 {
 	const struct in_addr	*src;
 	int			 idx, nrecorded, status;
 	uint16_t		 nsrc;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	status = 0;
 
 	/*
 	 * Only memberships in the REPORTING or one of the query-pending
 	 * states are processed; the other listed states return at once.
 	 */
 	switch (inm->inm_state) {
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		break;
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		return (status);
 	}
 
 	nsrc = ntohs(igmpv3->igmp_numsrc);
 
 	if (nsrc == 0) {
 		/*
 		 * Group-specific query.  If some query response is already
 		 * pending, discard its recorded sources and keep whichever
 		 * delay is shorter before scheduling the group response.
 		 */
 		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
 			inm_clear_recorded(inm);
 			timer = min(inm->inm_timer, timer);
 		}
 		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		return (status);
 	}
 
 	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
 		/*
 		 * Group-and-source query while a group-specific response
 		 * is pending: only the timer is re-armed, using the
 		 * shorter of the two delays.
 		 */
 		timer = min(inm->inm_timer, timer);
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		return (status);
 	}
 
 	/*
 	 * Group-and-source query with no group-specific response pending.
 	 * Without any sources on this membership there is nothing to
 	 * record, so no report is scheduled.
 	 */
 	if (inm->inm_nsrc <= 0)
 		return (status);
 
 	/*
 	 * Walk the source list that immediately follows the fixed query
 	 * header and record each address against the group.
 	 * FIXME: Handling source lists larger than 1 mbuf requires that
 	 * we pass the mbuf chain pointer down to this function, and use
 	 * m_getptr() to walk the chain.
 	 */
 	src = (const struct in_addr *)(igmpv3 + 1);
 	nrecorded = 0;
 	idx = 0;
 	while (idx < nsrc) {
 		status = inm_record_source(inm, src->s_addr);
 		if (status < 0)
 			break;
 		nrecorded += status;
 		idx++;
 		src++;
 	}
 
 	/* Schedule a response only if at least one source was recorded. */
 	if (nrecorded > 0) {
 		CTR1(KTR_IGMPV3,
 		    "%s: schedule response to SG query", __func__);
 		inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 	}
 
 	return (status);
 }
 
 /*
  * Process a received IGMPv1 host membership report.
  *
  * NOTE: 0.0.0.0 workaround breaks const correctness.
  */
 static int
 igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
     /*const*/ struct igmp *igmp)
 {
 	struct in_ifaddr *ia;
 	struct in_multi *inm;
 
 	IGMPSTAT_INC(igps_rcv_reports);
 
 	/* Reports on a loopback interface are counted but not processed. */
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 
 	/*
 	 * The reported group must be a multicast address and must equal
 	 * the IP destination; otherwise count a bad report and fail.
 	 */
 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 		IGMPSTAT_INC(igps_rcv_badreports);
 		return (EINVAL);
 	}
 
 	/*
 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
 	 * Booting clients may use the source address 0.0.0.0. Some
 	 * IGMP daemons may not know how to use IP_RECVIF to determine
 	 * the interface upon which this message was received.
 	 * Replace 0.0.0.0 with the subnet address if told to do so.
 	 */
 	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 		IFP_TO_IA(ifp, ia);
 		if (ia != NULL) {
 			/* This write into the packet is why ip is not const. */
 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
 			/* Presumably drops the reference IFP_TO_IA() took. */
 			ifa_free(&ia->ia_ifa);
 		}
 	}
 
 	CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)",
 	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 
 	/*
 	 * IGMPv1 report suppression.
 	 * If we are a member of this group, and our membership should be
 	 * reported, stop our group timer and transition to the 'lazy' state.
 	 */
 	IN_MULTI_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifinfo *igi;
 
 		igi = inm->inm_igi;
 		if (igi == NULL) {
 			/* Asserts when KASSERT is active; otherwise bail out. */
 			KASSERT(igi != NULL,
 			    ("%s: no igi for ifp %p", __func__, ifp));
 			goto out_locked;
 		}
 
 		IGMPSTAT_INC(igps_rcv_ourreports);
 
 		/*
 		 * If we are in IGMPv3 host mode, do not allow the
 		 * other host's IGMPv1 report to suppress our reports
 		 * unless explicitly configured to do so.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			if (V_igmp_legacysupp)
 				igmp_v3_suppress_group_record(inm);
 			goto out_locked;
 		}
 
 		/* Another host reported this group: cancel our own timer. */
 		inm->inm_timer = 0;
 
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 			break;
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			/* FALLTHROUGH */
 		case IGMP_SLEEPING_MEMBER:
 			inm->inm_state = IGMP_SLEEPING_MEMBER;
 			break;
 		case IGMP_REPORTING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			/* The next state depends on the link's IGMP version. */
 			if (igi->igi_version == IGMP_VERSION_1)
 				inm->inm_state = IGMP_LAZY_MEMBER;
 			else if (igi->igi_version == IGMP_VERSION_2)
 				inm->inm_state = IGMP_SLEEPING_MEMBER;
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 
 out_locked:
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a received IGMPv2 host membership report.
  *
  * NOTE: 0.0.0.0 workaround breaks const correctness.
  */
 static int
 igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
     /*const*/ struct igmp *igmp)
 {
 	struct in_ifaddr *ia;
 	struct in_multi *inm;
 
 	/*
 	 * Make sure we don't hear our own membership report.  Fast
 	 * leave requires knowing that we are the only member of a
 	 * group.
 	 */
 	/*
 	 * ia is released with ifa_free() on every exit below, and at
 	 * the latest right after the 0.0.0.0 workaround; presumably
 	 * IFP_TO_IA() returns it with a reference held.
 	 */
 	IFP_TO_IA(ifp, ia);
 	if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
 		ifa_free(&ia->ia_ifa);
 		return (0);
 	}
 
 	IGMPSTAT_INC(igps_rcv_reports);
 
 	/* Reports on a loopback interface are counted but not processed. */
 	if (ifp->if_flags & IFF_LOOPBACK) {
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		return (0);
 	}
 
 	/*
 	 * The reported group must be a multicast address and must equal
 	 * the IP destination; otherwise count a bad report and fail.
 	 */
 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		IGMPSTAT_INC(igps_rcv_badreports);
 		return (EINVAL);
 	}
 
 	/*
 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
 	 * Booting clients may use the source address 0.0.0.0. Some
 	 * IGMP daemons may not know how to use IP_RECVIF to determine
 	 * the interface upon which this message was received.
 	 * Replace 0.0.0.0 with the subnet address if told to do so.
 	 */
 	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 		if (ia != NULL)
 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
 	}
 	/* Last use of ia: it is not touched again after this point. */
 	if (ia != NULL)
 		ifa_free(&ia->ia_ifa);
 
 	CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)",
 	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 
 	/*
 	 * IGMPv2 report suppression.
 	 * If we are a member of this group, and our membership should be
 	 * reported, and our group timer is pending or about to be reset,
 	 * stop our group timer by transitioning to the 'lazy' state.
 	 */
 	IN_MULTI_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifinfo *igi;
 
 		igi = inm->inm_igi;
 		KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
 
 		IGMPSTAT_INC(igps_rcv_ourreports);
 
 		/*
 		 * If we are in IGMPv3 host mode, do not allow the
 		 * other host's IGMPv2 report to suppress our reports
 		 * unless explicitly configured to do so.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			if (V_igmp_legacysupp)
 				igmp_v3_suppress_group_record(inm);
 			goto out_locked;
 		}
 
 		/* Another host reported this group: cancel our own timer. */
 		inm->inm_timer = 0;
 
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 			break;
 		case IGMP_REPORTING_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			/* FALLTHROUGH */
 		case IGMP_LAZY_MEMBER:
 			inm->inm_state = IGMP_LAZY_MEMBER;
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 
 out_locked:
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Entry point for a received IGMP packet.  Validates length, checksum
  * and TTL, dispatches on the IGMP message type, and finally hands the
  * mbuf to rip_input().  off is used as the IP header length.
  */
 void
 igmp_input(struct mbuf *m, int off)
 {
 	int iphlen;
 	struct ifnet *ifp;
 	struct igmp *igmp;
 	struct ip *ip;
 	int igmplen;
 	int minlen;
 	int queryver;
 
 	CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off);
 
 	ifp = m->m_pkthdr.rcvif;
 
 	IGMPSTAT_INC(igps_rcv_total);
 
 	ip = mtod(m, struct ip *);
 	iphlen = off;
 	/*
 	 * NOTE(review): igmplen is compared against IGMP_MINLEN and used
 	 * as the in_cksum() length once the IP header is skipped, so
 	 * ip_len presumably already excludes the IP header -- confirm.
 	 */
 	igmplen = ip->ip_len;
 
 	/*
 	 * Validate lengths.
 	 */
 	if (igmplen < IGMP_MINLEN) {
 		IGMPSTAT_INC(igps_rcv_tooshort);
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Always pullup to the minimum size for v1/v2 or v3
 	 * to amortize calls to m_pullup().
 	 */
 	minlen = iphlen;
 	if (igmplen >= IGMP_V3_QUERY_MINLEN)
 		minlen += IGMP_V3_QUERY_MINLEN;
 	else
 		minlen += IGMP_MINLEN;
 	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
 	    (m = m_pullup(m, minlen)) == 0) {
 		IGMPSTAT_INC(igps_rcv_tooshort);
 		return;
 	}
 	/* Re-derive ip: m may have been replaced by m_pullup(). */
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Validate checksum.
 	 */
 	/*
 	 * Step over the IP header so the checksum covers only the IGMP
 	 * message; the adjustment is undone once the checksum passes.
 	 */
 	m->m_data += iphlen;
 	m->m_len -= iphlen;
 	igmp = mtod(m, struct igmp *);
 	if (in_cksum(m, igmplen)) {
 		IGMPSTAT_INC(igps_rcv_badsum);
 		m_freem(m);
 		return;
 	}
 	m->m_data -= iphlen;
 	m->m_len += iphlen;
 
 	/*
 	 * IGMP control traffic is link-scope, and must have a TTL of 1.
 	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
 	 * probe packets may come from beyond the LAN.
 	 */
 	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
 		IGMPSTAT_INC(igps_rcv_badttl);
 		m_freem(m);
 		return;
 	}
 
 	switch (igmp->igmp_type) {
 	case IGMP_HOST_MEMBERSHIP_QUERY:
 		/*
 		 * The query version is inferred from the message length
 		 * and, for minimum-length queries, from igmp_code.
 		 */
 		if (igmplen == IGMP_MINLEN) {
 			if (igmp->igmp_code == 0)
 				queryver = IGMP_VERSION_1;
 			else
 				queryver = IGMP_VERSION_2;
 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
 			queryver = IGMP_VERSION_3;
 		} else {
 			IGMPSTAT_INC(igps_rcv_tooshort);
 			m_freem(m);
 			return;
 		}
 
 		switch (queryver) {
 		case IGMP_VERSION_1:
 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
 			if (!V_igmp_v1enable)
 				break;
 			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
 				m_freem(m);
 				return;
 			}
 			break;
 
 		case IGMP_VERSION_2:
 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
 			if (!V_igmp_v2enable)
 				break;
 			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
 				m_freem(m);
 				return;
 			}
 			break;
 
 		case IGMP_VERSION_3: {
 				struct igmpv3 *igmpv3;
 				uint16_t igmpv3len;
 				uint16_t nsrc;
-				int srclen;
 
 				IGMPSTAT_INC(igps_rcv_v3_queries);
 				igmpv3 = (struct igmpv3 *)igmp;
 				/*
 				 * Validate length based on source count.
 				 */
 				nsrc = ntohs(igmpv3->igmp_numsrc);
-				srclen = sizeof(struct in_addr) * nsrc;
-				if (nsrc * sizeof(in_addr_t) > srclen) {
+				if (nsrc * sizeof(in_addr_t) >
+				    UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
 					/*
 					 * NOTE(review): unlike the other
 					 * reject paths in this function, this
 					 * one returns without m_freem(m) even
 					 * though no m_pullup() has failed
 					 * here -- confirm whether the mbuf
 					 * is leaked.
 					 */
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
 				/*
 				 * m_pullup() may modify m, so pullup in
 				 * this scope.
 				 */
 				igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
-				    srclen;
+				    sizeof(struct in_addr) * nsrc;
 				if ((m->m_flags & M_EXT ||
 				     m->m_len < igmpv3len) &&
 				    (m = m_pullup(m, igmpv3len)) == NULL) {
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
 				/*
 				 * NOTE(review): igmpv3 is re-derived from
 				 * the (possibly new) mbuf, but ip is not;
 				 * if m_pullup() replaced m, the ip passed
 				 * below may be stale -- confirm.
 				 */
 				igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
 				    + iphlen);
 				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
 					m_freem(m);
 					return;
 				}
 			}
 			break;
 		}
 		break;
 
 	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
 		if (!V_igmp_v1enable)
 			break;
 		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 
 	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
 		if (!V_igmp_v2enable)
 			break;
 		/* A missing Router Alert is only counted, not rejected. */
 		if (!ip_checkrouteralert(m))
 			IGMPSTAT_INC(igps_rcv_nora);
 		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 
 	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
 		/*
 		 * Hosts do not need to process IGMPv3 membership reports,
 		 * as report suppression is no longer required.
 		 */
 		if (!ip_checkrouteralert(m))
 			IGMPSTAT_INC(igps_rcv_nora);
 		break;
 
 	default:
 		break;
 	}
 
 	/*
 	 * Pass all valid IGMP packets up to any process(es) listening on a
 	 * raw IGMP socket.
 	 */
 	rip_input(m, off);
 }
 
 
 /*
  * Fast timeout handler (global).
  * VIMAGE: Timeout handlers are expected to service all vimages.
  */
 void
 igmp_fasttimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	/* The vnet list lock is held across the whole walk. */
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		/* Run the per-vnet handler with vnet_iter set as current. */
 		CURVNET_SET(vnet_iter);
 		igmp_fasttimo_vnet();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Fast timeout handler (per-vnet).
  * Sends are shuffled off to a netisr to deal with Giant.
  *
  * VIMAGE: Assume caller has set up our curvnet.
  */
 static void
 igmp_fasttimo_vnet(void)
 {
 	struct ifqueue		 scq;	/* State-change packets */
 	struct ifqueue		 qrq;	/* Query response packets */
 	struct ifnet		*ifp;
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*ifma;
 	struct in_multi		*inm;
 	int			 loop, uri_fasthz;
 
 	loop = 0;
 	uri_fasthz = 0;
 
 	/*
 	 * Quick check to see if any work needs to be done, in order to
 	 * minimize the overhead of fasttimo processing.
 	 * SMPng: XXX Unlocked reads.
 	 */
 	if (!V_current_state_timers_running &&
 	    !V_interface_timers_running &&
 	    !V_state_change_timers_running)
 		return;
 
 	/* Both locks are held until out_locked and dropped in reverse. */
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	/*
 	 * IGMPv3 General Query response timer processing.
 	 */
 	if (V_interface_timers_running) {
 		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);
 
 		/*
 		 * Clear the flag, then set it again only if some link
 		 * still has a timer that has not yet reached zero.
 		 */
 		V_interface_timers_running = 0;
 		LIST_FOREACH(igi, &V_igi_head, igi_link) {
 			if (igi->igi_v3_timer == 0) {
 				/* Do nothing. */
 			} else if (--igi->igi_v3_timer == 0) {
 				igmp_v3_dispatch_general_query(igi);
 			} else {
 				V_interface_timers_running = 1;
 			}
 		}
 	}
 
 	if (!V_current_state_timers_running &&
 	    !V_state_change_timers_running)
 		goto out_locked;
 
 	/*
 	 * Clear both flags; the per-group timer routines called below
 	 * set them again for any timer that is still counting down.
 	 */
 	V_current_state_timers_running = 0;
 	V_state_change_timers_running = 0;
 
 	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);
 
 	/*
 	 * IGMPv1/v2/v3 host report and state-change timer processing.
 	 * Note: Processing a v3 group timer may remove a node.
 	 */
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		ifp = igi->igi_ifp;
 
 		/*
 		 * The two output queues, loop and uri_fasthz are only
 		 * (re)initialized and consumed for links in IGMPv3 mode.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
 			    PR_FASTHZ);
 
 			memset(&qrq, 0, sizeof(struct ifqueue));
 			IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);
 
 			memset(&scq, 0, sizeof(struct ifqueue));
 			IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
 		}
 
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			/* Skip non-IPv4 entries and those without an in_multi. */
 			if (ifma->ifma_addr->sa_family != AF_INET ||
 			    ifma->ifma_protospec == NULL)
 				continue;
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			switch (igi->igi_version) {
 			case IGMP_VERSION_1:
 			case IGMP_VERSION_2:
 				igmp_v1v2_process_group_timer(inm,
 				    igi->igi_version);
 				break;
 			case IGMP_VERSION_3:
 				igmp_v3_process_group_timers(igi, &qrq,
 				    &scq, inm, uri_fasthz);
 				break;
 			}
 		}
 		IF_ADDR_RUNLOCK(ifp);
 
 		if (igi->igi_version == IGMP_VERSION_3) {
 			struct in_multi		*tinm;
 
 			/* Queues are dispatched after the address lock is dropped. */
 			igmp_dispatch_queue(&qrq, 0, loop);
 			igmp_dispatch_queue(&scq, 0, loop);
 
 			/*
 			 * Free the in_multi reference(s) for this
 			 * IGMP lifecycle.
 			 */
 			/*
 			 * Each pass removes the list head, which is the
 			 * entry being visited as the walk starts at the head.
 			 */
 			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
 			    inm_nrele, tinm) {
 				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
 				    inm_nrele);
 				inm_release_locked(inm);
 			}
 		}
 	}
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 }
 
 /*
  * Update host report group timer for IGMPv1/v2.
  * Will update the global pending timer flags.
  */
 static void
 igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
 {
 	int report_type;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/* No timer armed for this group: nothing can expire. */
 	if (inm->inm_timer == 0)
 		return;
 
 	/* Tick the timer; if it is still running, note that and stop. */
 	if (--inm->inm_timer != 0) {
 		V_current_state_timers_running = 1;
 		return;
 	}
 
 	/*
 	 * The timer has just expired.  Only a REPORTING member sends a
 	 * report; every other state takes no action.
 	 */
 	if (inm->inm_state != IGMP_REPORTING_MEMBER)
 		return;
 
 	inm->inm_state = IGMP_IDLE_MEMBER;
 	if (version == IGMP_VERSION_2)
 		report_type = IGMP_v2_HOST_MEMBERSHIP_REPORT;
 	else
 		report_type = IGMP_v1_HOST_MEMBERSHIP_REPORT;
 	(void)igmp_v1v2_queue_report(inm, report_type);
 }
 
 /*
  * Update a group's timers for IGMPv3.
  * Will update the global pending timer flags.
  * Note: Unlocked read from igi.
  */
 static void
 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
     struct ifqueue *qrq, struct ifqueue *scq,
     struct in_multi *inm, const int uri_fasthz)
 {
 	int query_response_timer_expired;
 	int state_change_retransmit_timer_expired;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	query_response_timer_expired = 0;
 	state_change_retransmit_timer_expired = 0;
 
 	/*
 	 * During a transition from v1/v2 compatibility mode back to v3,
 	 * a group record in REPORTING state may still have its group
 	 * timer active. This is a no-op in this function; it is easier
 	 * to deal with it here than to complicate the slow-timeout path.
 	 */
 	/*
 	 * Each timer is ticked once: it is either idle, expires on this
 	 * tick, or is still running (which sets the global flag again).
 	 */
 	if (inm->inm_timer == 0) {
 		query_response_timer_expired = 0;
 	} else if (--inm->inm_timer == 0) {
 		query_response_timer_expired = 1;
 	} else {
 		V_current_state_timers_running = 1;
 	}
 
 	if (inm->inm_sctimer == 0) {
 		state_change_retransmit_timer_expired = 0;
 	} else if (--inm->inm_sctimer == 0) {
 		state_change_retransmit_timer_expired = 1;
 	} else {
 		V_state_change_timers_running = 1;
 	}
 
 	/* We are in fasttimo, so be quick about it. */
 	if (!state_change_retransmit_timer_expired &&
 	    !query_response_timer_expired)
 		return;
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 		break;
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		/*
 		 * Respond to a previously pending Group-Specific
 		 * or Group-and-Source-Specific query by enqueueing
 		 * the appropriate Current-State report for
 		 * immediate transmission.
 		 */
 		if (query_response_timer_expired) {
 			int retval;
 
 			/* retval is only traced; it is not acted upon. */
 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, retval);
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			/* XXX Clear recorded sources for next time. */
 			inm_clear_recorded(inm);
 		}
 		/* FALLTHROUGH */
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		if (state_change_retransmit_timer_expired) {
 			/*
 			 * State-change retransmission timer fired.
 			 * If there are any further pending retransmissions,
 			 * set the global pending state-change flag, and
 			 * reset the timer.
 			 */
 			if (--inm->inm_scrv > 0) {
 				inm->inm_sctimer = uri_fasthz;
 				V_state_change_timers_running = 1;
 			}
 			/*
 			 * Retransmit the previously computed state-change
 			 * report. If there are no further pending
 			 * retransmissions, the mbuf queue will be consumed.
 			 * Update T0 state to T1 as we have now sent
 			 * a state-change.
 			 */
 			(void)igmp_v3_merge_state_changes(inm, scq);
 
 			inm_commit(inm);
 			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 
 			/*
 			 * If we are leaving the group for good, make sure
 			 * we release IGMP's reference to it.
 			 * This release must be deferred using a SLIST,
 			 * as we are called from a loop which traverses
 			 * the in_ifmultiaddr TAILQ.
 			 */
 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
 			    inm->inm_scrv == 0) {
 				inm->inm_state = IGMP_NOT_MEMBER;
 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
 				    inm, inm_nrele);
 			}
 		}
 		break;
 	}
 }
 
 
 /*
  * Suppress a group's pending response to a group or source/group query.
  *
  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
  * Do NOT update ST1/ST0 as this operation merely suppresses
  * the currently pending group record.
  * Do NOT suppress the response to a general query. It is possible but
  * it would require adding another state or flag.
  */
 static void
 igmp_v3_suppress_group_record(struct in_multi *inm)
 {
 
 	IN_MULTI_LOCK_ASSERT();
 
 	KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
 		("%s: not IGMPv3 mode on link", __func__));
 
 	/*
 	 * Only a pending group or group-and-source query response can
 	 * be suppressed; any other state is left untouched.  The two
 	 * tests must be joined with &&: joined with ||, the condition
 	 * holds for every state and nothing is ever suppressed.
 	 */
 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
 		return;
 
 	/* Sources recorded for a group-and-source query are discarded. */
 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 		inm_clear_recorded(inm);
 
 	/* Cancel the pending response and return to REPORTING. */
 	inm->inm_timer = 0;
 	inm->inm_state = IGMP_REPORTING_MEMBER;
 }
 
 /*
  * Switch to a different IGMP version on the given interface,
  * as per Section 7.2.1.
  */
 static void
 igmp_set_version(struct igmp_ifinfo *igi, const int version)
 {
 	int compat_version, querier_ticks;
 
 	IGMP_LOCK_ASSERT();
 
 	CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
 	    version, igi->igi_ifp, igi->igi_ifp->if_xname);
 
 	/*
 	 * For a v1 or v2 request, (re)start the matching "Older Version
 	 * Querier Present" timer (Section 8.12) and stop the other one.
 	 * Any other requested version leaves both timers as they are.
 	 */
 	switch (version) {
 	case IGMP_VERSION_1:
 	case IGMP_VERSION_2:
 		querier_ticks = igi->igi_rv * igi->igi_qi + igi->igi_qri;
 		querier_ticks *= PR_SLOWHZ;
 		if (version == IGMP_VERSION_1) {
 			igi->igi_v1_timer = querier_ticks;
 			igi->igi_v2_timer = 0;
 		} else {
 			igi->igi_v1_timer = 0;
 			igi->igi_v2_timer = querier_ticks;
 		}
 		break;
 	default:
 		break;
 	}
 
 	/*
 	 * Derive the compatibility mode from the timers: a running v1
 	 * timer selects v1; otherwise a running v2 timer selects v2.
 	 * With neither running the link's version is left unchanged.
 	 */
 	if (igi->igi_v1_timer > 0)
 		compat_version = IGMP_VERSION_1;
 	else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0)
 		compat_version = IGMP_VERSION_2;
 	else
 		return;
 
 	/* Cancel the link's v3 timers only when the mode really changes. */
 	if (igi->igi_version != compat_version) {
 		igi->igi_version = compat_version;
 		igmp_v3_cancel_link_timers(igi);
 	}
 }
 
 /*
  * Cancel pending IGMPv3 timers for the given link and all groups
  * joined on it; state-change, general-query, and group-query timers.
  *
  * Only ever called on a transition from v3 to Compatibility mode. Kill
  * the timers stone dead (this may be expensive for large N groups), they
  * will be restarted if Compatibility Mode deems that they must be due to
  * query processing.
  */
 static void
 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
 {
 	struct ifmultiaddr	*ifma;
 	struct ifnet		*ifp;
 	struct in_multi		*inm, *tinm;
 
 	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
 	    igi->igi_ifp, igi->igi_ifp->if_xname);
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
 	 * Stop the v3 General Query Response on this link stone dead.
 	 * If fasttimo is woken up due to V_interface_timers_running,
 	 * the flag will be cleared if there are no pending link timers.
 	 */
 	igi->igi_v3_timer = 0;
 
 	/*
 	 * Now clear the current-state and state-change report timers
 	 * for all memberships scoped to this link.
 	 */
 	ifp = igi->igi_ifp;
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		/* Skip non-IPv4 entries and those without an in_multi. */
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			/*
 			 * These states are either not relevant in v3 mode,
 			 * or are unreported. Do nothing.
 			 */
 			break;
 		case IGMP_LEAVING_MEMBER:
 			/*
 			 * If we are leaving the group and switching to
 			 * compatibility mode, we need to release the final
 			 * reference held for issuing the INCLUDE {}, and
 			 * transition to REPORTING to ensure the host leave
 			 * message is sent upstream to the old querier --
 			 * transition to NOT would lose the leave and race.
 			 */
 			/* Queued here; released after the address lock is dropped. */
 			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
 			/* FALLTHROUGH */
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 			inm_clear_recorded(inm);
 			/* FALLTHROUGH */
 		case IGMP_REPORTING_MEMBER:
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			break;
 		}
 		/*
 		 * Always clear state-change and group report timers.
 		 * Free any pending IGMPv3 state-change records.
 		 */
 		inm->inm_sctimer = 0;
 		inm->inm_timer = 0;
 		_IF_DRAIN(&inm->inm_scq);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 	/*
 	 * Drop the deferred references.  Each pass removes the list
 	 * head, which is the entry being visited as the walk starts
 	 * at the head.
 	 */
 	SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) {
 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
 		inm_release_locked(inm);
 	}
 }
 
 /*
  * Update the Older Version Querier Present timers for a link.
  * See Section 7.2.1 of RFC 3376.
  */
 static void
 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
 {
 
 	IGMP_LOCK_ASSERT();
 
 	if (igi->igi_v1_timer > 0) {
 		/*
 		 * IGMPv1 Querier Present timer running.
 		 *
 		 * While IGMPv1 is enabled the timer keeps counting down;
 		 * if it was disabled since the last timeout, drop the
 		 * timer and revert to IGMPv3.
 		 */
 		if (V_igmp_v1enable) {
 			--igi->igi_v1_timer;
 		} else {
 			CTR5(KTR_IGMPV3,
 			    "%s: transition from v%d -> v%d on %p(%s)",
 			    __func__, igi->igi_version, IGMP_VERSION_3,
 			    igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v1_timer = 0;
 			igi->igi_version = IGMP_VERSION_3;
 		}
 		/* In either case, stop the IGMPv2 timer if it is running. */
 		if (igi->igi_v2_timer > 0) {
 			CTR3(KTR_IGMPV3,
 			    "%s: cancel v2 timer on %p(%s)",
 			    __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v2_timer = 0;
 		}
 		return;
 	}
 
 	/* The remaining cases all require an expired IGMPv1 timer. */
 	if (igi->igi_v1_timer != 0)
 		return;
 
 	if (igi->igi_v2_timer == 0) {
 		/*
 		 * IGMPv1 and IGMPv2 Querier Present timers expired.
 		 *
 		 * Revert to IGMPv3.
 		 */
 		if (igi->igi_version != IGMP_VERSION_3) {
 			CTR5(KTR_IGMPV3,
 			    "%s: transition from v%d -> v%d on %p(%s)",
 			    __func__, igi->igi_version, IGMP_VERSION_3,
 			    igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_version = IGMP_VERSION_3;
 		}
 	} else if (igi->igi_v2_timer > 0) {
 		/*
 		 * IGMPv1 Querier Present timer expired,
 		 * IGMPv2 Querier Present timer running.
 		 *
 		 * While IGMPv2 is enabled, count the timer down and make
 		 * sure the link is in IGMPv2 mode; if it was disabled
 		 * since the last timeout, revert to IGMPv3.
 		 */
 		if (V_igmp_v2enable) {
 			--igi->igi_v2_timer;
 			if (igi->igi_version != IGMP_VERSION_2) {
 				CTR5(KTR_IGMPV3,
 				    "%s: transition from v%d -> v%d on %p(%s)",
 				    __func__, igi->igi_version, IGMP_VERSION_2,
 				    igi->igi_ifp, igi->igi_ifp->if_xname);
 				igi->igi_version = IGMP_VERSION_2;
 			}
 		} else {
 			CTR5(KTR_IGMPV3,
 			    "%s: transition from v%d -> v%d on %p(%s)",
 			    __func__, igi->igi_version, IGMP_VERSION_3,
 			    igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v2_timer = 0;
 			igi->igi_version = IGMP_VERSION_3;
 		}
 	}
 }
 
 /*
  * Global slowtimo handler.
  * VIMAGE: Timeout handlers are expected to service all vimages.
  *
  * Iterates over every vnet while holding the vnet list read lock and
  * runs the per-vnet handler, igmp_slowtimo_vnet(), with that vnet set
  * as the current one.
  */
 void
 igmp_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);

 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		/* Make this vnet current for the duration of the call. */
 		CURVNET_SET(vnet_iter);
 		igmp_slowtimo_vnet();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Per-vnet slowtimo handler.
  */
 static void
 igmp_slowtimo_vnet(void)
 {
 	struct igmp_ifinfo *igi;
 
 	IGMP_LOCK();
 
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		igmp_v1v2_process_querier_timers(igi);
 	}
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Build and dispatch one IGMPv1/v2 host report or leave message for
  * the given group.  Such a message always fits inside a single mbuf.
  *
  * inm:  the group membership being reported.
  * type: the IGMP message type to place in the header.
  *
  * Returns 0 once the packet has been passed to netisr_dispatch(), or
  * ENOMEM if no mbuf could be allocated.
  *
  * Both the in_multi lock and the IGMP lock must be held.
  */
 static int
 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
 {
 	struct mbuf		*pkt;
 	struct ip		*iph;
 	struct igmp		*rep;

 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();

 	MGETHDR(pkt, M_DONTWAIT, MT_DATA);
 	if (pkt == NULL)
 		return (ENOMEM);
 	MH_ALIGN(pkt, sizeof(struct ip) + sizeof(struct igmp));
 	pkt->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

 	/*
 	 * Temporarily step over the IP header so that the mbuf covers
 	 * only the IGMP header while it is filled in and checksummed.
 	 */
 	pkt->m_data += sizeof(struct ip);
 	pkt->m_len = sizeof(struct igmp);

 	rep = mtod(pkt, struct igmp *);
 	rep->igmp_type = type;
 	rep->igmp_code = 0;
 	rep->igmp_group = inm->inm_addr;
 	rep->igmp_cksum = 0;
 	rep->igmp_cksum = in_cksum(pkt, sizeof(struct igmp));

 	/* Step back so that the mbuf covers the IP header as well. */
 	pkt->m_data -= sizeof(struct ip);
 	pkt->m_len += sizeof(struct ip);

 	iph = mtod(pkt, struct ip *);
 	iph->ip_tos = 0;
 	iph->ip_len = sizeof(struct ip) + sizeof(struct igmp);
 	iph->ip_off = 0;
 	iph->ip_p = IPPROTO_IGMP;
 	iph->ip_src.s_addr = INADDR_ANY;

 	/*
 	 * Reports are addressed to the group itself; leave messages
 	 * go to the all-routers group.
 	 */
 	if (type != IGMP_HOST_LEAVE_MESSAGE)
 		iph->ip_dst = inm->inm_addr;
 	else
 		iph->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);

 	igmp_save_context(pkt, inm->inm_ifp);

 	/* Tag the packet as IGMPv1/v2 and carry over the loopback flag. */
 	pkt->m_flags |= M_IGMPV2;
 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
 		pkt->m_flags |= M_IGMP_LOOP;

 	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, pkt);
 	netisr_dispatch(NETISR_IGMP, pkt);

 	return (0);
 }
 
 /*
  * Process a state change from the upper layer for the given IPv4 group.
  *
  * Each socket holds a reference on the in_multi in its own ip_moptions.
  * The socket layer will have made the necessary updates to the group
  * state; it is now up to IGMP to issue a state change report if there
  * has been any change between T0 (when the last state-change was issued)
  * and T1 (now).
  *
  * We use the IGMPv3 state machine at group level. The IGMP module
  * however makes the decision as to which IGMP protocol version to speak.
  * A state change *from* INCLUDE {} always means an initial join.
  * A state change *to* INCLUDE {} always means a final leave.
  *
  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
  * save ourselves a bunch of work; any exclusive mode groups need not
  * compute source filter lists.
  *
  * VIMAGE: curvnet should have been set by caller, as this routine
  * is called from the socket option handlers.
  *
  * Returns 0, or the error reported by igmp_initial_join() or
  * igmp_handle_state_change().  A final leave always yields 0.
  * The in_multi lock must be held; the IGMP lock is taken and dropped
  * internally.
  */
 int
 igmp_change_state(struct in_multi *inm)
 {
 	struct igmp_ifinfo *igi;
 	struct ifnet *ifp;
 	int error, fmode_changed;

 	IN_MULTI_LOCK_ASSERT();

 	/*
 	 * Try to detect if the upper layer just asked us to change state
 	 * for an interface which has now gone away, and sanity check that
 	 * netinet's notion of ifp is the same as net's.
 	 */
 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
 	ifp = inm->inm_ifma->ifma_ifp;
 	KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));

 	IGMP_LOCK();

 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));

 	/*
 	 * A filter mode transition to or from MCAST_UNDEFINED starts or
 	 * finishes an IGMP life cycle for this group.  Anything else is
 	 * an intermediate state change.
 	 */
 	fmode_changed =
 	    (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode);
 	if (fmode_changed) {
 		CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
 	} else {
 		CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
 	}

 	if (fmode_changed && inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
 		CTR1(KTR_IGMPV3, "%s: initial join", __func__);
 		error = igmp_initial_join(inm, igi);
 	} else if (fmode_changed &&
 	    inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
 		CTR1(KTR_IGMPV3, "%s: final leave", __func__);
 		igmp_final_leave(inm, igi);
 		error = 0;
 	} else {
 		error = igmp_handle_state_change(inm, igi);
 	}

 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Perform the initial join for an IGMP group.
  *
  * When joining a group:
  *  If the group should have its IGMP traffic suppressed, do nothing.
  *  IGMPv1 starts sending IGMPv1 host membership reports.
  *  IGMPv2 starts sending IGMPv2 host membership reports.
  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
  *  initial state of the membership.
  *
  * Returns 0 on success, otherwise the (positive) error from queueing
  * the report or enqueueing the state-change record.
  * Both the in_multi lock and the IGMP lock must be held.
  */
 static int
 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	struct ifnet		*ifp;
 	int			 error, retval, silent, syncstates;

 	CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);

 	ifp = inm->inm_ifp;
 	error = 0;
 	syncstates = 1;

 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();

 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));

 	/*
 	 * Groups joined on loopback or marked as 'not reported',
 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
 	 * are never reported in any IGMP protocol exchanges.
 	 * A link marked as IGIF_SILENT causes IGMP to be completely
 	 * disabled for the link.
 	 * All other groups enter the appropriate IGMP state machine
 	 * for the version in use on this link.
 	 */
 	silent = (ifp->if_flags & IFF_LOOPBACK) ||
 	    (igi->igi_flags & IGIF_SILENT) ||
 	    !igmp_isgroupreported(inm->inm_addr);

 	if (silent) {
 		CTR1(KTR_IGMPV3,
 		    "%s: not kicking state machine for silent group",
 		    __func__);
 		inm->inm_state = IGMP_SILENT_MEMBER;
 		inm->inm_timer = 0;
 	} else {
 		/*
 		 * Deal with overlapping in_multi lifecycle.
 		 * If this group was LEAVING, then make sure
 		 * we drop the reference we picked up to keep the
 		 * group around for the final INCLUDE {} enqueue.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3 &&
 		    inm->inm_state == IGMP_LEAVING_MEMBER)
 			inm_release_locked(inm);

 		inm->inm_state = IGMP_REPORTING_MEMBER;

 		if (igi->igi_version == IGMP_VERSION_1 ||
 		    igi->igi_version == IGMP_VERSION_2) {
 			/*
 			 * Send the first report now and, if that worked,
 			 * arm the group timer with a random delay.
 			 */
 			inm->inm_state = IGMP_IDLE_MEMBER;
 			error = igmp_v1v2_queue_report(inm,
 			    (igi->igi_version == IGMP_VERSION_2) ?
 			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
 			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
 			if (error == 0) {
 				inm->inm_timer = IGMP_RANDOM_DELAY(
 				    IGMP_V1V2_MAX_RI * PR_FASTHZ);
 				V_current_state_timers_running = 1;
 			}
 		} else if (igi->igi_version == IGMP_VERSION_3) {
 			/*
 			 * Defer update of T0 to T1, until the first copy
 			 * of the state change has been transmitted.
 			 */
 			syncstates = 0;

 			/*
 			 * Immediately enqueue a State-Change Report for
 			 * this interface, freeing any previous reports.
 			 * Don't kick the timers if there is nothing to do,
 			 * or if an error occurred.
 			 */
 			_IF_DRAIN(&inm->inm_scq);
 			retval = igmp_v3_enqueue_group_record(&inm->inm_scq,
 			    inm, 1, 0, 0);
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, retval);
 			if (retval <= 0) {
 				error = -retval;
 			} else {
 				/*
 				 * Schedule transmission of pending
 				 * state-change report up to RV times for
 				 * this link. The timer will fire at the
 				 * next igmp_fasttimo (~200ms), giving us
 				 * an opportunity to merge the reports.
 				 */
 				if (igi->igi_flags & IGIF_LOOPBACK) {
 					inm->inm_scrv = 1;
 				} else {
 					KASSERT(igi->igi_rv > 1,
 					   ("%s: invalid robustness %d",
 					    __func__, igi->igi_rv));
 					inm->inm_scrv = igi->igi_rv;
 				}
 				inm->inm_sctimer = 1;
 				V_state_change_timers_running = 1;
 			}
 		}
 	}

 	/*
 	 * Only update the T0 state if state change is atomic,
 	 * i.e. we don't need to wait for a timer to fire before we
 	 * can consider the state change to have been communicated.
 	 */
 	if (syncstates) {
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 	}

 	return (error);
 }
 
 /*
  * Issue an intermediate state change during the IGMP life-cycle.
  *
  * On loopback interfaces, silent links, unreported groups, and links
  * not running IGMPv3 there is nothing to transmit, so T1 is committed
  * to T0 immediately.  Otherwise a state-change record is enqueued on
  * the group's queue and the state-change timer is started.
  *
  * Returns 0 on success or when there was nothing to do, otherwise the
  * (positive) error from enqueueing the record.
  * Both the in_multi lock and the IGMP lock must be held.
  */
 static int
 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	struct ifnet		*ifp;
 	int			 retval;

 	CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);

 	ifp = inm->inm_ifp;

 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();

 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));

 	if ((ifp->if_flags & IFF_LOOPBACK) ||
 	    (igi->igi_flags & IGIF_SILENT) ||
 	    !igmp_isgroupreported(inm->inm_addr) ||
 	    (igi->igi_version != IGMP_VERSION_3)) {
 		/* Nothing goes on the wire; just commit T1 to T0. */
 		if (!igmp_isgroupreported(inm->inm_addr)) {
 			CTR1(KTR_IGMPV3,
 			    "%s: not kicking state machine for silent group",
 			    __func__);
 		}
 		CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 		return (0);
 	}

 	/* Replace any queued state-change records with a fresh one. */
 	_IF_DRAIN(&inm->inm_scq);
 	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
 	CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
 	if (retval <= 0)
 		return (-retval);

 	/*
 	 * Record(s) were enqueued: start the state-change report timer
 	 * for this group.  Loopback links get a single transmission,
 	 * all others the link's robustness count.
 	 */
 	if (igi->igi_flags & IGIF_LOOPBACK)
 		inm->inm_scrv = 1;
 	else
 		inm->inm_scrv = igi->igi_rv;
 	inm->inm_sctimer = 1;
 	V_state_change_timers_running = 1;

 	return (0);
 }
 
 /*
  * Perform the final leave for an IGMP group.
  *
  * When leaving a group:
  *  IGMPv1 does nothing.
  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
  *  IGMPv3 enqueues a state-change report containing a transition
  *  to INCLUDE {} for immediate transmission.
  */
 static void
 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	int syncstates;
 
 	syncstates = 1;
 
 	CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		/* Already leaving or left; do nothing. */
 		CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		break;
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		if (igi->igi_version == IGMP_VERSION_2) {
 #ifdef INVARIANTS
 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 			panic("%s: IGMPv3 state reached, not IGMPv3 mode",
 			     __func__);
 #endif
 			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
 			inm->inm_state = IGMP_NOT_MEMBER;
 		} else if (igi->igi_version == IGMP_VERSION_3) {
 			/*
 			 * Stop group timer and all pending reports.
 			 * Immediately enqueue a state-change report
 			 * TO_IN {} to be sent on the next fast timeout,
 			 * giving us an opportunity to merge reports.
 			 */
 			_IF_DRAIN(&inm->inm_scq);
 			inm->inm_timer = 0;
 			if (igi->igi_flags & IGIF_LOOPBACK) {
 				inm->inm_scrv = 1;
 			} else {
 				inm->inm_scrv = igi->igi_rv;
 			}
 			CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d "
 			    "pending retransmissions.", __func__,
 			    inet_ntoa(inm->inm_addr),
 			    inm->inm_ifp->if_xname, inm->inm_scrv);
 			if (inm->inm_scrv == 0) {
 				inm->inm_state = IGMP_NOT_MEMBER;
 				inm->inm_sctimer = 0;
 			} else {
 				int retval;
 
 				inm_acquire_locked(inm);
 
 				retval = igmp_v3_enqueue_group_record(
 				    &inm->inm_scq, inm, 1, 0, 0);
 				KASSERT(retval != 0,
 				    ("%s: enqueue record = %d", __func__,
 				     retval));
 
 				inm->inm_state = IGMP_LEAVING_MEMBER;
 				inm->inm_sctimer = 1;
 				V_state_change_timers_running = 1;
 				syncstates = 0;
 			}
 			break;
 		}
 		break;
 	case IGMP_LAZY_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		/* Our reports are suppressed; do nothing. */
 		break;
 	}
 
 	if (syncstates) {
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 		CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s",
 		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 	}
 }
 
 /*
  * Enqueue an IGMPv3 group record to the given output queue.
  *
  * XXX This function could do with having the allocation code
  * split out, and the multiple-tree-walks coalesced into a single
  * routine as has been done in igmp_v3_enqueue_filter_change().
  *
  * If is_state_change is zero, a current-state record is appended.
  * If is_state_change is non-zero, a state-change report is appended.
  *
  * If is_group_query is non-zero, an mbuf packet chain is allocated.
  * If is_group_query is zero, and if there is a packet with free space
  * at the tail of the queue, it will be appended to providing there
  * is enough free space.
  * Otherwise a new mbuf packet chain is allocated.
  *
  * If is_source_query is non-zero, each source is checked to see if
  * it was recorded for a Group-Source query, and will be omitted if
  * it is not both in-mode and recorded.
  *
  * The function will attempt to allocate leading space in the packet
  * for the IP/IGMP header to be prepended without fragmenting the chain.
  *
  * If successful the size of all data appended to the queue is returned,
  * otherwise an error code less than zero is returned, or zero if
  * no record(s) were appended.
  *
  * The returned size counts each group record header and each source
  * address exactly once, in the first packet as well as in any
  * continuation packets.
  *
  * The in_multi lock must be held.
  */
 static int
 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
     const int is_state_change, const int is_group_query,
     const int is_source_query)
 {
 	struct igmp_grouprec	 ig;
 	struct igmp_grouprec	*pig;
 	struct ifnet		*ifp;
 	struct ip_msource	*ims, *nims;
 	struct mbuf		*m0, *m, *md;
 	int			 error, is_filter_list_change;
 	int			 minrec0len, m0srcs, msrcs, nbytes, off;
 	int			 record_has_sources;
 	int			 now;
 	int			 type;
 	in_addr_t		 naddr;
 	uint8_t			 mode;

 	IN_MULTI_LOCK_ASSERT();

 	error = 0;
 	ifp = inm->inm_ifp;
 	is_filter_list_change = 0;
 	m = NULL;
 	m0 = NULL;
 	m0srcs = 0;
 	msrcs = 0;
 	nbytes = 0;
 	nims = NULL;
 	record_has_sources = 1;
 	pig = NULL;
 	type = IGMP_DO_NOTHING;
 	mode = inm->inm_st[1].iss_fmode;

 	/*
 	 * If we did not transition out of ASM mode during t0->t1,
 	 * and there are no source nodes to process, we can skip
 	 * the generation of source records.
 	 */
 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
 	    inm->inm_nsrc == 0)
 		record_has_sources = 0;

 	if (is_state_change) {
 		/*
 		 * Queue a state change record.
 		 * If the mode did not change, and there are non-ASM
 		 * listeners or source filters present,
 		 * we potentially need to issue two records for the group.
 		 * If we are transitioning to MCAST_UNDEFINED, we need
 		 * not send any sources.
 		 * If there are ASM listeners, and there was no filter
 		 * mode transition of any kind, do nothing.
 		 */
 		if (mode != inm->inm_st[0].iss_fmode) {
 			if (mode == MCAST_EXCLUDE) {
 				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
 				    __func__);
 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
 			} else {
 				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
 				    __func__);
 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
 				if (mode == MCAST_UNDEFINED)
 					record_has_sources = 0;
 			}
 		} else {
 			if (record_has_sources) {
 				is_filter_list_change = 1;
 			} else {
 				type = IGMP_DO_NOTHING;
 			}
 		}
 	} else {
 		/*
 		 * Queue a current state record.
 		 * Any other mode leaves type at IGMP_DO_NOTHING.
 		 */
 		if (mode == MCAST_EXCLUDE) {
 			type = IGMP_MODE_IS_EXCLUDE;
 		} else if (mode == MCAST_INCLUDE) {
 			type = IGMP_MODE_IS_INCLUDE;
 			KASSERT(inm->inm_st[1].iss_asm == 0,
 			    ("%s: inm %p is INCLUDE but ASM count is %d",
 			     __func__, inm, inm->inm_st[1].iss_asm));
 		}
 	}

 	/*
 	 * Generate the filter list changes using a separate function.
 	 */
 	if (is_filter_list_change)
 		return (igmp_v3_enqueue_filter_change(ifq, inm));

 	if (type == IGMP_DO_NOTHING) {
 		CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
 		    __func__, inet_ntoa(inm->inm_addr),
 		    inm->inm_ifp->if_xname);
 		return (0);
 	}

 	/*
 	 * If any sources are present, we must be able to fit at least
 	 * one in the trailing space of the tail packet's mbuf,
 	 * ideally more.
 	 */
 	minrec0len = sizeof(struct igmp_grouprec);
 	if (record_has_sources)
 		minrec0len += sizeof(in_addr_t);

 	CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
 	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
 	    inm->inm_ifp->if_xname);

 	/*
 	 * Check if we have a packet in the tail of the queue for this
 	 * group into which the first group record for this group will fit.
 	 * Otherwise allocate a new packet.
 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
 	 * Note: Group records for G/GSR query responses MUST be sent
 	 * in their own packet.
 	 */
 	m0 = ifq->ifq_tail;
 	if (!is_group_query &&
 	    m0 != NULL &&
 	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
 	    (m0->m_pkthdr.len + minrec0len) <
 	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 		/*
 		 * NOTE(review): unlike the fit test above, this source
 		 * budget does not subtract IGMP_LEADINGSPACE -- confirm
 		 * that this is intended.
 		 */
 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		m = m0;
 		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
 	} else {
 		if (_IF_QFULL(ifq)) {
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
 		m = NULL;
 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		/*
 		 * Try a cluster first for current-state records which
 		 * are not responses to a group query; fall back to a
 		 * plain header mbuf.
 		 */
 		if (!is_state_change && !is_group_query) {
 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 			if (m)
 				m->m_data += IGMP_LEADINGSPACE;
 		}
 		if (m == NULL) {
 			m = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
 		if (m == NULL)
 			return (-ENOMEM);

 		igmp_save_context(m, ifp);

 		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
 	}

 	/*
 	 * Append group record.
 	 * If we have sources, we don't know how many yet.
 	 */
 	ig.ig_type = type;
 	ig.ig_datalen = 0;
 	ig.ig_numsrc = 0;
 	ig.ig_group = inm->inm_addr;
 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 		if (m != m0)
 			m_freem(m);
 		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 		return (-ENOMEM);
 	}
 	nbytes += sizeof(struct igmp_grouprec);

 	/*
 	 * Append as many sources as will fit in the first packet.
 	 * If we are appending to a new packet, the chain allocation
 	 * may potentially use clusters; use m_getptr() in this case.
 	 * If we are appending to an existing packet, we need to obtain
 	 * a pointer to the group record after m_append(), in case a new
 	 * mbuf was allocated.
 	 * Only append sources which are in-mode at t1. If we are
 	 * transitioning to MCAST_UNDEFINED state on the group, do not
 	 * include source entries.
 	 * Only report recorded sources in our filter set when responding
 	 * to a group-source query.
 	 */
 	if (record_has_sources) {
 		/*
 		 * At this point nbytes is exactly the size of the group
 		 * record header which was just appended.
 		 */
 		if (m == m0) {
 			md = m_last(m);
 			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 			    md->m_len - nbytes);
 		} else {
 			md = m_getptr(m, 0, &off);
 			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 			    off);
 		}
 		msrcs = 0;
 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
 			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 			    inet_ntoa_haddr(ims->ims_haddr));
 			now = ims_get_mode(inm, ims, 1);
 			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
 			if ((now != mode) ||
 			    (now == mode && mode == MCAST_UNDEFINED)) {
 				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 				continue;
 			}
 			if (is_source_query && ims->ims_stp == 0) {
 				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 				    __func__);
 				continue;
 			}
 			CTR1(KTR_IGMPV3, "%s: append node", __func__);
 			naddr = htonl(ims->ims_haddr);
 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 				    __func__);
 				return (-ENOMEM);
 			}
 			/*
 			 * The bytes for this source are accounted for
 			 * once, after the loop; do not also add them here.
 			 */
 			++msrcs;
 			if (msrcs == m0srcs)
 				break;
 		}
 		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
 		    msrcs);
 		pig->ig_numsrc = htons(msrcs);
 		nbytes += (msrcs * sizeof(in_addr_t));
 	}

 	if (is_source_query && msrcs == 0) {
 		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
 		if (m != m0)
 			m_freem(m);
 		return (0);
 	}

 	/*
 	 * We are good to go with first packet.
 	 */
 	if (m != m0) {
 		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
 		m->m_pkthdr.PH_vt.vt_nrecs = 1;
 		_IF_ENQUEUE(ifq, m);
 	} else
 		m->m_pkthdr.PH_vt.vt_nrecs++;

 	/*
 	 * No further work needed if no source list in packet(s).
 	 */
 	if (!record_has_sources)
 		return (nbytes);

 	/*
 	 * Whilst sources remain to be announced, we need to allocate
 	 * a new packet and fill out as many sources as will fit.
 	 * Always try for a cluster first.
 	 * nims is the tree node following the last one visited above,
 	 * or NULL once the whole tree has been walked.
 	 */
 	while (nims != NULL) {
 		if (_IF_QFULL(ifq)) {
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 		if (m)
 			m->m_data += IGMP_LEADINGSPACE;
 		if (m == NULL) {
 			m = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
 		if (m == NULL)
 			return (-ENOMEM);
 		igmp_save_context(m, ifp);
 		md = m_getptr(m, 0, &off);
 		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
 		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);

 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 			if (m != m0)
 				m_freem(m);
 			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 			return (-ENOMEM);
 		}
 		m->m_pkthdr.PH_vt.vt_nrecs = 1;
 		nbytes += sizeof(struct igmp_grouprec);

 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);

 		msrcs = 0;
 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 			    inet_ntoa_haddr(ims->ims_haddr));
 			now = ims_get_mode(inm, ims, 1);
 			if ((now != mode) ||
 			    (now == mode && mode == MCAST_UNDEFINED)) {
 				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 				continue;
 			}
 			if (is_source_query && ims->ims_stp == 0) {
 				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 				    __func__);
 				continue;
 			}
 			CTR1(KTR_IGMPV3, "%s: append node", __func__);
 			naddr = htonl(ims->ims_haddr);
 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 				    __func__);
 				return (-ENOMEM);
 			}
 			++msrcs;
 			if (msrcs == m0srcs)
 				break;
 		}
 		pig->ig_numsrc = htons(msrcs);
 		nbytes += (msrcs * sizeof(in_addr_t));

 		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
 		_IF_ENQUEUE(ifq, m);
 	}

 	return (nbytes);
 }
 
 /*
  * Marks which kinds of filter-change record have been completed.
  * The values are chosen so that a source node's current filter mode
  * can be cast directly to a rectype_t; the corresponding mode is
  * noted above each member.
  */
 typedef enum {
 	/* MCAST_UNDEFINED */
 	REC_NONE = 0,
 	/* MCAST_INCLUDE */
 	REC_ALLOW = 1,
 	/* MCAST_EXCLUDE */
 	REC_BLOCK = 2,
 	/* Both record kinds. */
 	REC_FULL = (REC_ALLOW | REC_BLOCK)
 } rectype_t;
 
 /*
  * Enqueue an IGMPv3 filter list change to the given output queue.
  *
  * Source list filter state is held in an RB-tree. When the filter list
  * for a group is changed without changing its mode, we need to compute
  * the deltas between T0 and T1 for each source in the filter set,
  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
  *
  * As we may potentially queue two record types, and the entire R-B tree
  * needs to be walked at once, we break this out into its own function
  * so we can generate a tightly packed queue of packets.
  *
  * XXX This could be written to only use one tree walk, although that makes
  * serializing into the mbuf chains a bit harder. For now we do two walks
  * which makes things easier on us, and it may or may not be harder on
  * the L2 cache.
  *
  * If successful the size of all data appended to the queue is returned,
  * otherwise an error code less than zero is returned, or zero if
  * no record(s) were appended.
  *
  * The in_multi lock must be held.
  */
 static int
 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
 {
 	/* Smallest useful record: a group record header plus one source. */
 	static const int MINRECLEN =
 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
 	struct ifnet		*ifp;
 	struct igmp_grouprec	 ig;
 	struct igmp_grouprec	*pig;
 	struct ip_msource	*ims, *nims;
 	struct mbuf		*m, *m0, *md;
 	in_addr_t		 naddr;
 	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
 	int			 nallow, nblock;
 	uint8_t			 mode, now, then;
 	rectype_t		 crt, drt, nrt;

 	IN_MULTI_LOCK_ASSERT();

 	/*
 	 * Nothing to report if the group has no source nodes, or if ASM
 	 * listeners were present at both t0 and t1.
 	 */
 	if (inm->inm_nsrc == 0 ||
 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
 		return (0);

 	ifp = inm->inm_ifp;			/* interface */
 	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
 	crt = REC_NONE;	/* current group record type */
 	drt = REC_NONE;	/* mask of completed group record types */
 	nrt = REC_NONE;	/* record type for current node */
 	m0srcs = 0;	/* # source which will fit in current mbuf chain */
 	nbytes = 0;	/* # of bytes appended to group's state-change queue */
 	npbytes = 0;	/* # of bytes appended this packet */
 	rsrcs = 0;	/* # sources encoded in current record */
 	schanged = 0;	/* # nodes encoded in overall filter change */
 	nallow = 0;	/* # of source entries in ALLOW_NEW */
 	nblock = 0;	/* # of source entries in BLOCK_OLD */
 	nims = NULL;	/* next tree node pointer */

 	/*
 	 * For each possible filter record mode.
 	 * The first kind of source we encounter tells us which
 	 * is the first kind of record we start appending.
 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
 	 * as the inverse of the group's filter mode.
 	 *
 	 * The outer loop runs one pass per record type until both have
 	 * been marked done in drt; the inner loop emits one group record
 	 * per iteration and repeats while tree nodes remain (nims != NULL).
 	 */
 	while (drt != REC_FULL) {
 		do {
 			/*
 			 * Reuse the packet at the tail of the queue if it
 			 * has room for another record; otherwise allocate
 			 * a fresh one, trying for a cluster first.
 			 */
 			m0 = ifq->ifq_tail;
 			if (m0 != NULL &&
 			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
 			     IGMP_V3_REPORT_MAXRECS) &&
 			    (m0->m_pkthdr.len + MINRECLEN) <
 			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 				m = m0;
 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 					    sizeof(struct igmp_grouprec)) /
 				    sizeof(in_addr_t);
 				CTR1(KTR_IGMPV3,
 				    "%s: use previous packet", __func__);
 			} else {
 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 				if (m)
 					m->m_data += IGMP_LEADINGSPACE;
 				if (m == NULL) {
 					m = m_gethdr(M_DONTWAIT, MT_DATA);
 					if (m)
 						MH_ALIGN(m, IGMP_LEADINGSPACE);
 				}
 				if (m == NULL) {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_get*() failed", __func__);
 					return (-ENOMEM);
 				}
 				m->m_pkthdr.PH_vt.vt_nrecs = 0;
 				igmp_save_context(m, ifp);
 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 				    sizeof(struct igmp_grouprec)) /
 				    sizeof(in_addr_t);
 				npbytes = 0;
 				CTR1(KTR_IGMPV3,
 				    "%s: allocated new packet", __func__);
 			}
 			/*
 			 * Append the IGMP group record header to the
 			 * current packet's data area.
 			 * Recalculate pointer to free space for next
 			 * group record, in case m_append() allocated
 			 * a new mbuf or cluster.
 			 */
 			memset(&ig, 0, sizeof(ig));
 			ig.ig_group = inm->inm_addr;
 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3,
 				    "%s: m_append() failed", __func__);
 				return (-ENOMEM);
 			}
 			npbytes += sizeof(struct igmp_grouprec);
 			if (m != m0) {
 				/* new packet; offset in chain */
 				md = m_getptr(m, npbytes -
 				    sizeof(struct igmp_grouprec), &off);
 				pig = (struct igmp_grouprec *)(mtod(md,
 				    uint8_t *) + off);
 			} else {
 				/* current packet; offset from last append */
 				md = m_last(m);
 				pig = (struct igmp_grouprec *)(mtod(md,
 				    uint8_t *) + md->m_len -
 				    sizeof(struct igmp_grouprec));
 			}
 			/*
 			 * Begin walking the tree for this record type
 			 * pass, or continue from where we left off
 			 * previously if we had to allocate a new packet.
 			 * Only report deltas in-mode at t1.
 			 * We need not report included sources as allowed
 			 * if we are in inclusive mode on the group,
 			 * however the converse is not true.
 			 */
 			rsrcs = 0;
 			if (nims == NULL)
 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 				CTR2(KTR_IGMPV3, "%s: visit node %s",
 				    __func__, inet_ntoa_haddr(ims->ims_haddr));
 				now = ims_get_mode(inm, ims, 1);
 				then = ims_get_mode(inm, ims, 0);
 				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
 				    __func__, then, now);
 				if (now == then) {
 					CTR1(KTR_IGMPV3,
 					    "%s: skip unchanged", __func__);
 					continue;
 				}
 				if (mode == MCAST_EXCLUDE &&
 				    now == MCAST_INCLUDE) {
 					CTR1(KTR_IGMPV3,
 					    "%s: skip IN src on EX group",
 					    __func__);
 					continue;
 				}
 				/*
 				 * Map the node's t1 mode onto a record type;
 				 * an UNDEFINED node takes the inverse of the
 				 * group's filter mode.
 				 */
 				nrt = (rectype_t)now;
 				if (nrt == REC_NONE)
 					nrt = (rectype_t)(~mode & REC_FULL);
 				/*
 				 * The very first changed node fixes the record
 				 * type of the first pass; nodes of the other
 				 * type are left for the other pass.
 				 */
 				if (schanged++ == 0) {
 					crt = nrt;
 				} else if (crt != nrt)
 					continue;
 				naddr = htonl(ims->ims_haddr);
 				if (!m_append(m, sizeof(in_addr_t),
 				    (void *)&naddr)) {
 					if (m != m0)
 						m_freem(m);
 					CTR1(KTR_IGMPV3,
 					    "%s: m_append() failed", __func__);
 					return (-ENOMEM);
 				}
 				nallow += !!(crt == REC_ALLOW);
 				nblock += !!(crt == REC_BLOCK);
 				/* Stop once this packet's source budget is used. */
 				if (++rsrcs == m0srcs)
 					break;
 			}
 			/*
 			 * If we did not append any tree nodes on this
 			 * pass, back out of allocations.
 			 */
 			if (rsrcs == 0) {
 				npbytes -= sizeof(struct igmp_grouprec);
 				if (m != m0) {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_free(m)", __func__);
 					m_freem(m);
 				} else {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_adj(m, -ig)", __func__);
 					m_adj(m, -((int)sizeof(
 					    struct igmp_grouprec)));
 				}
 				continue;
 			}
 			/* Finalize the record header now the count is known. */
 			npbytes += (rsrcs * sizeof(in_addr_t));
 			if (crt == REC_ALLOW)
 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
 			else if (crt == REC_BLOCK)
 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
 			pig->ig_numsrc = htons(rsrcs);
 			/*
 			 * Count the new group record, and enqueue this
 			 * packet if it wasn't already queued.
 			 */
 			m->m_pkthdr.PH_vt.vt_nrecs++;
 			if (m != m0)
 				_IF_ENQUEUE(ifq, m);
 			nbytes += npbytes;
 		} while (nims != NULL);
 		/* Mark this record type done and switch to the other one. */
 		drt |= crt;
 		crt = (~crt & REC_FULL);
 	}

 	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
 	    nallow, nblock);

 	return (nbytes);
 }
 
 static int
 igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
 {
 	struct ifqueue	*gq;
 	struct mbuf	*m;		/* pending state-change */
 	struct mbuf	*m0;		/* copy of pending state-change */
 	struct mbuf	*mt;		/* last state-change in packet */
 	int		 docopy, domerge;
 	u_int		 recslen;
 
 	docopy = 0;
 	domerge = 0;
 	recslen = 0;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
 	 * If there are further pending retransmissions, make a writable
 	 * copy of each queued state-change message before merging.
 	 */
 	if (inm->inm_scrv > 0)
 		docopy = 1;
 
 	gq = &inm->inm_scq;
 #ifdef KTR
 	if (gq->ifq_head == NULL) {
 		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
 		    __func__, inm);
 	}
 #endif
 
 	m = gq->ifq_head;
 	while (m != NULL) {
 		/*
 		 * Only merge the report into the current packet if
 		 * there is sufficient space to do so; an IGMPv3 report
 		 * packet may only contain 65,535 group records.
 		 * Always use a simple mbuf chain concatentation to do this,
 		 * as large state changes for single groups may have
 		 * allocated clusters.
 		 */
 		domerge = 0;
 		mt = ifscq->ifq_tail;
 		if (mt != NULL) {
 			recslen = m_length(m, NULL);
 
 			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
 			    m->m_pkthdr.PH_vt.vt_nrecs <=
 			    IGMP_V3_REPORT_MAXRECS) &&
 			    (mt->m_pkthdr.len + recslen <=
 			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
 				domerge = 1;
 		}
 
 		if (!domerge && _IF_QFULL(gq)) {
 			CTR2(KTR_IGMPV3,
 			    "%s: outbound queue full, skipping whole packet %p",
 			    __func__, m);
 			mt = m->m_nextpkt;
 			if (!docopy)
 				m_freem(m);
 			m = mt;
 			continue;
 		}
 
 		if (!docopy) {
 			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
 			_IF_DEQUEUE(gq, m0);
 			m = m0->m_nextpkt;
 		} else {
 			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
 			m0 = m_dup(m, M_NOWAIT);
 			if (m0 == NULL)
 				return (ENOMEM);
 			m0->m_nextpkt = NULL;
 			m = m->m_nextpkt;
 		}
 
 		if (!domerge) {
 			CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
 			    __func__, m0, ifscq);
 			_IF_ENQUEUE(ifscq, m0);
 		} else {
 			struct mbuf *mtl;	/* last mbuf of packet mt */
 
 			CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
 			    __func__, m0, mt);
 
 			mtl = m_last(mt);
 			m0->m_flags &= ~M_PKTHDR;
 			mt->m_pkthdr.len += recslen;
 			mt->m_pkthdr.PH_vt.vt_nrecs +=
 			    m0->m_pkthdr.PH_vt.vt_nrecs;
 
 			mtl->m_next = m0;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Respond to a pending IGMPv3 General Query.
  */
 static void
 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
 {
 	struct ifmultiaddr	*ifma;
 	struct ifnet		*ifp;
 	struct in_multi		*inm;
 	int			 retval, loop;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi->igi_version == IGMP_VERSION_3,
 	    ("%s: called when version %d", __func__, igi->igi_version));
 
 	ifp = igi->igi_ifp;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		KASSERT(ifp == inm->inm_ifp,
 		    ("%s: inconsistent ifp", __func__));
 
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 			break;
 		case IGMP_REPORTING_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
 			    inm, 0, 0, 0);
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, retval);
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
 
 	/*
 	 * Slew transmission of bursts over 500ms intervals.
 	 */
 	if (igi->igi_gq.ifq_head != NULL) {
 		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
 		    IGMP_RESPONSE_BURST_INTERVAL);
 		V_interface_timers_running = 1;
 	}
 }
 
 /*
  * Transmit the next pending IGMP message in the output queue.
  *
  * We get called from netisr_processqueue(). A mutex private to igmpoq
  * will be acquired and released around this routine.
  *
  * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
  * MRT: Nothing needs to be done, as IGMP traffic is always local to
  * a link and uses a link-scope multicast address.
  */
 static void
 igmp_intr(struct mbuf *m)
 {
 	struct ip_moptions	 imo;
 	struct ifnet		*ifp;
 	struct mbuf		*ipopts, *m0;
 	int			 error;
 	uint32_t		 ifindex;
 
 	CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);
 
 	/*
 	 * Set VNET image pointer from enqueued mbuf chain
 	 * before doing anything else. Whilst we use interface
 	 * indexes to guard against interface detach, they are
 	 * unique to each VIMAGE and must be retrieved.
 	 */
 	CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
 	ifindex = igmp_restore_context(m);
 
 	/*
 	 * Check if the ifnet still exists. This limits the scope of
 	 * any race in the absence of a global ifp lock for low cost
 	 * (an array lookup).
 	 */
 	ifp = ifnet_byindex(ifindex);
 	if (ifp == NULL) {
 		CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
 		    __func__, m, ifindex);
 		m_freem(m);
 		IPSTAT_INC(ips_noroute);
 		goto out;
 	}
 
 	ipopts = V_igmp_sendra ? m_raopt : NULL;
 
 	imo.imo_multicast_ttl  = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_multicast_loop = (V_ip_mrouter != NULL);
 
 	/*
 	 * If the user requested that IGMP traffic be explicitly
 	 * redirected to the loopback interface (e.g. they are running a
 	 * MANET interface and the routing protocol needs to see the
 	 * updates), handle this now.
 	 */
 	if (m->m_flags & M_IGMP_LOOP)
 		imo.imo_multicast_ifp = V_loif;
 	else
 		imo.imo_multicast_ifp = ifp;
 
 	if (m->m_flags & M_IGMPV2) {
 		m0 = m;
 	} else {
 		m0 = igmp_v3_encap_report(ifp, m);
 		if (m0 == NULL) {
 			CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
 			m_freem(m);
 			IPSTAT_INC(ips_odropped);
 			goto out;
 		}
 	}
 
 	igmp_scrub_context(m0);
 	m->m_flags &= ~(M_PROTOFLAGS);
 	m0->m_pkthdr.rcvif = V_loif;
 #ifdef MAC
 	mac_netinet_igmp_send(ifp, m0);
 #endif
 	error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
 	if (error) {
 		CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
 		goto out;
 	}
 
 	IGMPSTAT_INC(igps_snd_reports);
 
 out:
 	/*
 	 * We must restore the existing vnet pointer before
 	 * continuing as we are run from netisr context.
 	 */
 	CURVNET_RESTORE();
 }
 
 /*
  * Encapsulate an IGMPv3 report.
  *
  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
  * chain has already had its IP/IGMPv3 header prepended. In this case
  * the function will not attempt to prepend; the lengths and checksums
  * will however be re-computed.
  *
  * Returns a pointer to the new mbuf chain head, or NULL if the
  * allocation failed.
  */
 static struct mbuf *
 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
 {
 	struct igmp_report	*igmp;
 	struct ip		*ip;
 	int			 hdrlen, igmpreclen;
 
 	KASSERT((m->m_flags & M_PKTHDR),
 	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
 
 	igmpreclen = m_length(m, NULL);
 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
 
 	if (m->m_flags & M_IGMPV3_HDR) {
 		igmpreclen -= hdrlen;
 	} else {
 		M_PREPEND(m, hdrlen, M_DONTWAIT);
 		if (m == NULL)
 			return (NULL);
 		m->m_flags |= M_IGMPV3_HDR;
 	}
 
 	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);
 
 	m->m_data += sizeof(struct ip);
 	m->m_len -= sizeof(struct ip);
 
 	igmp = mtod(m, struct igmp_report *);
 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
 	igmp->ir_rsv1 = 0;
 	igmp->ir_rsv2 = 0;
 	igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
 	igmp->ir_cksum = 0;
 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
 	m->m_pkthdr.PH_vt.vt_nrecs = 0;
 
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
 	ip->ip_len = hdrlen + igmpreclen;
 	ip->ip_off = IP_DF;
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_sum = 0;
 
 	ip->ip_src.s_addr = INADDR_ANY;
 
 	if (m->m_flags & M_IGMP_LOOP) {
 		struct in_ifaddr *ia;
 
 		IFP_TO_IA(ifp, ia);
 		if (ia != NULL) {
 			ip->ip_src = ia->ia_addr.sin_addr;
 			ifa_free(&ia->ia_ifa);
 		}
 	}
 
 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
 
 	return (m);
 }
 
 #ifdef KTR
 static char *
 igmp_rec_type_to_str(const int type)
 {
 
 	switch (type) {
 		case IGMP_CHANGE_TO_EXCLUDE_MODE:
 			return "TO_EX";
 			break;
 		case IGMP_CHANGE_TO_INCLUDE_MODE:
 			return "TO_IN";
 			break;
 		case IGMP_MODE_IS_EXCLUDE:
 			return "MODE_EX";
 			break;
 		case IGMP_MODE_IS_INCLUDE:
 			return "MODE_IN";
 			break;
 		case IGMP_ALLOW_NEW_SOURCES:
 			return "ALLOW_NEW";
 			break;
 		case IGMP_BLOCK_OLD_SOURCES:
 			return "BLOCK_OLD";
 			break;
 		default:
 			break;
 	}
 	return "unknown";
 }
 #endif
 
 static void
 igmp_init(void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 
 	IGMP_LOCK_INIT();
 
 	m_raopt = igmp_ra_alloc();
 
 	netisr_register(&igmp_nh);
 }
 SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL);
 
 static void
 igmp_uninit(void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 
 	netisr_unregister(&igmp_nh);
 
 	m_free(m_raopt);
 	m_raopt = NULL;
 
 	IGMP_LOCK_DESTROY();
 }
 SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL);
 
 static void
 vnet_igmp_init(const void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 
 	LIST_INIT(&V_igi_head);
 }
 VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init,
     NULL);
 
 static void
 vnet_igmp_uninit(const void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 
 	KASSERT(LIST_EMPTY(&V_igi_head),
 	    ("%s: igi list not empty; ifnets not detached?", __func__));
 }
 VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_igmp_uninit, NULL);
 
 static int
 igmp_modevent(module_t mod, int type, void *unused __unused)
 {
 
     switch (type) {
     case MOD_LOAD:
     case MOD_UNLOAD:
 	break;
     default:
 	return (EOPNOTSUPP);
     }
     return (0);
 }
 
 static moduledata_t igmp_mod = {
     "igmp",
     igmp_modevent,
     0
 };
 DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: stable/8/sys/netinet6/nd6_rtr.c
===================================================================
--- stable/8/sys/netinet6/nd6_rtr.c	(revision 281230)
+++ stable/8/sys/netinet6/nd6_rtr.c	(revision 281231)
@@ -1,2200 +1,2208 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/errno.h>
 #include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/radix.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <net/if_llatbl.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 
 static int rtpref(struct nd_defrouter *);
 static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
 static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *,
     struct mbuf *, int));
 static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *,	int);
 static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *,
 	struct nd_defrouter *));
 static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
 static void pfxrtr_del(struct nd_pfxrouter *);
 static struct nd_pfxrouter *find_pfxlist_reachable_router
 (struct nd_prefix *);
 static void defrouter_delreq(struct nd_defrouter *);
 static void nd6_rtmsg(int, struct rtentry *);
 
 static int in6_init_prefix_ltimes(struct nd_prefix *);
 static void in6_init_address_ltimes __P((struct nd_prefix *,
 	struct in6_addrlifetime *));
 
 static int nd6_prefix_onlink(struct nd_prefix *);
 static int nd6_prefix_offlink(struct nd_prefix *);
 
 static int rt6_deleteroute(struct radix_node *, void *);
 
 VNET_DECLARE(int, nd6_recalc_reachtm_interval);
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
 
 static VNET_DEFINE(struct ifnet *, nd6_defifp);
 VNET_DEFINE(int, nd6_defifindex);
 #define	V_nd6_defifp			VNET(nd6_defifp)
 
 VNET_DEFINE(int, ip6_use_tempaddr) = 0;
 
 VNET_DEFINE(int, ip6_desync_factor);
 VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
 VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
 
 VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
 
 /* RTPREF_MEDIUM has to be 0! */
 #define RTPREF_HIGH	1
 #define RTPREF_MEDIUM	0
 #define RTPREF_LOW	(-1)
 #define RTPREF_RESERVED	(-2)
 #define RTPREF_INVALID	(-3)	/* internal */
 
 /*
  * Receive Router Solicitation Message - just for routers.
  * Router solicitation/advertisement is mostly managed by userland program
  * (rtadvd) so here we have no function like nd6_ra_output().
  *
  * Based on RFC 2461
  */
 void
 nd6_rs_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_solicit *nd_rs;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	union nd_opts ndopts;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/* If I'm not a router, ignore it. */
 	if (V_ip6_accept_rtadv != 0 || V_ip6_forwarding != 1)
 		goto freeit;
 
 	/* Sanity checks */
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	/*
 	 * Don't update the neighbor cache, if src = ::.
 	 * This indicates that the src has no IP address assigned yet.
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
 		goto freeit;
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
 	if (nd_rs == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 
 	icmp6len -= sizeof(*nd_rs);
 	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: lladdrlen mismatch for %s "
 		    "(if %d, RS packet %d)\n",
 		    ip6_sprintf(ip6bufs, &saddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	ICMP6STAT_INC(icp6s_badrs);
 	m_freem(m);
 }
 
 /*
  * Receive Router Advertisement Message.
  *
  * Based on RFC 2461
  * TODO: on-link bit on prefix information
  * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
  */
 void
 nd6_ra_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_advert *nd_ra;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	int mcast = 0;
 	union nd_opts ndopts;
 	struct nd_defrouter *dr;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/*
 	 * We only accept RAs only when
 	 * the system-wide variable allows the acceptance, and
 	 * per-interface variable allows RAs on the receiving interface.
 	 */
 	if (V_ip6_accept_rtadv == 0)
 		goto freeit;
 	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
 		goto freeit;
 
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: src %s is not link-local\n",
 		    ip6_sprintf(ip6bufs, &saddr6)));
 		goto bad;
 	}
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
 	if (nd_ra == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 
 	icmp6len -= sizeof(*nd_ra);
 	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
     {
 	struct nd_defrouter dr0;
 	u_int32_t advreachable = nd_ra->nd_ra_reachable;
 
 	/* remember if this is a multicasted advertisement */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
 		mcast = 1;
 
 	bzero(&dr0, sizeof(dr0));
 	dr0.rtaddr = saddr6;
 	dr0.flags  = nd_ra->nd_ra_flags_reserved;
 	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
 	dr0.expire = time_second + dr0.rtlifetime;
 	dr0.ifp = ifp;
 	/* unspecified or not? (RFC 2461 6.3.4) */
 	if (advreachable) {
 		advreachable = ntohl(advreachable);
 		if (advreachable <= MAX_REACHABLE_TIME &&
 		    ndi->basereachable != advreachable) {
 			ndi->basereachable = advreachable;
 			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
 		}
 	}
 	if (nd_ra->nd_ra_retransmit)
 		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
-	if (nd_ra->nd_ra_curhoplimit)
-		ndi->chlim = nd_ra->nd_ra_curhoplimit;
+	if (nd_ra->nd_ra_curhoplimit) {
+		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
+			ndi->chlim = nd_ra->nd_ra_curhoplimit;
+		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
+			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
+			    "%s on %s (current = %d, received = %d). "
+			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
+			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
+		}
+	}
 	dr = defrtrlist_update(&dr0);
     }
 
 	/*
 	 * prefix
 	 */
 	if (ndopts.nd_opts_pi) {
 		struct nd_opt_hdr *pt;
 		struct nd_opt_prefix_info *pi = NULL;
 		struct nd_prefixctl pr;
 
 		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
 		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
 		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
 						(pt->nd_opt_len << 3))) {
 			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
 				continue;
 			pi = (struct nd_opt_prefix_info *)pt;
 
 			if (pi->nd_opt_pi_len != 4) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid option "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_len));
 				continue;
 			}
 
 			if (128 < pi->nd_opt_pi_prefix_len) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_prefix_len));
 				continue;
 			}
 
 			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
 			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "%s, ignored\n",
 				    ip6_sprintf(ip6bufs,
 					&pi->nd_opt_pi_prefix)));
 				continue;
 			}
 
 			bzero(&pr, sizeof(pr));
 			pr.ndpr_prefix.sin6_family = AF_INET6;
 			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
 			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
 			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
 
 			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
 			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
 			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
 			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
 			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
 			(void)prelist_update(&pr, dr, m, mcast);
 		}
 	}
 
 	/*
 	 * MTU
 	 */
 	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
 		u_long mtu;
 		u_long maxmtu;
 
 		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
 
 		/* lower bound */
 		if (mtu < IPV6_MMTU) {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
 			    "mtu=%lu sent from %s, ignoring\n",
 			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
 			goto skip;
 		}
 
 		/* upper bound */
 		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
 		    ? ndi->maxmtu : ifp->if_mtu;
 		if (mtu <= maxmtu) {
 			int change = (ndi->linkmtu != mtu);
 
 			ndi->linkmtu = mtu;
 			if (change) /* in6_maxmtu may change */
 				in6_setmaxmtu();
 		} else {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
 			    "mtu=%lu sent from %s; "
 			    "exceeds maxmtu %lu, ignoring\n",
 			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
 		}
 	}
 
  skip:
 
 	/*
 	 * Source link layer address
 	 */
     {
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: lladdrlen mismatch for %s "
 		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr,
 	    lladdrlen, ND_ROUTER_ADVERT, 0);
 
 	/*
 	 * Installing a link-layer address might change the state of the
 	 * router's neighbor cache, which might also affect our on-link
 	 * detection of adveritsed prefixes.
 	 */
 	pfxlist_onlink_check();
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	ICMP6STAT_INC(icp6s_badra);
 	m_freem(m);
 }
 
 /*
  * default router list proccessing sub routines
  */
 
 /* tell the change to user processes watching the routing socket. */
 static void
 nd6_rtmsg(int cmd, struct rtentry *rt)
 {
 	struct rt_addrinfo info;
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	ifp = rt->rt_ifp;
 	if (ifp != NULL) {
 		IF_ADDR_RLOCK(ifp);
 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		ifa_ref(ifa);
 		IF_ADDR_RUNLOCK(ifp);
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 	} else
 		ifa = NULL;
 
 	rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum);
 	if (ifa != NULL)
 		ifa_free(ifa);
 }
 
 static void
 defrouter_addreq(struct nd_defrouter *new)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *newrt = NULL;
 	int s;
 	int error;
 
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 	bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len =
 	    sizeof(struct sockaddr_in6);
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = new->rtaddr;
 
 	s = splnet();
 	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
 	    RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
 	if (newrt) {
 		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
 		RTFREE(newrt);
 	}
 	if (error == 0)
 		new->installed = 1;
 	splx(s);
 	return;
 }
 
 struct nd_defrouter *
 defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
 {
 	struct nd_defrouter *dr;
 
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
 			return (dr);
 	}
 
 	return (NULL);		/* search failed */
 }
 
 /*
  * Remove the default route for a given router.
  * This is just a subroutine function for defrouter_select(), and should
  * not be called from anywhere else.
  */
 static void
 defrouter_delreq(struct nd_defrouter *dr)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *oldrt = NULL;
 
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 	bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len =
 	    sizeof(struct sockaddr_in6);
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = dr->rtaddr;
 
 	in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate,
 	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB);
 	if (oldrt) {
 		nd6_rtmsg(RTM_DELETE, oldrt);
 		RTFREE(oldrt);
 	}
 
 	dr->installed = 0;
 }
 
 /*
  * remove all default routes from default router list
  */
 void
 defrouter_reset(void)
 {
 	struct nd_defrouter *dr;
 
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
 		defrouter_delreq(dr);
 
 	/*
 	 * XXX should we also nuke any default routers in the kernel, by
 	 * going through them by rtalloc1()?
 	 */
 }
 
 void
 defrtrlist_del(struct nd_defrouter *dr)
 {
 	struct nd_defrouter *deldr = NULL;
 	struct nd_prefix *pr;
 
 	/*
 	 * Flush all the routing table entries that use the router
 	 * as a next hop.
 	 */
 	if (!V_ip6_forwarding && V_ip6_accept_rtadv) /* XXX: better condition? */
 		rt6_flush(&dr->rtaddr, dr->ifp);
 
 	if (dr->installed) {
 		deldr = dr;
 		defrouter_delreq(dr);
 	}
 	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		struct nd_pfxrouter *pfxrtr;
 		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
 			pfxrtr_del(pfxrtr);
 	}
 	pfxlist_onlink_check();
 
 	/*
 	 * If the router is the primary one, choose a new one.
 	 * Note that defrouter_select() will remove the current gateway
 	 * from the routing table.
 	 */
 	if (deldr)
 		defrouter_select();
 
 	free(dr, M_IP6NDP);
 }
 
 /*
  * Default Router Selection according to Section 6.3.6 of RFC 2461 and
  * draft-ietf-ipngwg-router-selection:
  * 1) Routers that are reachable or probably reachable should be preferred.
  *    If we have more than one (probably) reachable router, prefer ones
  *    with the highest router preference.
  * 2) When no routers on the list are known to be reachable or
  *    probably reachable, routers SHOULD be selected in a round-robin
  *    fashion, regardless of router preference values.
  * 3) If the Default Router List is empty, assume that all
  *    destinations are on-link.
  *
  * We assume nd_defrouter is sorted by router preference value.
  * Since the code below covers both with and without router preference cases,
  * we do not need to classify the cases by ifdef.
  *
  * At this moment, we do not try to install more than one default router,
  * even when the multipath routing is available, because we're not sure about
  * the benefits for stub hosts comparing to the risk of making the code
  * complicated and the possibility of introducing bugs.
  */
 void
 defrouter_select(void)
 {
 	int s = splnet();
 	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
 	struct llentry *ln = NULL;
 
 	/*
 	 * This function should be called only when acting as an autoconfigured
 	 * host.  Although the remaining part of this function is not effective
 	 * if the node is not an autoconfigured host, we explicitly exclude
 	 * such cases here for safety.
 	 */
 	if (V_ip6_forwarding || !V_ip6_accept_rtadv) {
 		nd6log((LOG_WARNING,
 		    "defrouter_select: called unexpectedly (forwarding=%d, "
 		    "accept_rtadv=%d)\n", V_ip6_forwarding, V_ip6_accept_rtadv));
 		splx(s);
 		return;
 	}
 
 	/*
 	 * Let's handle easy case (3) first:
 	 * If default router list is empty, there's nothing to be done.
 	 */
 	if (TAILQ_EMPTY(&V_nd_defrouter)) {
 		splx(s);
 		return;
 	}
 
 	/*
 	 * Search for a (probably) reachable router from the list.
 	 * We just pick up the first reachable one (if any), assuming that
 	 * the ordering rule of the list described in defrtrlist_update().
 	 */
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		IF_AFDATA_RLOCK(dr->ifp);
 		if (selected_dr == NULL &&
 		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
 		    ND6_IS_LLINFO_PROBREACH(ln)) {
 			selected_dr = dr;
 		}
 		IF_AFDATA_RUNLOCK(dr->ifp);
 		if (ln != NULL) {
 			LLE_RUNLOCK(ln);
 			ln = NULL;
 		}
 
 		if (dr->installed && installed_dr == NULL)
 			installed_dr = dr;
 		else if (dr->installed && installed_dr) {
 			/* this should not happen.  warn for diagnosis. */
 			log(LOG_ERR, "defrouter_select: more than one router"
 			    " is installed\n");
 		}
 	}
 	/*
 	 * If none of the default routers was found to be reachable,
 	 * round-robin the list regardless of preference.
 	 * Otherwise, if we have an installed router, check if the selected
 	 * (reachable) router should really be preferred to the installed one.
 	 * We only prefer the new router when the old one is not reachable
 	 * or when the new one has a really higher preference value.
 	 */
 	if (selected_dr == NULL) {
 		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
 			selected_dr = TAILQ_FIRST(&V_nd_defrouter);
 		else
 			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
 	} else if (installed_dr) {
 		IF_AFDATA_RLOCK(installed_dr->ifp);
 		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
 		    ND6_IS_LLINFO_PROBREACH(ln) &&
 		    rtpref(selected_dr) <= rtpref(installed_dr)) {
 			selected_dr = installed_dr;
 		}
 		IF_AFDATA_RUNLOCK(installed_dr->ifp);
 		if (ln != NULL)
 			LLE_RUNLOCK(ln);
 	}
 
 	/*
 	 * If the selected router is different than the installed one,
 	 * remove the installed router and install the selected one.
 	 * Note that the selected router is never NULL here.
 	 */
 	if (installed_dr != selected_dr) {
 		if (installed_dr)
 			defrouter_delreq(installed_dr);
 		defrouter_addreq(selected_dr);
 	}
 
 	splx(s);
 	return;
 }
 
 /*
  * Helper for default router selection.
  * Interprets the router-preference field of the RA flags as a 2-bit signed
  * integer and maps it to one of the RTPREF_* values.  The reserved
  * encoding is handled like medium preference.
  */
 static int
 rtpref(struct nd_defrouter *dr)
 {
 	int pref;
 
 	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
 	case ND_RA_FLAG_RTPREF_HIGH:
 		pref = RTPREF_HIGH;
 		break;
 	case ND_RA_FLAG_RTPREF_MEDIUM:
 	case ND_RA_FLAG_RTPREF_RSV:
 		pref = RTPREF_MEDIUM;
 		break;
 	case ND_RA_FLAG_RTPREF_LOW:
 		pref = RTPREF_LOW;
 		break;
 	default:
 		/*
 		 * The masked value should always match one of the cases
 		 * above; getting here would indicate a kernel-internal bug,
 		 * so it is always logged.
 		 */
 		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
 		pref = RTPREF_INVALID;
 		break;
 	}
 
 	return (pref);
 }
 
 static struct nd_defrouter *
 defrtrlist_update(struct nd_defrouter *new)
 {
 	struct nd_defrouter *dr, *n;
 	int s = splnet();
 
 	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
 		/* entry exists */
 		if (new->rtlifetime == 0) {
 			defrtrlist_del(dr);
 			dr = NULL;
 		} else {
 			int oldpref = rtpref(dr);
 
 			/* override */
 			dr->flags = new->flags; /* xxx flag check */
 			dr->rtlifetime = new->rtlifetime;
 			dr->expire = new->expire;
 
 			/*
 			 * If the preference does not change, there's no need
 			 * to sort the entries. Also make sure the selected
 			 * router is still installed in the kernel.
 			 */
 			if (dr->installed && rtpref(new) == oldpref) {
 				splx(s);
 				return (dr);
 			}
 
 			/*
 			 * preferred router may be changed, so relocate
 			 * this router.
 			 * XXX: calling TAILQ_REMOVE directly is a bad manner.
 			 * However, since defrtrlist_del() has many side
 			 * effects, we intentionally do so here.
 			 * defrouter_select() below will handle routing
 			 * changes later.
 			 */
 			TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
 			n = dr;
 			goto insert;
 		}
 		splx(s);
 		return (dr);
 	}
 
 	/* entry does not exist */
 	if (new->rtlifetime == 0) {
 		splx(s);
 		return (NULL);
 	}
 
 	n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
 	if (n == NULL) {
 		splx(s);
 		return (NULL);
 	}
 	bzero(n, sizeof(*n));
 	*n = *new;
 
 insert:
 	/*
 	 * Insert the new router in the Default Router List;
 	 * The Default Router List should be in the descending order
 	 * of router-preferece.  Routers with the same preference are
 	 * sorted in the arriving time order.
 	 */
 
 	/* insert at the end of the group */
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		if (rtpref(n) > rtpref(dr))
 			break;
 	}
 	if (dr)
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
 
 	defrouter_select();
 
 	splx(s);
 
 	return (n);
 }
 
 /*
  * Find the entry in pr's advertising-router list that refers to dr.
  * Returns the matching entry, or NULL if dr is not on the list.
  */
 static struct nd_pfxrouter *
 pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *pfr;
 
 	LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
 		if (pfr->router == dr)
 			return (pfr);
 	}
 
 	return (NULL);
 }
 
 /*
  * Record dr as a router advertising prefix pr, then re-evaluate the
  * on-link state of the prefix list.  An allocation failure is silently
  * ignored.
  */
 static void
 pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *pfr;
 
 	pfr = (struct nd_pfxrouter *)malloc(sizeof(*pfr), M_IP6NDP, M_NOWAIT);
 	if (pfr != NULL) {
 		bzero(pfr, sizeof(*pfr));
 		pfr->router = dr;
 
 		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, pfr, pfr_entry);
 
 		pfxlist_onlink_check();
 	}
 }
 
 /*
  * Unlink a prefix-router entry from the list it is on and release its
  * memory.  The entry must not be used after this call.
  */
 static void
 pfxrtr_del(struct nd_pfxrouter *pfr)
 {
 	LIST_REMOVE(pfr, pfr_entry);
 	free(pfr, M_IP6NDP);
 }
 
 /*
  * Search the global prefix list for an entry on the same interface as
  * key, with the same prefix length and the same prefix bits.
  * Returns the matching entry, or NULL if there is none.
  */
 struct nd_prefix *
 nd6_prefix_lookup(struct nd_prefixctl *key)
 {
 	struct nd_prefix *pr;
 
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (key->ndpr_ifp != pr->ndpr_ifp)
 			continue;
 		if (key->ndpr_plen != pr->ndpr_plen)
 			continue;
 		if (in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
 		    &pr->ndpr_prefix.sin6_addr, key->ndpr_plen))
 			return (pr);
 	}
 
 	return (NULL);
 }
 
 int
 nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
     struct nd_prefix **newp)
 {
 	struct nd_prefix *new = NULL;
 	int error = 0;
 	int i, s;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
 	if (new == NULL)
 		return(ENOMEM);
 	bzero(new, sizeof(*new));
 	new->ndpr_ifp = pr->ndpr_ifp;
 	new->ndpr_prefix = pr->ndpr_prefix;
 	new->ndpr_plen = pr->ndpr_plen;
 	new->ndpr_vltime = pr->ndpr_vltime;
 	new->ndpr_pltime = pr->ndpr_pltime;
 	new->ndpr_flags = pr->ndpr_flags;
 	if ((error = in6_init_prefix_ltimes(new)) != 0) {
 		free(new, M_IP6NDP);
 		return(error);
 	}
 	new->ndpr_lastupdate = time_second;
 	if (newp != NULL)
 		*newp = new;
 
 	/* initialization */
 	LIST_INIT(&new->ndpr_advrtrs);
 	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
 	/* make prefix in the canonical form */
 	for (i = 0; i < 4; i++)
 		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
 		    new->ndpr_mask.s6_addr32[i];
 
 	s = splnet();
 	/* link ndpr_entry to nd_prefix list */
 	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
 	splx(s);
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
 		int e;
 
 		if ((e = nd6_prefix_onlink(new)) != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
 			    "the prefix %s/%d on-link on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 			/* proceed anyway. XXX: is it correct? */
 		}
 	}
 
 	if (dr)
 		pfxrtr_add(new, dr);
 
 	return 0;
 }
 
 void
 prelist_remove(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
 	int e, s;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* make sure to invalidate the prefix until it is really freed. */
 	pr->ndpr_vltime = 0;
 	pr->ndpr_pltime = 0;
 
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
 	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
 	 * when executing "ndp -p".
 	 */
 
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
 	    (e = nd6_prefix_offlink(pr)) != 0) {
 		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
 		    "on %s, errno=%d\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 		/* what should we do? */
 	}
 
 	if (pr->ndpr_refcnt > 0)
 		return;		/* notice here? */
 
 	s = splnet();
 
 	/* unlink ndpr_entry from nd_prefix list */
 	LIST_REMOVE(pr, ndpr_entry);
 
 	/* free list of routers that adversed the prefix */
 	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
 		free(pfr, M_IP6NDP);
 	}
 	splx(s);
 
 	free(pr, M_IP6NDP);
 
 	pfxlist_onlink_check();
 }
 
 /*
  * Process one advertised prefix: update (or create) the matching prefix
  * list entry, then perform address autoconfiguration for it following
  * Section 5.5.3 of RFC 2462.
  *
  * new   - the advertised prefix parameters
  * dr    - the advertising router; may be NULL
  * m     - the packet that carried the prefix; may be NULL.  It is only
  *         inspected for the M_AUTHIPHDR/M_AUTHIPDGM flags, and only when
  *         both macros are defined.
  * mcast - passed through to in6_ifadd() when a new address is created
  *
  * Returns 0, the error from nd6_prelist_add(), EINVAL when the preferred
  * lifetime exceeds the valid lifetime, or EADDRNOTAVAIL when in6_ifadd()
  * fails.
  */
 
 static int
 prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
     struct mbuf *m, int mcast)
 {
 	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp = new->ndpr_ifp;
 	struct nd_prefix *pr;
 	int s = splnet();
 	int error = 0;
 	int newprefix = 0;	/* set below, not read again in this function */
 	int auth;
 	struct in6_addrlifetime lt6_tmp;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* auth stays 0 unless both authentication flags are set on m. */
 	auth = 0;
 	if (m) {
 		/*
 		 * Authenticity for NA consists authentication for
 		 * both IP header and IP datagrams, doesn't it ?
 		 */
 #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
 		auth = ((m->m_flags & M_AUTHIPHDR) &&
 		    (m->m_flags & M_AUTHIPDGM));
 #endif
 	}
 
 	if ((pr = nd6_prefix_lookup(new)) != NULL) {
 		/*
 		 * nd6_prefix_lookup() ensures that pr and new have the same
 		 * prefix on a same interface.
 		 */
 
 		/*
 		 * Update prefix information.  Note that the on-link (L) bit
 		 * and the autonomous (A) bit should NOT be changed from 1
 		 * to 0.
 		 */
 		if (new->ndpr_raf_onlink == 1)
 			pr->ndpr_raf_onlink = 1;
 		if (new->ndpr_raf_auto == 1)
 			pr->ndpr_raf_auto = 1;
 		/* Lifetimes are only refreshed when the L bit is advertised. */
 		if (new->ndpr_raf_onlink) {
 			pr->ndpr_vltime = new->ndpr_vltime;
 			pr->ndpr_pltime = new->ndpr_pltime;
 			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
 			pr->ndpr_lastupdate = time_second;
 		}
 
 		if (new->ndpr_raf_onlink &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			int e;
 
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 				/* proceed anyway. XXX: is it correct? */
 			}
 		}
 
 		/* Remember dr as an advertising router if it is a new one. */
 		if (dr && pfxrtr_lookup(pr, dr) == NULL)
 			pfxrtr_add(pr, dr);
 	} else {
 		struct nd_prefix *newpr = NULL;
 
 		newprefix = 1;
 
 		/* Do not create an entry for an already expired prefix... */
 		if (new->ndpr_vltime == 0)
 			goto end;
 		/* ...nor for one with neither the L nor the A bit set. */
 		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
 			goto end;
 
 		error = nd6_prelist_add(new, dr, &newpr);
 		if (error != 0 || newpr == NULL) {
 			nd6log((LOG_NOTICE, "prelist_update: "
 			    "nd6_prelist_add failed for %s/%d on %s "
 			    "errno=%d, returnpr=%p\n",
 			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
 			    new->ndpr_plen, if_name(new->ndpr_ifp),
 			    error, newpr));
 			goto end; /* we should just give up in this case. */
 		}
 
 		/*
 		 * XXX: from the ND point of view, we can ignore a prefix
 		 * with the on-link bit being zero.  However, we need a
 		 * prefix structure for references from autoconfigured
 		 * addresses.  Thus, we explicitly make sure that the prefix
 		 * itself expires now.
 		 */
 		if (newpr->ndpr_raf_onlink == 0) {
 			newpr->ndpr_vltime = 0;
 			newpr->ndpr_pltime = 0;
 			in6_init_prefix_ltimes(newpr);
 		}
 
 		pr = newpr;
 	}
 
 	/*
 	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
 	 * Note that pr must be non NULL at this point.
 	 */
 
 	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
 	if (!new->ndpr_raf_auto)
 		goto end;
 
 	/*
 	 * 5.5.3 (b). the link-local prefix should have been ignored in
 	 * nd6_ra_input.
 	 */
 
 	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
 	if (new->ndpr_pltime > new->ndpr_vltime) {
 		error = EINVAL;	/* XXX: won't be used */
 		goto end;
 	}
 
 	/*
 	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
 	 * an address configured by stateless autoconfiguration already in the
 	 * list of addresses associated with the interface, and the Valid
 	 * Lifetime is not 0, form an address.  We first check if we have
 	 * a matching prefix.
 	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
 	 * consider autoconfigured addresses while RFC2462 simply said
 	 * "address".
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in6_ifaddr *ifa6;
 		u_int32_t remaininglifetime;
 
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		ifa6 = (struct in6_ifaddr *)ifa;
 
 		/*
 		 * We only consider autoconfigured addresses as per rfc2462bis.
 		 */
 		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
 			continue;
 
 		/*
 		 * Spec is not clear here, but I believe we should concentrate
 		 * on unicast (i.e. not anycast) addresses.
 		 * XXX: other ia6_flags? detached or duplicated?
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
 			continue;
 
 		/*
 		 * Ignore the address if it is not associated with a prefix
 		 * or is associated with a prefix that is different from this
 		 * one.  (pr is never NULL here)
 		 */
 		if (ifa6->ia6_ndpr != pr)
 			continue;
 
 		if (ia6_match == NULL) /* remember the first one */
 			ia6_match = ifa6;
 
 		/*
 		 * An already autoconfigured address matched.  Now that we
 		 * are sure there is at least one matched address, we can
 		 * proceed to 5.5.3. (e): update the lifetimes according to the
 		 * "two hours" rule and the privacy extension.
 		 * We apply some clarifications in rfc2462bis:
 		 * - use remaininglifetime instead of storedlifetime as a
 		 *   variable name
 		 * - remove the dead code in the "two-hour" rule
 		 */
 #define TWOHOUR		(120*60)
 		lt6_tmp = ifa6->ia6_lifetime;
 
 		/*
 		 * Compute how much of the stored valid lifetime is left,
 		 * measured from the address's last update time.
 		 */
 		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
 			remaininglifetime = ND6_INFINITE_LIFETIME;
 		else if (time_second - ifa6->ia6_updatetime >
 			 lt6_tmp.ia6t_vltime) {
 			/*
 			 * The case of "invalid" address.  We should usually
 			 * not see this case.
 			 */
 			remaininglifetime = 0;
 		} else
 			remaininglifetime = lt6_tmp.ia6t_vltime -
 			    (time_second - ifa6->ia6_updatetime);
 
 		/* when not updating, keep the current stored lifetime. */
 		lt6_tmp.ia6t_vltime = remaininglifetime;
 
 		/*
 		 * The "two hours" rule: accept the advertised valid lifetime
 		 * when it exceeds two hours or the remaining lifetime; when
 		 * the remaining lifetime is already at most two hours, only
 		 * an authenticated advertisement may change it; otherwise
 		 * clamp to two hours.
 		 */
 		if (TWOHOUR < new->ndpr_vltime ||
 		    remaininglifetime < new->ndpr_vltime) {
 			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 		} else if (remaininglifetime <= TWOHOUR) {
 			if (auth) {
 				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 			}
 		} else {
 			/*
 			 * new->ndpr_vltime <= TWOHOUR &&
 			 * TWOHOUR < remaininglifetime
 			 */
 			lt6_tmp.ia6t_vltime = TWOHOUR;
 		}
 
 		/* The 2 hour rule is not imposed for preferred lifetime. */
 		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
 
 		in6_init_address_ltimes(pr, &lt6_tmp);
 
 		/*
 		 * We need to treat lifetimes for temporary addresses
 		 * differently, according to
 		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
 		 * we only update the lifetimes when they are in the maximum
 		 * intervals.
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 			u_int32_t maxvltime, maxpltime;
 
 			/*
 			 * Upper bounds: the configured temporary lifetime
 			 * minus the address's age and the desync factor,
 			 * floored at zero.
 			 */
 			if (V_ip6_temp_valid_lifetime >
 			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
 			    V_ip6_desync_factor)) {
 				maxvltime = V_ip6_temp_valid_lifetime -
 				    (time_second - ifa6->ia6_createtime) -
 				    V_ip6_desync_factor;
 			} else
 				maxvltime = 0;
 			if (V_ip6_temp_preferred_lifetime >
 			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
 			    V_ip6_desync_factor)) {
 				maxpltime = V_ip6_temp_preferred_lifetime -
 				    (time_second - ifa6->ia6_createtime) -
 				    V_ip6_desync_factor;
 			} else
 				maxpltime = 0;
 
 			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
 			    lt6_tmp.ia6t_vltime > maxvltime) {
 				lt6_tmp.ia6t_vltime = maxvltime;
 			}
 			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
 			    lt6_tmp.ia6t_pltime > maxpltime) {
 				lt6_tmp.ia6t_pltime = maxpltime;
 			}
 		}
 		ifa6->ia6_lifetime = lt6_tmp;
 		ifa6->ia6_updatetime = time_second;
 	}
 	IF_ADDR_RUNLOCK(ifp);
 	if (ia6_match == NULL && new->ndpr_vltime) {
 		int ifidlen;
 
 		/*
 		 * 5.5.3 (d) (continued)
 		 * No address matched and the valid lifetime is non-zero.
 		 * Create a new address.
 		 */
 
 		/*
 		 * Prefix Length check:
 		 * If the sum of the prefix length and interface identifier
 		 * length does not equal 128 bits, the Prefix Information
 		 * option MUST be ignored.  The length of the interface
 		 * identifier is defined in a separate link-type specific
 		 * document.
 		 */
 		ifidlen = in6_if2idlen(ifp);
 		if (ifidlen < 0) {
 			/* this should not happen, so we always log it. */
 			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
 			    if_name(ifp));
 			goto end;
 		}
 		if (ifidlen + pr->ndpr_plen != 128) {
 			nd6log((LOG_INFO,
 			    "prelist_update: invalid prefixlen "
 			    "%d for %s, ignored\n",
 			    pr->ndpr_plen, if_name(ifp)));
 			goto end;
 		}
 
 		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
 			/*
 			 * note that we should use pr (not new) for reference.
 			 */
 			pr->ndpr_refcnt++;
 			ia6->ia6_ndpr = pr;
 
 			/*
 			 * RFC 3041 3.3 (2).
 			 * When a new public address is created as described
 			 * in RFC2462, also create a new temporary address.
 			 *
 			 * RFC 3041 3.5.
 			 * When an interface connects to a new link, a new
 			 * randomized interface identifier should be generated
 			 * immediately together with a new set of temporary
 			 * addresses.  Thus, we specify 1 as the 2nd arg of
 			 * in6_tmpifadd().
 			 */
 			if (V_ip6_use_tempaddr) {
 				int e;
 				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
 					nd6log((LOG_NOTICE, "prelist_update: "
 					    "failed to create a temporary "
 					    "address, errno=%d\n",
 					    e));
 				}
 			}
 			/* Release the reference obtained from in6_ifadd(). */
 			ifa_free(&ia6->ia_ifa);
 
 			/*
 			 * A newly added address might affect the status
 			 * of other addresses, so we check and update it.
 			 * XXX: what if address duplication happens?
 			 */
 			pfxlist_onlink_check();
 		} else {
 			/* just set an error. do not bark here. */
 			error = EADDRNOTAVAIL; /* XXX: might be unused. */
 		}
 	}
 
 	/* Common exit: restore the priority level saved at entry. */
  end:
 	splx(s);
 	return error;
 }
 
 /*
  * Helper for the on-link detection below: walk the routers that advertised
  * pr and return the first one whose neighbor cache entry is (probably)
  * reachable, or NULL if there is none.
  */
 static struct nd_pfxrouter *
 find_pfxlist_reachable_router(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr;
 	struct llentry *lle;
 	int reachable;
 
 	LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
 		/* Look up the router's neighbor cache entry. */
 		IF_AFDATA_RLOCK(pfr->router->ifp);
 		lle = nd6_lookup(&pfr->router->rtaddr, 0, pfr->router->ifp);
 		IF_AFDATA_RUNLOCK(pfr->router->ifp);
 
 		if (lle != NULL) {
 			/* Sample the state, then drop the entry lock. */
 			reachable = ND6_IS_LLINFO_PROBREACH(lle);
 			LLE_RUNLOCK(lle);
 			if (reachable)
 				return (pfr);
 		}
 	}
 
 	return (NULL);
 }
 
 /*
  * Check if each prefix in the prefix list has at least one available router
  * that advertised the prefix (a router is "available" if its neighbor cache
  * entry is reachable or probably reachable).
  * If the check fails, the prefix may be off-link, because, for example,
  * we have moved from the network but the lifetime of the prefix has not
  * expired yet.  So we should not use the prefix if there is another prefix
  * that has an available router.
  * But, if there is no prefix that has an available router, we still regard
  * all the prefixes as on-link.  This is because we can't tell if all the
  * routers are simply dead or if we really moved from the network and there
  * is no router around us.
  *
  * The function works in three passes: it updates the NDPRF_DETACHED flag of
  * each prefix, then installs or removes interface routes to match, and
  * finally updates the IN6_IFF_DETACHED flag of autoconfigured addresses,
  * starting DAD for addresses that become attached again.
  */
 void
 pfxlist_onlink_check(void)
 {
 	struct nd_prefix *pr;
 	struct in6_ifaddr *ifa;
 	struct nd_defrouter *dr;
 	struct nd_pfxrouter *pfxrtr = NULL;
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
 	 * router.
 	 */
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
 			break;
 	}
 
 	/*
 	 * If we have no such prefix, check whether we still have a router
 	 * that does not advertise any prefixes.
 	 */
 	if (pr == NULL) {
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			struct nd_prefix *pr0;
 
 			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
 				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
 					break;
 			}
 			if (pfxrtr != NULL)
 				break;
 		}
 	}
 	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
 		/*
 		 * There is at least one prefix that has a reachable router,
 		 * or at least a router which probably does not advertise
 		 * any prefixes.  The latter would be the case when we move
 		 * to a new link where we have a router that does not provide
 		 * prefixes and we configure an address by hand.
 		 * Detach prefixes which have no reachable advertising
 		 * router, and attach other prefixes.
 		 */
 		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 			/* XXX: a link-local prefix should never be detached */
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			/*
 			 * we aren't interested in prefixes without the L bit
 			 * set.
 			 */
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if (pr->ndpr_raf_auto == 0)
 				continue;
 
 			/*
 			 * The prefix ends up detached exactly when it has no
 			 * reachable advertising router, so one lookup is
 			 * enough to decide the flag.
 			 */
 			if (find_pfxlist_reachable_router(pr) != NULL)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 			else
 				pr->ndpr_stateflags |= NDPRF_DETACHED;
 		}
 	} else {
 		/* there is no prefix that has a reachable router */
 		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if (pr->ndpr_raf_auto == 0)
 				continue;
 
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	}
 
 	/*
 	 * Remove each interface route associated with a (just) detached
 	 * prefix, and reinstall the interface route for a (just) attached
 	 * prefix.  Note that not every reinstallation attempt necessarily
 	 * succeeds when the same prefix is shared among multiple
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		int e;
 		char ip6buf[INET6_ADDRSTRLEN];
 
 		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 			continue;
 
 		if (pr->ndpr_raf_onlink == 0)
 			continue;
 
 		if (pr->ndpr_raf_auto == 0)
 			continue;
 
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 			if ((e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
 		}
 		/* The L bit is known to be set here (checked above). */
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d onlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
 		}
 	}
 
 	/*
 	 * Changes on the prefix status might affect address status as well.
 	 * Make sure that all addresses derived from an attached prefix are
 	 * attached, and that all addresses derived from a detached prefix are
 	 * detached.  Note, however, that a manually configured address should
 	 * always be attached.
 	 * The precise detection logic is same as the one for prefixes.
 	 *
 	 * XXXRW: in6_ifaddrhead locking.
 	 */
 	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
 		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
 			continue;
 
 		if (ifa->ia6_ndpr == NULL) {
 			/*
 			 * This can happen when we first configure the address
 			 * (i.e. the address exists, but the prefix does not).
 			 * XXX: complicated relationships...
 			 */
 			continue;
 		}
 
 		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
 			break;
 	}
 	if (ifa != NULL) {
 		/*
 		 * At least one autoconfigured address has a prefix with a
 		 * reachable router: attach such addresses (restarting DAD
 		 * for those that were detached) and detach the rest.
 		 */
 		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
 				continue;
 
 			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
 				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
 					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
 					nd6_dad_start((struct ifaddr *)ifa, 0);
 				}
 			} else {
 				ifa->ia6_flags |= IN6_IFF_DETACHED;
 			}
 		}
 	}
 	else {
 		/*
 		 * No autoconfigured address has a reachable router: treat
 		 * every autoconfigured address as attached.
 		 */
 		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
 				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
 				/* Do we need a delay in this case? */
 				nd6_dad_start((struct ifaddr *)ifa, 0);
 			}
 		}
 	}
 }
 
 /*
  * Install the interface route for prefix pr, using ifa as the associated
  * interface address, in every FIB from 0 to rt_numfibs - 1.
  *
  * For each FIB where the route is added, the route's gateway is replaced
  * by a link-level sockaddr carrying the interface type and index, the
  * addition is announced with nd6_rtmsg(), and NDPRF_ONLINK is set on the
  * prefix.  Failures are logged.
  *
  * Returns 0 if every FIB succeeded, otherwise the last error seen.  Note
  * that NDPRF_ONLINK may be set even when a non-zero value is returned, if
  * the route was added in at least one FIB.
  */
 static int
 nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
 {
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 	struct radix_node_head *rnh;
 	struct rtentry *rt;
 	struct sockaddr_in6 mask6;
 	u_long rtflags;
 	int error, a_failure, fibnum;
 
 	/*
 	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
 	 * ifa->ifa_rtrequest = nd6_rtrequest;
 	 */
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_len = sizeof(mask6);
 	mask6.sin6_addr = pr->ndpr_mask;
 	/* Use the address flags minus IFA_RTSELF, and mark the route up. */
 	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
 
 	a_failure = 0;
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 
 		rt = NULL;
 		error = in6_rtrequest(RTM_ADD,
 		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
 		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
 		if (error == 0) {
 			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
 			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
 			    error, pr, ifa));
 
 			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
 			/* XXX what if rnh == NULL? */
 			/*
 			 * The radix head lock is taken before the route
 			 * lock and released first; the route stays locked
 			 * across nd6_rtmsg().
 			 */
 			RADIX_NODE_HEAD_LOCK(rnh);
 			RT_LOCK(rt);
 			if (rt_setgate(rt, rt_key(rt),
 			    (struct sockaddr *)&null_sdl) == 0) {
 				struct sockaddr_dl *dl;
 
 				/* Fill in the new link-level gateway. */
 				dl = (struct sockaddr_dl *)rt->rt_gateway;
 				dl->sdl_type = rt->rt_ifp->if_type;
 				dl->sdl_index = rt->rt_ifp->if_index;
 			}
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 			nd6_rtmsg(RTM_ADD, rt);
 			RT_UNLOCK(rt);
 			pr->ndpr_stateflags |= NDPRF_ONLINK;
 		} else {
 			char ip6buf[INET6_ADDRSTRLEN];
 			char ip6bufg[INET6_ADDRSTRLEN];
 			char ip6bufm[INET6_ADDRSTRLEN];
 			struct sockaddr_in6 *sin6;
 
 			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
 			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
 			    "flags=%lx errno = %d\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
 			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
 			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
 			    rtflags, error));
 
 			/* Save last error to return, see rtinit(). */
 			a_failure = error;
 		}
 
 		/* Drop the reference on the route returned by the request. */
 		if (rt != NULL) {
 			RT_LOCK(rt);
 			RT_REMREF(rt);
 			RT_UNLOCK(rt);
 		}
 	}
 
 	/* Return the last error we got. */
 	return (a_failure);
 }
 
 /*
  * Make prefix pr on-link by installing its interface route.
  *
  * Returns EEXIST if the prefix is already marked on-link, 0 if an
  * identical prefix is already on-link elsewhere in the prefix list or if
  * no IPv6 address could be found on the interface (nothing is installed
  * in either case), and otherwise the result of
  * nd6_prefix_onlink_rtrequest().
  */
 static int
 nd6_prefix_onlink(struct nd_prefix *pr)
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *other;
 	struct ifaddr *ifa;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int error;
 
 	/* sanity check */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_onlink: %s/%d is already on-link\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen));
 		return (EEXIST);
 	}
 
 	/*
 	 * Before installing the interface route, look for the same prefix
 	 * on another entry that has already installed the route.  Such a
 	 * configuration is expected to be rare but is explicitly allowed;
 	 * in that case there is nothing to install here.
 	 */
 	LIST_FOREACH(other, &V_nd_prefix, ndpr_entry) {
 		if (other == pr ||
 		    (other->ndpr_stateflags & NDPRF_ONLINK) == 0 ||
 		    other->ndpr_plen != pr->ndpr_plen)
 			continue;
 
 		if (in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 		    &other->ndpr_prefix.sin6_addr, pr->ndpr_plen))
 			return (0);
 	}
 
 	/*
 	 * Pick the interface address to associate with the route.  A
 	 * link-local address is preferred; failing that, take the first
 	 * IPv6 address on the interface and hold a reference on it.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
 	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
 	if (ifa == NULL) {
 		/* XXX: freebsd does not have ifa_ifwithaf */
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family == AF_INET6) {
 				ifa_ref(ifa);
 				break;
 			}
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		/* should we care about ia6_flags? */
 
 		if (ifa == NULL) {
 			/*
 			 * This can still happen, when, for example, we
 			 * receive an RA containing a prefix with the L bit
 			 * set and the A bit clear, after removing all IPv6
 			 * addresses on the receiving interface.  This
 			 * should, of course, be rare though.
 			 */
 			nd6log((LOG_NOTICE,
 			    "nd6_prefix_onlink: failed to find any ifaddr"
 			    " to add route for a prefix(%s/%d) on %s\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(ifp)));
 			return (0);
 		}
 	}
 
 	error = nd6_prefix_onlink_rtrequest(pr, ifa);
 
 	/* ifa is always non-NULL here; release the reference held on it. */
 	ifa_free(ifa);
 
 	return (error);
 }
 
 /*
  * Make prefix pr off-link by deleting its interface route from every FIB
  * (0 to rt_numfibs - 1).
  *
  * If all deletions succeed, NDPRF_ONLINK is cleared and any other
  * non-detached, off-link entry for the same prefix is given a chance to
  * become on-link instead.  If any deletion fails, the flag is left set
  * and the failure is logged.
  *
  * Returns EEXIST if the prefix is not marked on-link, 0 on success, or
  * the last error returned by in6_rtrequest().
  */
 static int
 nd6_prefix_offlink(struct nd_prefix *pr)
 {
 	int error = 0;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *opr;
 	struct sockaddr_in6 sa6, mask6;
 	struct rtentry *rt;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int fibnum, a_failure;
 
 	/* sanity check */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: %s/%d is already off-link\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen));
 		return (EEXIST);
 	}
 
 	/* Build the destination and netmask sockaddrs for the request. */
 	bzero(&sa6, sizeof(sa6));
 	sa6.sin6_family = AF_INET6;
 	sa6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
 	    sizeof(struct in6_addr));
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_family = AF_INET6;
 	/* sa6 and mask6 have the same type, so sizeof(sa6) fits mask6 too. */
 	mask6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
 
 	a_failure = 0;
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		rt = NULL;
 		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
 		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
 		if (error == 0) {
 			/* report the route deletion to the routing socket. */
 			if (rt != NULL)
 				nd6_rtmsg(RTM_DELETE, rt);
 		} else {
 			/* Save last error to return, see rtinit(). */
 			a_failure = error;
 		}
 		/* Release the route handed back by the request, if any. */
 		if (rt != NULL) {
 			RTFREE(rt);
 		}
 	}
 	error = a_failure;
 	if (error == 0) {
 		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
 
 		/*
 		 * There might be the same prefix on another interface,
 		 * the prefix which could not be on-link just because we have
 		 * the interface route (see comments in nd6_prefix_onlink).
 		 * If there's one, try to make the prefix on-link on the
 		 * interface.
 		 */
 		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
 			if (opr == pr)
 				continue;
 
 			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
 				continue;
 
 			/*
 			 * KAME specific: detached prefixes should not be
 			 * on-link.
 			 */
 			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				continue;
 
 			if (opr->ndpr_plen == pr->ndpr_plen &&
 			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
 				int e;
 
 				if ((e = nd6_prefix_onlink(opr)) != 0) {
 					nd6log((LOG_ERR,
 					    "nd6_prefix_offlink: failed to "
 					    "recover a prefix %s/%d from %s "
 					    "to %s (errno = %d)\n",
 					    ip6_sprintf(ip6buf,
 						&opr->ndpr_prefix.sin6_addr),
 					    opr->ndpr_plen, if_name(ifp),
 					    if_name(opr->ndpr_ifp), e));
 				}
 			}
 		}
 	} else {
 		/* XXX: can we still set the NDPRF_ONLINK flag? */
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: failed to delete route: "
 		    "%s/%d on %s (errno = %d)\n",
 		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
 		    if_name(ifp), error));
 	}
 
 	return (error);
 }
 
 static struct in6_ifaddr *
 in6_ifadd(struct nd_prefixctl *pr, int mcast)
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct ifaddr *ifa;
 	struct in6_aliasreq ifra;
 	struct in6_ifaddr *ia, *ib;
 	int error, plen0;
 	struct in6_addr mask;
 	int prefixlen = pr->ndpr_plen;
 	int updateflags;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	in6_prefixlen2mask(&mask, prefixlen);
 
 	/*
 	 * find a link-local address (will be interface ID).
 	 * Is it really mandatory? Theoretically, a global or a site-local
 	 * address can be configured without a link-local address, if we
 	 * have a unique interface identifier...
 	 *
 	 * it is not mandatory to have a link-local address, we can generate
 	 * interface identifier on the fly.  we do this because:
 	 * (1) it should be the easiest way to find interface identifier.
 	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
 	 * for multiple addresses on a single interface, and possible shortcut
 	 * of DAD.  we omitted DAD for this reason in the past.
 	 * (3) a user can prevent autoconfiguration of global address
 	 * by removing link-local address by hand (this is partly because we
 	 * don't have other way to control the use of IPv6 on an interface.
 	 * this has been our design choice - cf. NRL's "ifconfig auto").
 	 * (4) it is easier to manage when an interface has addresses
 	 * with the same interface identifier, than to have multiple addresses
 	 * with different interface identifiers.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
 	if (ifa)
 		ib = (struct in6_ifaddr *)ifa;
 	else
 		return NULL;
 
 	/* prefixlen + ifidlen must be equal to 128 */
 	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
 	if (prefixlen != plen0) {
 		ifa_free(ifa);
 		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
 		    "(prefix=%d ifid=%d)\n",
 		    if_name(ifp), prefixlen, 128 - plen0));
 		return NULL;
 	}
 
 	/* make ifaddr */
 
 	bzero(&ifra, sizeof(ifra));
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	/* prefix */
 	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
 
 	/* interface ID */
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
 	ifa_free(ifa);
 
 	/* new prefix mask. */
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
 	    sizeof(ifra.ifra_prefixmask.sin6_addr));
 
 	/* lifetimes. */
 	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
 	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
 
 	/*
 	 * Make sure that we do not have this address already.  This should
 	 * usually not happen, but we can still see this case, e.g., if we
 	 * have manually configured the exact address to be configured.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
 	    &ifra.ifra_addr.sin6_addr);
 	if (ifa != NULL) {
 		ifa_free(ifa);
 		/* this should be rare enough to make an explicit log */
 		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
 		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
 		return (NULL);
 	}
 
 	/*
 	 * Allocate ifaddr structure, link into chain, etc.
 	 * If we are going to create a new address upon receiving a multicasted
 	 * RA, we need to impose a random delay before starting DAD.
 	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
 	 */
 	updateflags = 0;
 	if (mcast)
 		updateflags |= IN6_IFAUPDATE_DADDELAY;
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
 		nd6log((LOG_ERR,
 		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
 		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
 		    if_name(ifp), error));
 		return (NULL);	/* ifaddr must not have been allocated. */
 	}
 
 	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	/*
 	 * XXXRW: Assumption of non-NULLness here might not be true with
 	 * fine-grained locking -- should we validate it?  Or just return
 	 * earlier ifa rather than looking it up again?
 	 */
 	return (ia);		/* this is always non-NULL  and referenced. */
 }
 
 /*
  * ia0 - corresponding public address
  */
 int
 in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
 {
 	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
 	struct in6_ifaddr *newia, *ia;
 	struct in6_aliasreq ifra;
 	int i, error;
 	int trylimit = 3;	/* XXX: adhoc value */
 	int updateflags;
 	u_int32_t randid[2];
 	time_t vltime0, pltime0;
 
 	bzero(&ifra, sizeof(ifra));
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr = ia0->ia_addr;
 	/* copy prefix mask */
 	ifra.ifra_prefixmask = ia0->ia_prefixmask;
 	/* clear the old IFID */
 	for (i = 0; i < 4; i++) {
 		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
 		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
 	}
 
   again:
 	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
 	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
 		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
 		    "random IFID\n"));
 		return (EINVAL);
 	}
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
 	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
 	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
 
 	/*
 	 * in6_get_tmpifid() quite likely provided a unique interface ID.
 	 * However, we may still have a chance to see collision, because
 	 * there may be a time lag between generation of the ID and generation
 	 * of the address.  So, we'll do one more sanity check.
 	 */
 	IN6_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
 		    &ifra.ifra_addr.sin6_addr)) {
 			if (trylimit-- == 0) {
 				IN6_IFADDR_RUNLOCK();
 				/*
 				 * Give up.  Something strange should have
 				 * happened.
 				 */
 				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
 				    "find a unique random IFID\n"));
 				return (EEXIST);
 			}
 			IN6_IFADDR_RUNLOCK();
 			forcegen = 1;
 			goto again;
 		}
 	}
 	IN6_IFADDR_RUNLOCK();
 
 	/*
 	 * The Valid Lifetime is the lower of the Valid Lifetime of the
          * public address or TEMP_VALID_LIFETIME.
 	 * The Preferred Lifetime is the lower of the Preferred Lifetime
          * of the public address or TEMP_PREFERRED_LIFETIME -
          * DESYNC_FACTOR.
 	 */
 	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_vltime -
 		    (time_second - ia0->ia6_updatetime));
 		if (vltime0 > V_ip6_temp_valid_lifetime)
 			vltime0 = V_ip6_temp_valid_lifetime;
 	} else
 		vltime0 = V_ip6_temp_valid_lifetime;
 	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_pltime -
 		    (time_second - ia0->ia6_updatetime));
 		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
 			pltime0 = V_ip6_temp_preferred_lifetime -
 			    V_ip6_desync_factor;
 		}
 	} else
 		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
 	ifra.ifra_lifetime.ia6t_vltime = vltime0;
 	ifra.ifra_lifetime.ia6t_pltime = pltime0;
 
 	/*
 	 * A temporary address is created only if this calculated Preferred
 	 * Lifetime is greater than REGEN_ADVANCE time units.
 	 */
 	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
 		return (0);
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
 
 	/* allocate ifaddr structure, link into chain, etc. */
 	updateflags = 0;
 	if (delay)
 		updateflags |= IN6_IFAUPDATE_DADDELAY;
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
 		return (error);
 
 	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	if (newia == NULL) {	/* XXX: can it happen? */
 		nd6log((LOG_ERR,
 		    "in6_tmpifadd: ifa update succeeded, but we got "
 		    "no ifaddr\n"));
 		return (EINVAL); /* XXX */
 	}
 	newia->ia6_ndpr = ia0->ia6_ndpr;
 	newia->ia6_ndpr->ndpr_refcnt++;
 	ifa_free(&newia->ia_ifa);
 
 	/*
 	 * A newly added address might affect the status of other addresses.
 	 * XXX: when the temporary address is generated with a new public
 	 * address, the onlink check is redundant.  However, it would be safe
 	 * to do the check explicitly everywhere a new address is generated,
 	 * and, in fact, we surely need the check when we create a new
 	 * temporary address due to deprecation of an old temporary address.
 	 */
 	pfxlist_onlink_check();
 
 	return (0);
 }
 
 /*
  * Derive the absolute preferred/expire times of a prefix from its
  * relative lifetimes.  An infinite lifetime is recorded as 0; any other
  * lifetime is added to the current time_second.  Always returns 0.
  */
 static int
 in6_init_prefix_ltimes(struct nd_prefix *ndpr)
 {
 	ndpr->ndpr_preferred =
 	    (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ?
 	    0 : time_second + ndpr->ndpr_pltime;
 
 	ndpr->ndpr_expire =
 	    (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ?
 	    0 : time_second + ndpr->ndpr_vltime;
 
 	return 0;
 }
 
 /*
  * Fill in the absolute ia6t_expire/ia6t_preferred fields of lt6 from
  * its relative valid/preferred lifetimes.  An infinite lifetime leaves
  * the corresponding field at 0; otherwise it becomes time_second plus
  * the lifetime.  The 'new' argument is not used.
  */
 static void
 in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
 {
 	/* valid lifetime -> ia6t_expire */
 	lt6->ia6t_expire = 0;
 	if (lt6->ia6t_vltime != ND6_INFINITE_LIFETIME)
 		lt6->ia6t_expire = time_second + lt6->ia6t_vltime;
 
 	/* preferred lifetime -> ia6t_preferred */
 	lt6->ia6t_preferred = 0;
 	if (lt6->ia6t_pltime != ND6_INFINITE_LIFETIME)
 		lt6->ia6t_preferred = time_second + lt6->ia6t_pltime;
 }
 
 /*
  * Remove routing table entries whose gateway is the given address, by
  * walking the AF_INET6 table of every FIB with rt6_deleteroute().
  * Nothing is walked unless the gateway is a link-local address.
  * XXX: every entry of the routing table is visited, so this should not
  * be called when acting as a router.
  */
 void
 rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
 {
 	struct radix_node_head *rnh;
 	u_int fibnum;
 	int s;
 
 	s = splnet();
 
 	/* We'll care only link-local addresses */
 	if (IN6_IS_ADDR_LINKLOCAL(gateway)) {
 		/* XXX Do we really need to walk any but the default FIB? */
 		for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 			rnh = rt_tables_get_rnh(fibnum, AF_INET6);
 			if (rnh != NULL) {
 				RADIX_NODE_HEAD_LOCK(rnh);
 				rnh->rnh_walktree(rnh, rt6_deleteroute,
 				    (void *)gateway);
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 			}
 		}
 	}
 
 	splx(s);
 }
 
 /*
  * Tree-walk callback used by rt6_flush(): arg is the gateway address
  * (struct in6_addr *).  The entry is deleted through in6_rtrequest()
  * only when all of the following hold; otherwise 0 is returned and the
  * entry is left alone:
  *  - it has an AF_INET6 gateway equal to the given address;
  *  - it is not a static route (XXX: a bit ad-hoc; should the 'cloned'
  *    bit be considered instead?);
  *  - it is a host route, which in particular spares the default route.
  */
 static int
 rt6_deleteroute(struct radix_node *rn, void *arg)
 {
 #define SIN6(s)	((struct sockaddr_in6 *)s)
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct in6_addr *gate = (struct in6_addr *)arg;
 
 	if (rt->rt_gateway == NULL ||
 	    rt->rt_gateway->sa_family != AF_INET6 ||
 	    !IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr) ||
 	    (rt->rt_flags & RTF_STATIC) != 0 ||
 	    (rt->rt_flags & RTF_HOST) == 0)
 		return (0);
 
 	return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
 	    rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum));
 #undef SIN6
 }
 
 /*
  * Select the default interface by index; index 0 clears the selection.
  *
  * Returns EINVAL when ifindex is negative, greater than V_if_index, or
  * nonzero without a matching interface; returns 0 otherwise.  When the
  * index actually changes, V_nd6_defifindex and V_nd6_defifp are updated
  * and the new interface is handed to scope6_setdefault().
  */
 int
 nd6_setdefaultiface(int ifindex)
 {
 	if (ifindex < 0 || V_if_index < ifindex)
 		return (EINVAL);
 	if (ifindex != 0 && !ifnet_byindex(ifindex))
 		return (EINVAL);
 
 	/* Nothing to do when the default interface is unchanged. */
 	if (V_nd6_defifindex == ifindex)
 		return (0);
 
 	V_nd6_defifindex = ifindex;
 	V_nd6_defifp = (ifindex > 0) ? ifnet_byindex(ifindex) : NULL;
 
 	/*
 	 * The current implementation assumes a one-to-one mapping between
 	 * interfaces and links, so the default interface naturally serves
 	 * as the default link as well.
 	 */
 	scope6_setdefault(V_nd6_defifp);
 
 	return (0);
 }
Index: stable/9/contrib/ntp/ntpd/ntp_crypto.c
===================================================================
--- stable/9/contrib/ntp/ntpd/ntp_crypto.c	(revision 281230)
+++ stable/9/contrib/ntp/ntpd/ntp_crypto.c	(revision 281231)
@@ -1,4201 +1,4235 @@
 /*
  * ntp_crypto.c - NTP version 4 public key routines
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #ifdef OPENSSL
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/param.h>
 #include <unistd.h>
 #include <fcntl.h>
 
 #include "ntpd.h"
 #include "ntp_stdlib.h"
 #include "ntp_unixtime.h"
 #include "ntp_string.h"
 #include <ntp_random.h>
 
 #include "openssl/asn1_mac.h"
 #include "openssl/bn.h"
 #include "openssl/err.h"
 #include "openssl/evp.h"
 #include "openssl/pem.h"
 #include "openssl/rand.h"
 #include "openssl/x509v3.h"
 
 #ifdef KERNEL_PLL
 #include "ntp_syscall.h"
 #endif /* KERNEL_PLL */
 
 /*
  * Extension field message format
  *
  * These are always signed and saved before sending in network byte
  * order. They must be converted to and from host byte order for
  * processing.
  *
  * +-------+-------+
  * |   op  |  len  | <- extension pointer
  * +-------+-------+
  * |    assocID    |
  * +---------------+
  * |   timestamp   | <- value pointer
  * +---------------+
  * |   filestamp   |
  * +---------------+
  * |   value len   |
  * +---------------+
  * |               |
  * =     value     =
  * |               |
  * +---------------+
  * | signature len |
  * +---------------+
  * |               |
  * =   signature   =
  * |               |
  * +---------------+
  *
  * The CRYPTO_RESP bit is set to 0 for requests, 1 for responses.
  * Requests carry the association ID of the receiver; responses carry
  * the association ID of the sender. Some messages include only the
  * operation/length and association ID words and so have length 8
  * octets. Others include the value structure and associated value and
  * signature fields. These messages include the timestamp, filestamp,
  * value and signature words and so have length at least 24 octets. The
  * signature and/or value fields can be empty, in which case the
  * respective length words are zero. An empty value with nonempty
  * signature is syntactically valid, but semantically questionable.
  *
  * The filestamp represents the time when a cryptographic data file such
  * as a public/private key pair is created. It follows every reference
  * depending on that file and serves as a means to obsolete earlier data
  * of the same type. The timestamp represents the time when the
  * cryptographic data of the message were last signed. Creation of a
  * cryptographic data file or signing a message can occur only when the
  * creator or signor is synchronized to an authoritative source and
  * proventicated to a trusted authority.
  *
  * Note there are four conditions required for server trust. First, the
  * public key on the certificate must be verified, which involves a
  * number of format, content and consistency checks. Next, the server
  * identity must be confirmed by one of four schemes: private
  * certificate, IFF scheme, GQ scheme or certificate trail hike to a
  * self signed trusted certificate. Finally, the server signature must
  * be verified.
  */
 /*
  * Cryptodefines
  */
 #define TAI_1972	10	/* initial TAI offset (s) */
 #define MAX_LEAP	100	/* max UTC leapseconds (s) */
 #define VALUE_LEN	(6 * 4) /* min response field length */
+#define MAX_VALLEN	(65535 - VALUE_LEN) /* presumably max value length: 16-bit field length less VALUE_LEN */
 #define YEAR		(60 * 60 * 24 * 365) /* seconds in year */
 
 /*
  * Global cryptodata in host byte order
  */
 u_int32	crypto_flags = 0x0;	/* status word */
 
 /*
  * Global cryptodata in network byte order
  * (make_keylist() copies hostval.tstamp into a value structure without
  * conversion, next to fields it stores with htonl())
  */
 struct cert_info *cinfo = NULL;	/* certificate info/value */
 struct value hostval;		/* host value */
 struct value pubkey;		/* public key */
 struct value tai_leap;		/* leapseconds table */
 EVP_PKEY *iffpar_pkey = NULL;	/* IFF parameters */
 EVP_PKEY *gqpar_pkey = NULL;	/* GQ parameters */
 EVP_PKEY *mvpar_pkey = NULL;	/* MV parameters */
 char	*iffpar_file = NULL; /* IFF parameters file */
 char	*gqpar_file = NULL;	/* GQ parameters file */
 char	*mvpar_file = NULL;	/* MV parameters file */
 
 /*
  * Private cryptodata in host byte order
  */
 static char *passwd = NULL;	/* private key password */
 static EVP_PKEY *host_pkey = NULL; /* host key */
 static EVP_PKEY *sign_pkey = NULL; /* sign key (used with EVP_SignFinal) */
 static const EVP_MD *sign_digest = NULL; /* sign digest (used with EVP_SignInit) */
 static u_int sign_siglen;	/* sign key length; size of allocated signature buffers */
 static char *rand_file = NULL;	/* random seed file */
 static char *host_file = NULL;	/* host key file */
 static char *sign_file = NULL;	/* sign key file */
 static char *cert_file = NULL;	/* certificate file */
 static char *leap_file = NULL;	/* leapseconds file */
 static tstamp_t if_fstamp = 0;	/* IFF filestamp */
 static tstamp_t gq_fstamp = 0;	/* GQ filestamp */
 static tstamp_t mv_fstamp = 0;	/* MV filestamp */
 static u_int ident_scheme = 0;	/* server identity scheme */
 
 /*
  * Cryptotypes - prototypes of the file-local (static) routines.
  * NOTE(review): P(()) presumably wraps the parameter lists for
  * pre-ANSI compilers -- confirm against its definition.
  */
 static	int	crypto_verify	P((struct exten *, struct value *,
 				    struct peer *));
-static	int	crypto_encrypt	P((struct exten *, struct value *,
-				    keyid_t *));
+static	int	crypto_encrypt	P((const u_char *, u_int, keyid_t *,
+				    struct value *));
 static	int	crypto_alice	P((struct peer *, struct value *));
 static	int	crypto_alice2	P((struct peer *, struct value *));
 static	int	crypto_alice3	P((struct peer *, struct value *));
 static	int	crypto_bob	P((struct exten *, struct value *));
 static	int	crypto_bob2	P((struct exten *, struct value *));
 static	int	crypto_bob3	P((struct exten *, struct value *));
 static	int	crypto_iff	P((struct exten *, struct peer *));
 static	int	crypto_gq	P((struct exten *, struct peer *));
 static	int	crypto_mv	P((struct exten *, struct peer *));
 static	u_int	crypto_send	P((struct exten *, struct value *));
 static	tstamp_t crypto_time	P((void));
 static	u_long	asn2ntp		P((ASN1_TIME *));
 static	struct cert_info *cert_parse P((u_char *, u_int, tstamp_t));
 static	int	cert_sign	P((struct exten *, struct value *));
 static	int	cert_valid	P((struct cert_info *, EVP_PKEY *));
 static	int	cert_install	P((struct exten *, struct peer *));
 static	void	cert_free	P((struct cert_info *));
 static	EVP_PKEY *crypto_key	P((char *, tstamp_t *));
 static	int	bighash		P((BIGNUM *, BIGNUM *));
 static	struct cert_info *crypto_cert P((char *));
 static	void	crypto_tai	P((char *));
 
 #ifdef SYS_WINNT
 /*
  * readlink - stub compiled only when SYS_WINNT is defined.
  *
  * The arguments are ignored and -1 is returned unconditionally.
  */
 int
 readlink(
 	char	*link,
 	char	*file,
 	int	len
 	)
 {
 	return (-1);
 }
 #endif
 
 /*
  * session_key - generate session key
  *
  * The session key is the MD5 hash of the source address, destination
  * address, key ID and private value, laid out in network byte order.
  * The next key ID is formed from the first four octets of that hash.
  * When lifetime is nonzero the hash is installed as the key for keyno
  * and marked trusted for that lifetime.
  *
  * Returns the next key ID, or 0 when no destination address is given.
  */
 keyid_t
 session_key(
 	struct sockaddr_storage *srcadr, /* source address */
 	struct sockaddr_storage *dstadr, /* destination address */
 	keyid_t	keyno,		/* key ID */
 	keyid_t	private,	/* private value */
 	u_long	lifetime 	/* key lifetime */
 	)
 {
 	EVP_MD_CTX md5ctx;		/* message digest context */
 	u_char	digest[EVP_MAX_MD_SIZE]; /* message digest */
 	u_int32	hashin[10];		/* hash input, network byte order */
 	u_int	hashin_len = 0;		/* octets of hashin in use */
 	u_int	digest_len;
 	keyid_t	next_id;		/* next key identifier */
 
 	if (!dstadr)
 		return 0;
 
 	/*
 	 * Assemble the hash input for the address family in use. For
 	 * any other family the input stays empty.
 	 */
 	if (srcadr->ss_family == AF_INET) {
 		hashin[0] = ((struct sockaddr_in *)srcadr)->sin_addr.s_addr;
 		hashin[1] = ((struct sockaddr_in *)dstadr)->sin_addr.s_addr;
 		hashin[2] = htonl(keyno);
 		hashin[3] = htonl(private);
 		hashin_len = 4 * sizeof(u_int32);
 	} else if (srcadr->ss_family == AF_INET6) {
 		memcpy(&hashin[0], &GET_INADDR6(*srcadr),
 		    sizeof(struct in6_addr));
 		memcpy(&hashin[4], &GET_INADDR6(*dstadr),
 		    sizeof(struct in6_addr));
 		hashin[8] = htonl(keyno);
 		hashin[9] = htonl(private);
 		hashin_len = 10 * sizeof(u_int32);
 	}
 
 	/*
 	 * Hash the input; the leading four octets of the digest become
 	 * the next key ID in host byte order.
 	 */
 	EVP_DigestInit(&md5ctx, EVP_md5());
 	EVP_DigestUpdate(&md5ctx, (u_char *)hashin, hashin_len);
 	EVP_DigestFinal(&md5ctx, digest, &digest_len);
 	memcpy(&next_id, digest, 4);
 	next_id = ntohl(next_id);
 
 	/*
 	 * A nonzero lifetime installs the digest as the key for keyno
 	 * and calls it trusted.
 	 */
 	if (lifetime != 0) {
 		MD5auth_setkey(keyno, digest, digest_len);
 		authtrust(keyno, lifetime);
 	}
 #ifdef DEBUG
 	if (debug > 1)
 		printf(
 		    "session_key: %s > %s %08x %08x hash %08x life %lu\n",
 		    stoa(srcadr), stoa(dstadr), keyno,
 		    private, next_id, lifetime);
 #endif
 	return (next_id);
 }
 
 
 /*
  * make_keylist - generate key list
  *
  * Returns
  * XEVNT_OK	success (also when no interface is given)
  * XEVNT_PER	host certificate expired
  *
  * This routine constructs a pseudo-random sequence by repeatedly
  * hashing the session key starting from a given source address,
  * destination address, private value and the next key ID of the
  * preceding session key. The last entry on the list is saved along
  * with its sequence number and public signature.
  */
 int
 make_keylist(
 	struct peer *peer,	/* peer structure pointer */
 	struct interface *dstadr /* interface */
 	)
 {
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	struct autokey *ap;	/* autokey pointer */
 	struct value *vp;	/* value pointer */
 	keyid_t	keyid = 0;	/* next key ID */
 	keyid_t	cookie;		/* private value */
 	u_long	lifetime;
 	u_int	len, mpoll;
 	int	i;
 
 	if (!dstadr)
 		return XEVNT_OK;
 	
 	/*
 	 * Allocate the key list if necessary.
 	 */
 	tstamp = crypto_time();
 	if (peer->keylist == NULL)
 		peer->keylist = emalloc(sizeof(keyid_t) *
 		    NTP_MAXSESSION);
 
 	/*
 	 * Generate an initial key ID which is unique and greater than
 	 * NTP_MAXKEY. The mask used to be built from sizeof(keyid_t),
 	 * which is a byte count rather than a bit count, so only a few
 	 * low-order bits survived and the result could never exceed
 	 * NTP_MAXKEY. Keep the low 32 bits of the random value instead
 	 * and draw again until the ID is both in the session key range
 	 * and unused.
 	 */
 	do {
 		keyid = ntp_random() & 0xffffffff;
 	} while (keyid <= NTP_MAXKEY || authhavekey(keyid));
 
 	/*
 	 * Generate up to NTP_MAXSESSION session keys. Stop if the
 	 * next one would not be unique or not a session key ID or if
 	 * it would expire before the next poll. The private value
 	 * included in the hash is zero if broadcast mode, the peer
 	 * cookie if client mode or the host cookie if symmetric modes.
 	 */
 	mpoll = 1 << min(peer->ppoll, peer->hpoll);
 	lifetime = min(sys_automax, NTP_MAXSESSION * mpoll);
 	if (peer->hmode == MODE_BROADCAST)
 		cookie = 0;
 	else
 		cookie = peer->pcookie;
 	for (i = 0; i < NTP_MAXSESSION; i++) {
 		peer->keylist[i] = keyid;
 		peer->keynumber = i;
 		keyid = session_key(&dstadr->sin, &peer->srcadr, keyid,
 		    cookie, lifetime);
 		lifetime -= mpoll;
 		if (auth_havekey(keyid) || keyid <= NTP_MAXKEY ||
 		    lifetime <= mpoll)
 			break;
 	}
 
 	/*
 	 * Save the last session key ID, sequence number and timestamp,
 	 * then sign these values for later retrieval by the clients. Be
 	 * careful not to use invalid key media. Use the public values
 	 * timestamp as filestamp.
 	 */
 	vp = &peer->sndval;
 	if (vp->ptr == NULL)
 		vp->ptr = emalloc(sizeof(struct autokey));
 	ap = (struct autokey *)vp->ptr;
 	ap->seq = htonl(peer->keynumber);
 	ap->key = htonl(keyid);
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = hostval.tstamp;
 	vp->vallen = htonl(sizeof(struct autokey));
 	vp->siglen = 0;
 	if (tstamp != 0) {
 		/*
 		 * Sign only while the timestamp lies between the first
 		 * and last values recorded in cinfo.
 		 */
 		if (tstamp < cinfo->first || tstamp > cinfo->last)
 			return (XEVNT_PER);
 
 		if (vp->sig == NULL)
 			vp->sig = emalloc(sign_siglen);
 		/*
 		 * The signature covers the first 12 octets of the value
 		 * structure followed by the autokey values.
 		 */
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)vp, 12);
 		EVP_SignUpdate(&ctx, vp->ptr, sizeof(struct autokey));
 		if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 			vp->siglen = htonl(len);
 		else
 			msyslog(LOG_ERR, "make_keys %s\n",
 			    ERR_error_string(ERR_get_error(), NULL));
 		peer->flags |= FLAG_ASSOC;
 	}
 #ifdef DEBUG
 	if (debug)
 		printf("make_keys: %d %08x %08x ts %u fs %u poll %d\n",
 		    ntohl(ap->seq), ntohl(ap->key), cookie,
 		    ntohl(vp->tstamp), ntohl(vp->fstamp), peer->hpoll);
 #endif
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_recv - parse extension fields
  *
  * This routine is called when the packet has been matched to an
  * association and passed sanity, format and MAC checks. We believe the
  * extension field values only if the field has proper format and
  * length, the timestamp and filestamp are valid and the signature has
  * valid length and is verified. There are a few cases where some values
  * are believed even if the signature fails, but only if the proventic
  * bit is not set.
  */
 int
 crypto_recv(
 	struct peer *peer,	/* peer structure pointer */
 	struct recvbuf *rbufp	/* packet buffer pointer */
 	)
 {
 	const EVP_MD *dp;	/* message digest algorithm */
 	u_int32	*pkt;		/* receive packet pointer */
 	struct autokey *ap, *bp; /* autokey pointer */
 	struct exten *ep, *fp;	/* extension pointers */
 	int	has_mac;	/* length of MAC field */
 	int	authlen;	/* offset of MAC field */
 	associd_t associd;	/* association ID */
 	tstamp_t tstamp = 0;	/* timestamp */
 	tstamp_t fstamp = 0;	/* filestamp */
 	u_int	len;		/* extension field length */
 	u_int	code;		/* extension field opcode */
 	u_int	vallen = 0;	/* value length */
 	X509	*cert;		/* X509 certificate */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	keyid_t	cookie;		/* crumbles */
 	int	hismode;	/* packet mode */
 	int	rval = XEVNT_OK;
 	u_char	*ptr;
 	u_int32 temp32;
 
 	/*
 	 * Initialize. Note that the packet has already been checked for
 	 * valid format and extension field lengths. First extract the
 	 * field length, command code and association ID in host byte
 	 * order. These are used with all commands and modes. Then check
 	 * the version number, which must be 2, and length, which must
 	 * be at least 8 for requests and VALUE_LEN (24) for responses.
 	 * Packets that fail either test sink without a trace. The
 	 * association ID is saved only if nonzero.
 	 */
 	authlen = LEN_PKT_NOMAC;
 	hismode = (int)PKT_MODE((&rbufp->recv_pkt)->li_vn_mode);
 	while ((has_mac = rbufp->recv_length - authlen) > MAX_MAC_LEN) {
 		pkt = (u_int32 *)&rbufp->recv_pkt + authlen / 4;
 		ep = (struct exten *)pkt;
 		code = ntohl(ep->opcode) & 0xffff0000;
 		len = ntohl(ep->opcode) & 0x0000ffff;
 		associd = (associd_t) ntohl(pkt[1]);
 		rval = XEVNT_OK;
 #ifdef DEBUG
 		if (debug)
 			printf(
 			    "crypto_recv: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n",
 			    peer->crypto, authlen, len, code >> 16,
 			    associd);
 #endif
 
 		/*
 		 * Check version number and field length. If bad,
 		 * quietly ignore the packet.
 		 */
 		if (((code >> 24) & 0x3f) != CRYPTO_VN || len < 8) {
 			sys_unknownversion++;
 			code |= CRYPTO_ERROR;
 		}
 
 		/*
 		 * Little vulnerability bandage here. If a perp tosses a
 		 * fake association ID over the fence, we better toss it
 		 * out. Only the first one counts.
 		 */
 		if (code & CRYPTO_RESP) {
 			if (peer->assoc == 0)
 				peer->assoc = associd;
 			else if (peer->assoc != associd)
 				code |= CRYPTO_ERROR;
 		}
 		if (len >= VALUE_LEN) {
 			tstamp = ntohl(ep->tstamp);
 			fstamp = ntohl(ep->fstamp);
 			vallen = ntohl(ep->vallen);
+			/*
+			 * Bug 2761: I hope this isn't too early...
+			 */
+			if (   vallen == 0
+			    || len - VALUE_LEN < vallen)
+				return XEVNT_LEN;
 		}
 		switch (code) {
 
 		/*
 		 * Install status word, host name, signature scheme and
 		 * association ID. In OpenSSL the signature algorithm is
 		 * bound to the digest algorithm, so the NID completely
 		 * defines the signature scheme. Note the request and
 		 * response are identical, but neither is validated by
 		 * signature. The request is processed here only in
 		 * symmetric modes. The server name field might be
 		 * useful to implement access controls in future.
 		 */
 		case CRYPTO_ASSOC:
 
 			/*
 			 * If the machine is running when this message
 			 * arrives, the other fellow has reset and so
 			 * must we. Otherwise, pass the extension field
 			 * to the transmit side.
 			 */
 			if (peer->crypto) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			fp = emalloc(len);
 			memcpy(fp, ep, len);
 			temp32 = CRYPTO_RESP;
 			fp->opcode |= htonl(temp32);
 			peer->cmmd = fp;
 			/* fall through */
 
 		case CRYPTO_ASSOC | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if it has already been
 			 * stored or the message has been amputated.
 			 */
 			if (peer->crypto)
 				break;
 
 			if (vallen == 0 || vallen > MAXHOSTNAME ||
-			    len < VALUE_LEN + vallen) {
+			    len - VALUE_LEN < vallen) {
 				rval = XEVNT_LEN;
 				break;
 			}
 
 			/*
 			 * Check the identity schemes are compatible. If
 			 * the client has PC, the server must have PC,
 			 * in which case the server public key and
 			 * identity are presumed valid, so we skip the
 			 * certificate and identity exchanges and move
 			 * immediately to the cookie exchange which
 			 * confirms the server signature.
 			 */
 #ifdef DEBUG
 			if (debug)
 				printf(
 				    "crypto_recv: ident host 0x%x server 0x%x\n",
 				    crypto_flags, fstamp);
 #endif
 			temp32 = (crypto_flags | ident_scheme) &
 			    fstamp & CRYPTO_FLAG_MASK;
 			if (crypto_flags & CRYPTO_FLAG_PRIV) {
 				if (!(fstamp & CRYPTO_FLAG_PRIV)) {
 					rval = XEVNT_KEY;
 					break;
 
 				} else {
 					fstamp |= CRYPTO_FLAG_VALID |
 					    CRYPTO_FLAG_VRFY |
 					    CRYPTO_FLAG_SIGN;
 				}
 			/*
 			 * In symmetric modes it is an error if either
 			 * peer requests identity and the other peer
 			 * does not support it.
 			 */
 			} else if ((hismode == MODE_ACTIVE || hismode ==
 			    MODE_PASSIVE) && ((crypto_flags | fstamp) &
 			    CRYPTO_FLAG_MASK) && !temp32) {
 				rval = XEVNT_KEY;
 				break;
 			/*
 			 * It is an error if the client requests
 			 * identity and the server does not support it.
 			 */
 			} else if (hismode == MODE_CLIENT && (fstamp &
 			    CRYPTO_FLAG_MASK) && !temp32) {
 				rval = XEVNT_KEY;
 				break;
 			}
 
 			/*
 			 * Otherwise, the identity scheme(s) are those
 			 * that both client and server support.
 			 */
 			fstamp = temp32 | (fstamp & ~CRYPTO_FLAG_MASK);
 
 			/*
 			 * Discard the message if the signature digest
 			 * NID is not supported.
 			 */
 			temp32 = (fstamp >> 16) & 0xffff;
 			dp =
 			    (const EVP_MD *)EVP_get_digestbynid(temp32);
 			if (dp == NULL) {
 				rval = XEVNT_MD;
 				break;
 			}
 
 			/*
 			 * Save status word, host name and message
 			 * digest/signature type.
 			 */
 			peer->crypto = fstamp;
 			peer->digest = dp;
 			peer->subject = emalloc(vallen + 1);
 			memcpy(peer->subject, ep->pkt, vallen);
 			peer->subject[vallen] = '\0';
 			peer->issuer = emalloc(vallen + 1);
 			strcpy(peer->issuer, peer->subject);
 			temp32 = (fstamp >> 16) & 0xffff;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "flags 0x%x host %s signature %s", fstamp,
 			    peer->subject, OBJ_nid2ln(temp32));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Decode X509 certificate in ASN.1 format and extract
 		 * the data containing, among other things, subject
 		 * name and public key. In the default identification
 		 * scheme, the certificate trail is followed to a self
 		 * signed trusted certificate.
 		 */
 		case CRYPTO_CERT | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid.
 			 */
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Scan the certificate list to delete old
 			 * versions and link the newest version first on
 			 * the list.
 			 */
 			if ((rval = cert_install(ep, peer)) != XEVNT_OK)
 				break;
 
 			/*
 			 * If we snatch the certificate before the
 			 * server certificate has been signed by its
 			 * server, it will be self signed. When it is,
 			 * we chase the certificate issuer, which the
 			 * server has, and keep going until a self
 			 * signed trusted certificate is found. Be sure
 			 * to update the issuer field, since it may
 			 * change.
 			 */
 			if (peer->issuer != NULL)
 				free(peer->issuer);
 			peer->issuer = emalloc(strlen(cinfo->issuer) +
 			    1);
 			strcpy(peer->issuer, cinfo->issuer);
 
 			/*
 			 * We plug in the public key and lifetime from
 			 * the first certificate received. However, note
 			 * that this certificate might not be signed by
 			 * the server, so we can't check the
 			 * signature/digest NID.
 			 */
 			if (peer->pkey == NULL) {
 				ptr = (u_char *)cinfo->cert.ptr;
 				cert = d2i_X509(NULL, &ptr,
 				    ntohl(cinfo->cert.vallen));
 				peer->pkey = X509_get_pubkey(cert);
 				X509_free(cert);
 			}
 			peer->flash &= ~TEST8;
 			temp32 = cinfo->nid;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "cert %s 0x%x %s (%u) fs %u",
 			    cinfo->subject, cinfo->flags,
 			    OBJ_nid2ln(temp32), temp32,
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Schnorr (IFF)identity scheme. This scheme is designed
 		 * for use with shared secret group keys and where the
 		 * certificate may be generated by a third party. The
 		 * client sends a challenge to the server, which
 		 * performs a calculation and returns the result. A
 		 * positive result is possible only if both client and
 		 * server contain the same secret group key.
 		 */
 		case CRYPTO_IFF | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * If the the challenge matches the response,
 			 * the certificate public key, as well as the
 			 * server public key, signatyre and identity are
 			 * all verified at the same time. The server is
 			 * declared trusted, so we skip further
 			 * certificate stages and move immediately to
 			 * the cookie stage.
 			 */
 			if ((rval = crypto_iff(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY |
 			    CRYPTO_FLAG_PROV;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "iff fs %u",
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Guillou-Quisquater (GQ) identity scheme. This scheme
 		 * is designed for use with public certificates carrying
 		 * the GQ public key in an extension field. The client
 		 * sends a challenge to the server, which performs a
 		 * calculation and returns the result. A positive result
 		 * is possible only if both client and server contain
 		 * the same group key and the server has the matching GQ
 		 * private key.
 		 */
 		case CRYPTO_GQ | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * If the the challenge matches the response,
 			 * the certificate public key, as well as the
 			 * server public key, signatyre and identity are
 			 * all verified at the same time. The server is
 			 * declared trusted, so we skip further
 			 * certificate stages and move immediately to
 			 * the cookie stage.
 			 */
 			if ((rval = crypto_gq(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY |
 			    CRYPTO_FLAG_PROV;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "gq fs %u",
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * MV
 		 */
 		case CRYPTO_MV | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * If the the challenge matches the response,
 			 * the certificate public key, as well as the
 			 * server public key, signatyre and identity are
 			 * all verified at the same time. The server is
 			 * declared trusted, so we skip further
 			 * certificate stages and move immediately to
 			 * the cookie stage.
 			 */
 			if ((rval = crypto_mv(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY |
 			    CRYPTO_FLAG_PROV;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "mv fs %u",
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Cookie request in symmetric modes. Roll a random
 		 * cookie and install in symmetric mode. Encrypt for the
 		 * response, which is transmitted later.
 		 */
 		case CRYPTO_COOK:
 
 			/*
 			 * Discard the message if invalid or certificate
 			 * trail not trusted.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VALID)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Pass the extension field to the transmit
 			 * side. If already agreed, walk away.
 			 */
 			fp = emalloc(len);
 			memcpy(fp, ep, len);
 			temp32 = CRYPTO_RESP;
 			fp->opcode |= htonl(temp32);
 			peer->cmmd = fp;
 			if (peer->crypto & CRYPTO_FLAG_AGREE) {
 				peer->flash &= ~TEST8;
 				break;
 			}
 
 			/*
 			 * Install cookie values and light the cookie
 			 * bit. The transmit side will pick up and
 			 * encrypt it for the response.
 			 */
 			key_expire(peer);
 			peer->cookval.tstamp = ep->tstamp;
 			peer->cookval.fstamp = ep->fstamp;
 			RAND_bytes((u_char *)&peer->pcookie, 4);
 			peer->crypto &= ~CRYPTO_FLAG_AUTO;
 			peer->crypto |= CRYPTO_FLAG_AGREE;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u",
 			    peer->pcookie, ntohl(ep->tstamp),
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Cookie response in client and symmetric modes. If the
 		 * cookie bit is set, the working cookie is the EXOR of
 		 * the current and new values.
 		 */
 		case CRYPTO_COOK | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or identity
 			 * not confirmed or signature not verified with
 			 * respect to the cookie values.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VRFY)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, &peer->cookval,
 			    peer)) != XEVNT_OK)
 				break;
 
 			/*
 			 * Decrypt the cookie, hunting all the time for
 			 * errors.
 			 */
 			if (vallen == (u_int) EVP_PKEY_size(host_pkey)) {
 				u_int32 *cookiebuf = malloc(
 					RSA_size(host_pkey->pkey.rsa));
 				if (cookiebuf == NULL) {
 					rval = XEVNT_CKY;
 					break;
 				}
 				if (RSA_private_decrypt(vallen,
 				    (u_char *)ep->pkt,
 				    (u_char *)cookiebuf,
 				    host_pkey->pkey.rsa,
 				    RSA_PKCS1_OAEP_PADDING) != 4) {
 					rval = XEVNT_CKY;
 					free(cookiebuf);
 					break;
 				} else {
 					cookie = ntohl(*cookiebuf);
 					free(cookiebuf);
 				}
 			} else {
 				rval = XEVNT_CKY;
 				break;
 			}
 
 			/*
 			 * Install cookie values and light the cookie
 			 * bit. If this is not broadcast client mode, we
 			 * are done here.
 			 */
 			key_expire(peer);
 			peer->cookval.tstamp = ep->tstamp;
 			peer->cookval.fstamp = ep->fstamp;
 			if (peer->crypto & CRYPTO_FLAG_AGREE)
 				peer->pcookie ^= cookie;
 			else
 				peer->pcookie = cookie;
 			if (peer->hmode == MODE_CLIENT &&
 			    !(peer->cast_flags & MDF_BCLNT))
 				peer->crypto |= CRYPTO_FLAG_AUTO;
 			else
 				peer->crypto &= ~CRYPTO_FLAG_AUTO;
 			peer->crypto |= CRYPTO_FLAG_AGREE;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u",
 			    peer->pcookie, ntohl(ep->tstamp),
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Install autokey values in broadcast client and
 		 * symmetric modes. We have to do this every time the
 		 * sever/peer cookie changes or a new keylist is
 		 * rolled. Ordinarily, this is automatic as this message
 		 * is piggybacked on the first NTP packet sent upon
 		 * either of these events. Note that a broadcast client
 		 * or symmetric peer can receive this response without a
 		 * matching request.
 		 */
 		case CRYPTO_AUTO | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or identity
 			 * not confirmed or signature not verified with
 			 * respect to the receive autokey values.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_VRFY)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, &peer->recval,
 			    peer)) != XEVNT_OK)
 				break;
 
 			/*
 			 * Install autokey values and light the
 			 * autokey bit. This is not hard.
 			 */
 			if (peer->recval.ptr == NULL)
 				peer->recval.ptr =
 				    emalloc(sizeof(struct autokey));
 			bp = (struct autokey *)peer->recval.ptr;
 			peer->recval.tstamp = ep->tstamp;
 			peer->recval.fstamp = ep->fstamp;
 			ap = (struct autokey *)ep->pkt;
 			bp->seq = ntohl(ap->seq);
 			bp->key = ntohl(ap->key);
 			peer->pkeyid = bp->key;
 			peer->crypto |= CRYPTO_FLAG_AUTO;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "auto seq %d key %x ts %u fs %u", bp->seq,
 			    bp->key, ntohl(ep->tstamp),
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 	
 		/*
 		 * X509 certificate sign response. Validate the
 		 * certificate signed by the server and install. Later
 		 * this can be provided to clients of this server in
 		 * lieu of the self signed certificate in order to
 		 * validate the public key.
 		 */
 		case CRYPTO_SIGN | CRYPTO_RESP:
 
 			/*
 			 * Discard the message if invalid or not
 			 * proventic.
 			 */
 			if (!(peer->crypto & CRYPTO_FLAG_PROV)) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Scan the certificate list to delete old
 			 * versions and link the newest version first on
 			 * the list.
 			 */
 			if ((rval = cert_install(ep, peer)) != XEVNT_OK)
 				break;
 
 			peer->crypto |= CRYPTO_FLAG_SIGN;
 			peer->flash &= ~TEST8;
 			temp32 = cinfo->nid;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "sign %s 0x%x %s (%u) fs %u",
 			    cinfo->issuer, cinfo->flags,
 			    OBJ_nid2ln(temp32), temp32,
 			    ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * Install leapseconds table in symmetric modes. This
 		 * table is proventicated to the NIST primary servers,
 		 * either by copying the file containing the table from
 		 * a NIST server to a trusted server or directly using
 		 * this protocol. While the entire table is installed at
 		 * the server, presently only the current TAI offset is
 		 * provided via the kernel to other applications.
 		 */
 		case CRYPTO_TAI:
 
 			/*
 			 * Discard the message if invalid.
 			 */
 			if ((rval = crypto_verify(ep, NULL, peer)) !=
 			    XEVNT_OK)
 				break;
 
 			/*
 			 * Pass the extension field to the transmit
 			 * side. Continue below if a leapseconds table
 			 * accompanies the message.
 			 */
 			fp = emalloc(len);
 			memcpy(fp, ep, len);
 			temp32 = CRYPTO_RESP;
 			fp->opcode |= htonl(temp32);
 			peer->cmmd = fp;
 			if (len <= VALUE_LEN) {
 				peer->flash &= ~TEST8;
 				break;
 			}
 			/* fall through */
 
 		case CRYPTO_TAI | CRYPTO_RESP:
 
 			/*
 			 * If this is a response, discard the message if
 			 * signature not verified with respect to the
 			 * leapsecond table values.
 			 */
 			if (peer->cmmd == NULL) {
 				if ((rval = crypto_verify(ep,
 				    &peer->tai_leap, peer)) != XEVNT_OK)
 					break;
 			}
 
 			/*
 			 * Initialize peer variables with latest update.
 			 */
 			peer->tai_leap.tstamp = ep->tstamp;
 			peer->tai_leap.fstamp = ep->fstamp;
 			peer->tai_leap.vallen = ep->vallen;
 
 			/*
 			 * Install the new table if there is no stored
 			 * table or the new table is more recent than
 			 * the stored table. Since a filestamp may have
 			 * changed, recompute the signatures.
 			 */
 			if (ntohl(peer->tai_leap.fstamp) >
 			    ntohl(tai_leap.fstamp)) {
 				tai_leap.fstamp = ep->fstamp;
 				tai_leap.vallen = ep->vallen;
 				if (tai_leap.ptr != NULL)
 					free(tai_leap.ptr);
 				tai_leap.ptr = emalloc(vallen);
 				memcpy(tai_leap.ptr, ep->pkt, vallen);
 				crypto_update();
 			}
 			crypto_flags |= CRYPTO_FLAG_TAI;
 			peer->crypto |= CRYPTO_FLAG_LEAP;
 			peer->flash &= ~TEST8;
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "leap %u ts %u fs %u", vallen,
 			    ntohl(ep->tstamp), ntohl(ep->fstamp));
 			record_crypto_stats(&peer->srcadr, statstr);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		/*
 		 * We come here in symmetric modes for miscellaneous
 		 * commands that have value fields but are processed on
 		 * the transmit side. All we need do here is check for
 		 * valid field length. Remaining checks are below and on
 		 * the transmit side.
 		 */
 		case CRYPTO_CERT:
 		case CRYPTO_IFF:
 		case CRYPTO_GQ:
 		case CRYPTO_MV:
 		case CRYPTO_SIGN:
 			if (len < VALUE_LEN) {
 				rval = XEVNT_LEN;
 				break;
 			}
 			/* fall through */
 
 		/*
 		 * We come here for miscellaneous requests and unknown
 		 * requests and responses. If an unknown response or
 		 * error, forget it. If a request, save the extension
 		 * field for later. Unknown requests will be caught on
 		 * the transmit side.
 		 */
 		default:
 			if (code & (CRYPTO_RESP | CRYPTO_ERROR)) {
 				rval = XEVNT_ERR;
 			} else if ((rval = crypto_verify(ep, NULL,
 			    peer)) == XEVNT_OK) {
 				fp = emalloc(len);
 				memcpy(fp, ep, len);
 				temp32 = CRYPTO_RESP;
 				fp->opcode |= htonl(temp32);
 				peer->cmmd = fp;
 			}
 		}
 
 		/*
 		 * We don't log length/format/timestamp errors and
 		 * duplicates, which are log clogging vulnerabilities.
 		 * The first error found terminates the extension field
 		 * scan and we return the laundry to the caller. A
 		 * length/format/timestamp error on transmit is
 		 * cheerfully ignored, as the message is not sent.
 		 */
 		if (rval > XEVNT_TSP) {
 			snprintf(statstr, NTP_MAXSTRLEN,
 			    "error %x opcode %x ts %u fs %u", rval,
 			    code, tstamp, fstamp);
 			record_crypto_stats(&peer->srcadr, statstr);
 			report_event(rval, peer);
 #ifdef DEBUG
 			if (debug)
 				printf("crypto_recv: %s\n", statstr);
 #endif
 			break;
 
 		} else if (rval > XEVNT_OK && (code & CRYPTO_RESP)) {
 			rval = XEVNT_OK;
 		}
 		authlen += len;
 	}
 	return (rval);
 }
 
 
 /*
  * crypto_xmit - construct extension fields
  *
  * This routine is called both when an association is configured and
  * when one is not. The only case where this matters is to retrieve the
  * autokey information, in which case the caller has to provide the
  * association ID to match the association.
  *
  * Returns length of extension field.
  */
 int
 crypto_xmit(
 	struct pkt *xpkt,	/* transmit packet pointer */
 	struct sockaddr_storage *srcadr_sin,	/* active runway */
 	int	start,		/* offset to extension field */
 	struct exten *ep,	/* extension pointer */
 	keyid_t cookie		/* session cookie */
 	)
 {
 	u_int32	*pkt;		/* packet pointer */
 	struct peer *peer;	/* peer structure pointer */
 	u_int	opcode;		/* extension field opcode */
 	struct exten *fp;	/* extension pointers */
 	struct cert_info *cp, *xp; /* certificate info/value pointer */
 	char	certname[MAXHOSTNAME + 1]; /* subject name buffer */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t tstamp;
 	u_int	vallen;
 	u_int	len;
 	struct value vtemp;
 	associd_t associd;
 	int	rval;
 	keyid_t tcookie;
 
 	/*
 	 * Generate the requested extension field request code, length
 	 * and association ID. If this is a response and the host is not
 	 * synchronized, light the error bit and go home.
 	 */
 	pkt = (u_int32 *)xpkt + start / 4;
 	fp = (struct exten *)pkt;
 	opcode = ntohl(ep->opcode);
 	associd = (associd_t) ntohl(ep->associd);
 	fp->associd = htonl(associd);
 	len = 8;
 	rval = XEVNT_OK;
 	tstamp = crypto_time();
 	switch (opcode & 0xffff0000) {
 
 	/*
 	 * Send association request and response with status word and
 	 * host name. Note, this message is not signed and the filestamp
 	 * contains only the status word.
 	 */
 	case CRYPTO_ASSOC | CRYPTO_RESP:
 		len += crypto_send(fp, &hostval);
 		fp->fstamp = htonl(crypto_flags);
 		break;
 
 	case CRYPTO_ASSOC:
 		len += crypto_send(fp, &hostval);
 		fp->fstamp = htonl(crypto_flags | ident_scheme);
 		break;
 
 	/*
 	 * Send certificate request. Use the values from the extension
 	 * field.
 	 */
 	case CRYPTO_CERT:
 		memset(&vtemp, 0, sizeof(vtemp));
 		vtemp.tstamp = ep->tstamp;
 		vtemp.fstamp = ep->fstamp;
 		vtemp.vallen = ep->vallen;
 		vtemp.ptr = (u_char *)ep->pkt;
 		len += crypto_send(fp, &vtemp);
 		break;
 
 	/*
 	 * Send certificate response or sign request. Use the values
 	 * from the certificate cache. If the request contains no
 	 * subject name, assume the name of this host. This is for
 	 * backwards compatibility. Private certificates are never sent.
 	 */
 	case CRYPTO_SIGN:
 	case CRYPTO_CERT | CRYPTO_RESP:
 		vallen = ntohl(ep->vallen);
 		if (vallen == 8) {
 			strcpy(certname, sys_hostname);
-		} else if (vallen == 0 || vallen > MAXHOSTNAME) {
+		} else if (vallen == 0 || vallen > MAXHOSTNAME ||
+		    (opcode & 0xffff) < VALUE_LEN + vallen) { /* ep, not len */
 			rval = XEVNT_LEN;
 			break;
 
 		} else {
 			memcpy(certname, ep->pkt, vallen);
 			certname[vallen] = '\0';
 		}
 
 		/*
 		 * Find all certificates with matching subject. If a
 		 * self-signed, trusted certificate is found, use that.
 		 * If not, use the first one with matching subject. A
 		 * private certificate is never divulged or signed.
 		 */
 		xp = NULL;
 		for (cp = cinfo; cp != NULL; cp = cp->link) {
 			if (cp->flags & CERT_PRIV)
 				continue;
 
 			if (strcmp(certname, cp->subject) == 0) {
 				if (xp == NULL)
 					xp = cp;
 				if (strcmp(certname, cp->issuer) ==
 				    0 && cp->flags & CERT_TRUST) {
 					xp = cp;
 					break;
 				}
 			}
 		}
 
 		/*
 		 * Be careful who you trust. If not yet synchronized,
 		 * give back an empty response. If certificate not found
 		 * or beyond the lifetime, return an error. This is to
 		 * avoid a bad dude trying to get an expired certificate
 		 * re-signed. Otherwise, send it.
 		 *
 		 * Note the timestamp and filestamp are taken from the
 		 * certificate value structure. For all certificates the
 		 * timestamp is the latest signature update time. For
 		 * host and imported certificates the filestamp is the
 		 * creation epoch. For signed certificates the filestamp
 		 * is the creation epoch of the trusted certificate at
 		 * the base of the certificate trail. In principle, this
 		 * allows strong checking for signature masquerade.
 		 */
 		if (tstamp == 0)
 			break;
 
 		if (xp == NULL)
 			rval = XEVNT_CRT;
 		else if (tstamp < xp->first || tstamp > xp->last)
 			rval = XEVNT_SRV;
 		else
 			len += crypto_send(fp, &xp->cert);
 		break;
 
 	/*
 	 * Send challenge in Schnorr (IFF) identity scheme.
 	 */
 	case CRYPTO_IFF:
 		if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		if ((rval = crypto_alice(peer, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send response in Schnorr (IFF) identity scheme.
 	 */
 	case CRYPTO_IFF | CRYPTO_RESP:
 		if ((rval = crypto_bob(ep, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send challenge in Guillou-Quisquater (GQ) identity scheme.
 	 */
 	case CRYPTO_GQ:
 		if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		if ((rval = crypto_alice2(peer, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send response in Guillou-Quisquater (GQ) identity scheme.
 	 */
 	case CRYPTO_GQ | CRYPTO_RESP:
 		if ((rval = crypto_bob2(ep, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send challenge in MV identity scheme.
 	 */
 	case CRYPTO_MV:
 		if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		if ((rval = crypto_alice3(peer, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send response in MV identity scheme.
 	 */
 	case CRYPTO_MV | CRYPTO_RESP:
 		if ((rval = crypto_bob3(ep, &vtemp)) == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
 			value_free(&vtemp);
 		}
 		break;
 
 	/*
 	 * Send certificate sign response. The integrity of the request
 	 * certificate has already been verified on the receive side.
 	 * Sign the response using the local server key. Use the
 	 * filestamp from the request and use the timestamp as the
 	 * current time. Light the error bit if the certificate is
 	 * invalid or contains an unverified signature.
 	 */
 	case CRYPTO_SIGN | CRYPTO_RESP:
 		if ((rval = cert_sign(ep, &vtemp)) == XEVNT_OK)
 			len += crypto_send(fp, &vtemp);
 		value_free(&vtemp);
 		break;
 
 	/*
 	 * Send public key and signature. Use the values from the public
 	 * key.
 	 */
 	case CRYPTO_COOK:
 		len += crypto_send(fp, &pubkey);
 		break;
 
 	/*
 	 * Encrypt and send cookie and signature. Light the error bit if
 	 * anything goes wrong.
 	 */
 	case CRYPTO_COOK | CRYPTO_RESP:
-		if ((opcode & 0xffff) < VALUE_LEN) {
+		vallen = ntohl(ep->vallen);	/* Must be <64k */
+		if (   vallen == 0
+		    || (vallen >= MAX_VALLEN)
+		    || (opcode & 0x0000ffff)  < VALUE_LEN + vallen) {
 			rval = XEVNT_LEN;
 			break;
 		}
 		if (PKT_MODE(xpkt->li_vn_mode) == MODE_SERVER) {
 			tcookie = cookie;
 		} else {
 			if ((peer = findpeerbyassoc(associd)) == NULL) {
 				rval = XEVNT_ERR;
 				break;
 			}
 			tcookie = peer->pcookie;
 		}
-		if ((rval = crypto_encrypt(ep, &vtemp, &tcookie)) ==
-		    XEVNT_OK)
+		if ((rval = crypto_encrypt((const u_char *)ep->pkt, vallen, &tcookie, &vtemp))
+		    == XEVNT_OK) {
 			len += crypto_send(fp, &vtemp);
-		value_free(&vtemp);
+			value_free(&vtemp);
+		}
 		break;
 
 	/*
 	 * Find peer and send autokey data and signature in broadcast
 	 * server and symmetric modes. Use the values in the autokey
 	 * structure. If no association is found, either the server has
 	 * restarted with new associations or some perp has replayed an
 	 * old message, in which case light the error bit.
 	 */
 	case CRYPTO_AUTO | CRYPTO_RESP:
 		if ((peer = findpeerbyassoc(associd)) == NULL) {
 			rval = XEVNT_ERR;
 			break;
 		}
 		peer->flags &= ~FLAG_ASSOC;
 		len += crypto_send(fp, &peer->sndval);
 		break;
 
 	/*
 	 * Send leapseconds table and signature. Use the values from the
 	 * tai structure. If no table has been loaded, just send an
 	 * empty request.
 	 */
 	case CRYPTO_TAI:
 	case CRYPTO_TAI | CRYPTO_RESP:
 		if (crypto_flags & CRYPTO_FLAG_TAI)
 			len += crypto_send(fp, &tai_leap);
 		break;
 
 	/*
 	 * Default - Fall through for requests; for unknown responses,
 	 * flag as error.
 	 */
 	default:
 		if (opcode & CRYPTO_RESP)
 			rval = XEVNT_ERR;
 	}
 
 	/*
 	 * In case of error, flame the log. If a request, toss the
 	 * puppy; if a response, return so the sender can flame, too.
 	 */
 	if (rval != XEVNT_OK) {
 		opcode |= CRYPTO_ERROR;
 		snprintf(statstr, NTP_MAXSTRLEN,
 		    "error %x opcode %x", rval, opcode);
 		record_crypto_stats(srcadr_sin, statstr);
 		report_event(rval, NULL);
 #ifdef DEBUG
 		if (debug)
 			printf("crypto_xmit: %s\n", statstr);
 #endif
 		if (!(opcode & CRYPTO_RESP))
 			return (0);
 	}
 
 	/*
 	 * Round up the field length to a multiple of 8 bytes and save
 	 * the request code and length.
 	 */
 	len = ((len + 7) / 8) * 8;
 	fp->opcode = htonl((opcode & 0xffff0000) | len);
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "crypto_xmit: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n",
 		    crypto_flags, start, len, opcode >> 16, associd);
 #endif
 	return (len);
 }
 
 
 /*
  * crypto_verify - parse and verify the extension field and value
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_LEN	bad field format or length
  * XEVNT_TSP	bad timestamp
  * XEVNT_FSP	bad filestamp
  * XEVNT_PUB	bad or missing public key
  * XEVNT_SGL	bad signature length
  * XEVNT_SIG	signature not verified
  * XEVNT_ERR	protocol error
  */
 static int
 crypto_verify(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp,	/* value pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	EVP_PKEY *pkey;		/* server public key */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp, tstamp1 = 0; /* timestamp */
 	tstamp_t fstamp, fstamp1 = 0; /* filestamp */
 	u_int	vallen;		/* value length */
 	u_int	siglen;		/* signature length */
 	u_int	opcode, len;
 	int	i;
 
 	/*
 	 * We require valid opcode and field lengths, timestamp,
 	 * filestamp, public key, digest, signature length and
 	 * signature, where relevant. Note that preliminary length
 	 * checks are done in the main loop.
 	 */
 	len = ntohl(ep->opcode) & 0x0000ffff;
 	opcode = ntohl(ep->opcode) & 0xffff0000;
 
 	/*
 	 * Check for valid operation code and protocol. The opcode must
 	 * not have the error bit set. If a response, it must have a
 	 * value header. If a request and does not contain a value
 	 * header, no need for further checking.
 	 */
 	if (opcode & CRYPTO_ERROR)
 		return (XEVNT_ERR);
 
  	if (opcode & CRYPTO_RESP) {
  		if (len < VALUE_LEN)
 			return (XEVNT_LEN);
 	} else {
  		if (len < VALUE_LEN)
 			return (XEVNT_OK);
 	}
 
 	/*
 	 * We have a value header. Check for valid field lengths. The
 	 * field length must be long enough to contain the value header,
 	 * value and signature. Note both the value and signature fields
 	 * are rounded up to the next word.
 	 */
 	vallen = ntohl(ep->vallen);
+	if (   vallen == 0	/* value must fit before siglen is read */
+	    || vallen > MAX_VALLEN
+	    || len - VALUE_LEN < ((vallen + 3) / 4) * 4)
+		return (XEVNT_LEN);
 	i = (vallen + 3) / 4;
 	siglen = ntohl(ep->pkt[i++]);
-	if (len < VALUE_LEN + ((vallen + 3) / 4) * 4 + ((siglen + 3) /
-	    4) * 4)
+	if (   siglen > MAX_VALLEN
+	    || len - VALUE_LEN - ((vallen + 3) / 4) * 4
+	      < ((siglen + 3) / 4) * 4)
 		return (XEVNT_LEN);
 
 	/*
 	 * Punt if this is a response with no data. Punt if this is a
 	 * request and a previous response is pending. 
 	 */
 	if (opcode & CRYPTO_RESP) {
 		if (vallen == 0)
 			return (XEVNT_LEN);
 	} else {
 		if (peer->cmmd != NULL)
 			return (XEVNT_LEN);
 	}
 
 	/*
 	 * Check for valid timestamp and filestamp. If the timestamp is
 	 * zero, the sender is not synchronized and signatures are
 	 * disregarded. If not, the timestamp must not precede the
 	 * filestamp. The timestamp and filestamp must not precede the
 	 * corresponding values in the value structure, if present. Once
 	 * the autokey values have been installed, the timestamp must
 	 * always be later than the corresponding value in the value
 	 * structure. Duplicate timestamps are illegal once the cookie
 	 * has been validated.
 	 */
 	tstamp = ntohl(ep->tstamp);
 	fstamp = ntohl(ep->fstamp);
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < fstamp)
 		return (XEVNT_TSP);
 
 	if (vp != NULL) {
 		tstamp1 = ntohl(vp->tstamp);
 		fstamp1 = ntohl(vp->fstamp);
 		if ((tstamp < tstamp1 || (tstamp == tstamp1 &&
 		    (peer->crypto & CRYPTO_FLAG_AUTO))))
 			return (XEVNT_TSP);
 
 		if ((tstamp < fstamp1 || fstamp < fstamp1))
 			return (XEVNT_FSP);
 	}
 
 	/*
 	 * Check for valid signature length, public key and digest
 	 * algorithm.
 	 */
 	if (crypto_flags & peer->crypto & CRYPTO_FLAG_PRIV)
 		pkey = sign_pkey;
 	else
 		pkey = peer->pkey;
 	if (siglen == 0 || pkey == NULL || peer->digest == NULL)
 		return (XEVNT_OK);
 
 	if (siglen != (u_int)EVP_PKEY_size(pkey))
 		return (XEVNT_SGL);
 
 	/*
 	 * Darn, I thought we would never get here. Verify the
 	 * signature. If the identity exchange is verified, light the
 	 * proventic bit. If no client identity scheme is specified,
 	 * avoid doing the sign exchange.
 	 */
 	EVP_VerifyInit(&ctx, peer->digest);
+	/* + 12: tstamp, fstamp and vallen words precede the signed value */
 	EVP_VerifyUpdate(&ctx, (u_char *)&ep->tstamp, vallen + 12);
 	if (EVP_VerifyFinal(&ctx, (u_char *)&ep->pkt[i], siglen, pkey) <= 0)
 		return (XEVNT_SIG);
 
 	if (peer->crypto & CRYPTO_FLAG_VRFY) {
 		peer->crypto |= CRYPTO_FLAG_PROV;
 		if (!(crypto_flags & CRYPTO_FLAG_MASK))
 			peer->crypto |= CRYPTO_FLAG_SIGN;
 	}
 	return (XEVNT_OK);
 }
 
 
 /*
- * crypto_encrypt - construct encrypted cookie and signature from
- * extension field and cookie
+ * crypto_encrypt - construct vp (encrypted cookie and signature) from
+ * the public key and cookie.
  *
- * Returns
+ * Returns:
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_CKY	bad or missing cookie
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_encrypt(
-	struct exten *ep,	/* extension pointer */
-	struct value *vp,	/* value pointer */
-	keyid_t	*cookie		/* server cookie */
+	const u_char *ptr,	/* Public Key */
+	u_int	vallen,		/* Length of Public Key */
+	keyid_t	*cookie,	/* server cookie */
+	struct value *vp	/* value pointer */
 	)
 {
 	EVP_PKEY *pkey;		/* public key */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int32	temp32;
-	u_int	len;
-	u_char	*ptr;
 
 	/*
 	 * Extract the public key from the request.
 	 */
-	len = ntohl(ep->vallen);
-	ptr = (u_char *)ep->pkt;
-	pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, len);
+	pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, vallen);
 	if (pkey == NULL) {
 		msyslog(LOG_ERR, "crypto_encrypt %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Encrypt the cookie, encode in ASN.1 and sign.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = hostval.tstamp;
-	len = EVP_PKEY_size(pkey);
-	vp->vallen = htonl(len);
-	vp->ptr = emalloc(len);
+	vallen = EVP_PKEY_size(pkey);
+	vp->vallen = htonl(vallen);
+	vp->ptr = emalloc(vallen);
 	temp32 = htonl(*cookie);
-	if (!RSA_public_encrypt(4, (u_char *)&temp32, vp->ptr,
-	    pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING)) {
+	if (RSA_public_encrypt(4, (u_char *)&temp32, vp->ptr,
+	    pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING) <= 0) {
 		msyslog(LOG_ERR, "crypto_encrypt %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
+		free(vp->ptr);	/* caller does not value_free() on error */
 		EVP_PKEY_free(pkey);
 		return (XEVNT_CKY);
 	}
 	EVP_PKEY_free(pkey);
-	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
-	EVP_SignUpdate(&ctx, vp->ptr, len);
-	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
-		vp->siglen = htonl(len);
+	EVP_SignUpdate(&ctx, vp->ptr, vallen);
+	if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey))
+		vp->siglen = htonl(sign_siglen);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_ident - construct extension field for identity scheme
  *
  * This routine determines which identity scheme is in use and
  * constructs an extension field for that scheme.
  */
 u_int
 crypto_ident(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	char	filename[MAXFILENAME + 1];
 
 	/*
 	 * If the server identity has already been verified, no further
 	 * action is necessary. Otherwise, try to load the identity file
 	 * of the certificate issuer. If the issuer file is not found,
 	 * try the host file. If nothing found, declare a cryptobust.
 	 * Note we can't get here unless the trusted certificate has
 	 * been found and the CRYPTO_FLAG_VALID bit is set, so the
 	 * certificate issuer is valid.
 	 */
-	if (peer->ident_pkey != NULL)
-		EVP_PKEY_free(peer->ident_pkey);
+	EVP_PKEY_free(peer->ident_pkey);	/* no-op on NULL */
+	peer->ident_pkey = NULL;	/* stays NULL if no scheme loads */
 	if (peer->crypto & CRYPTO_FLAG_GQ) {
 		snprintf(filename, MAXFILENAME, "ntpkey_gq_%s",
 		    peer->issuer);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_GQ);
 
 		snprintf(filename, MAXFILENAME, "ntpkey_gq_%s",
 		    sys_hostname);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_GQ);
 	}
 	if (peer->crypto & CRYPTO_FLAG_IFF) {
 		snprintf(filename, MAXFILENAME, "ntpkey_iff_%s",
 		    peer->issuer);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_IFF);
 
 		snprintf(filename, MAXFILENAME, "ntpkey_iff_%s",
 		    sys_hostname);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_IFF);
 	}
 	if (peer->crypto & CRYPTO_FLAG_MV) {
 		snprintf(filename, MAXFILENAME, "ntpkey_mv_%s",
 		    peer->issuer);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_MV);
 
 		snprintf(filename, MAXFILENAME, "ntpkey_mv_%s",
 		    sys_hostname);
 		peer->ident_pkey = crypto_key(filename, &peer->fstamp);
 		if (peer->ident_pkey != NULL)
 			return (CRYPTO_MV);
 	}
 
 	/*
 	 * No compatible identity scheme is available. Life is hard.
 	 */
 	msyslog(LOG_INFO,
 	    "crypto_ident: no compatible identity scheme found");
 	return (0);
 }
 
 
 /*
  * crypto_args - construct extension field from arguments
  *
  * This routine creates an extension field with current timestamps and
  * specified opcode, association ID and optional string. Note that the
  * extension field is created here, but freed after the crypto_xmit()
  * call in the protocol module.
  *
  * Returns extension field pointer (no errors).
+ *
+ * XXX: opcode and len should really be 32-bit quantities and
+ * we should make sure that str is not too big.
  */
 struct exten *
 crypto_args(
 	struct peer *peer,	/* peer structure pointer */
 	u_int	opcode,		/* operation code */
 	char	*str		/* argument string */
 	)
 {
 	tstamp_t tstamp;	/* NTP timestamp */
 	struct exten *ep;	/* extension field pointer */
 	u_int	len;		/* extension field length */
+	size_t	slen;
 
 	tstamp = crypto_time();
 	len = sizeof(struct exten);
-	if (str != NULL)
-		len += strlen(str);
+	if (str != NULL) {
+		slen = strlen(str);
+		len += slen;
+	}
 	ep = emalloc(len);
 	memset(ep, 0, len);
 	if (opcode == 0)
 		return (ep);
 
 	ep->opcode = htonl(opcode + len);
 
 	/*
 	 * If a response, send our ID; if a request, send the
 	 * responder's ID.
 	 */
 	if (opcode & CRYPTO_RESP)
 		ep->associd = htonl(peer->associd);
 	else
 		ep->associd = htonl(peer->assoc);
 	ep->tstamp = htonl(tstamp);
 	ep->fstamp = hostval.tstamp;
 	ep->vallen = 0;
 	if (str != NULL) {
-		ep->vallen = htonl(strlen(str));
-		memcpy((char *)ep->pkt, str, strlen(str));
+		ep->vallen = htonl(slen);
+		memcpy((char *)ep->pkt, str, slen);
 	} else {
 		ep->pkt[0] = peer->associd;
 	}
 	return (ep);
 }
 
 
 /*
  * crypto_send - construct extension field from value components
  *
  * Returns extension field length. Note: it is not polite to send a
  * nonempty signature with zero timestamp or a nonzero timestamp with
  * empty signature, but these rules are not enforced here.
+ *
+ * XXX This code won't work on a box with 16-bit ints.
  */
 u_int
 crypto_send(
 	struct exten *ep,	/* extension field pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	u_int	len, temp32;
 	int	i;
 
 	/*
 	 * Copy data. If the data field is empty or zero length, encode
 	 * an empty value with length zero.
 	 */
 	ep->tstamp = vp->tstamp;
 	ep->fstamp = vp->fstamp;
 	ep->vallen = vp->vallen;
 	len = 12;
 	temp32 = ntohl(vp->vallen);
 	if (temp32 > 0 && vp->ptr != NULL)
 		memcpy(ep->pkt, vp->ptr, temp32);
 
 	/*
 	 * Copy signature. If the signature field is empty or zero
 	 * length, encode an empty signature with length zero.
 	 */
 	i = (temp32 + 3) / 4;
 	len += i * 4 + 4;
 	ep->pkt[i++] = vp->siglen;
 	temp32 = ntohl(vp->siglen);
 	if (temp32 > 0 && vp->sig != NULL)
 		memcpy(&ep->pkt[i], vp->sig, temp32);
 	len += temp32;
 	return (len);
 }
 
 
 /*
  * crypto_update - compute new public value and sign extension fields
  *
  * This routine runs periodically, like once a day, and when something
  * changes. It updates the timestamps on three value structures and one
  * value structure list, then signs all the structures:
  *
  * hostval	host name (not signed)
  * pubkey	public key
  * cinfo	certificate info/value list
  * tai_leap	leapseconds file
  *
  * Filestamps are proventicated data, so this routine is run only when
  * the host has been synchronized to a proventicated source. Thus, the
  * timestamp is proventicated, too, and can be used to deflect
  * clogging attacks and even cook breakfast.
  *
  * Returns void (no errors)
  */
 void
 crypto_update(void)
 {
 	EVP_MD_CTX ctx;		/* message digest context */
 	struct cert_info *cp, *cpn; /* certificate info/value */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int	len;
 
 	if ((tstamp = crypto_time()) == 0)
 		return;
 
 	hostval.tstamp = htonl(tstamp);
 
 	/*
 	 * Sign public key and timestamps. The filestamp is derived from
 	 * the host key file extension from wherever the file was
 	 * generated. 
 	 */
 	if (pubkey.vallen != 0) {
 		pubkey.tstamp = hostval.tstamp;
 		pubkey.siglen = 0;
 		if (pubkey.sig == NULL)
 			pubkey.sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)&pubkey, 12);
 		EVP_SignUpdate(&ctx, pubkey.ptr, ntohl(pubkey.vallen));
 		if (EVP_SignFinal(&ctx, pubkey.sig, &len, sign_pkey))
 			pubkey.siglen = htonl(len);
 	}
 
 	/*
 	 * Sign certificates and timestamps. The filestamp is derived
 	 * from the certificate file extension from wherever the file
 	 * was generated. Note we do not throw expired certificates
 	 * away; they may have signed younger ones.
 	 */
 	for (cp = cinfo; cp != NULL; cp = cpn) {
 		cpn = cp->link;
 		cp->cert.tstamp = hostval.tstamp;
 		cp->cert.siglen = 0;
 		if (cp->cert.sig == NULL)
 			cp->cert.sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)&cp->cert, 12);
 		EVP_SignUpdate(&ctx, cp->cert.ptr,
 		    ntohl(cp->cert.vallen));
 		if (EVP_SignFinal(&ctx, cp->cert.sig, &len, sign_pkey))
 			cp->cert.siglen = htonl(len);
 	}
 
 	/*
 	 * Sign leapseconds table and timestamps. The filestamp is
 	 * derived from the leapsecond file extension from wherever the
 	 * file was generated.
 	 */
 	if (tai_leap.vallen != 0) {
 		tai_leap.tstamp = hostval.tstamp;
 		tai_leap.siglen = 0;
 		if (tai_leap.sig == NULL)
 			tai_leap.sig = emalloc(sign_siglen);
 		EVP_SignInit(&ctx, sign_digest);
 		EVP_SignUpdate(&ctx, (u_char *)&tai_leap, 12);
 		EVP_SignUpdate(&ctx, tai_leap.ptr,
 		    ntohl(tai_leap.vallen));
 		if (EVP_SignFinal(&ctx, tai_leap.sig, &len, sign_pkey))
 			tai_leap.siglen = htonl(len);
 	}
 	snprintf(statstr, NTP_MAXSTRLEN,
 	    "update ts %u", ntohl(hostval.tstamp)); 
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_update: %s\n", statstr);
 #endif
 }
 
 
 /*
  * value_free - free value structure components.
  *
  * Returns void (no errors)
  */
 void
 value_free(
 	struct value *vp	/* value structure */
 	)
 {
 	if (vp->ptr != NULL)
 		free(vp->ptr);
 	if (vp->sig != NULL)
 		free(vp->sig);
 	memset(vp, 0, sizeof(struct value));
 }
 
 
 /*
  * crypto_time - returns current NTP time in seconds.
  */
 tstamp_t
 crypto_time()
 {
 	l_fp	tstamp;		/* NTP time */	L_CLR(&tstamp);
 
 	L_CLR(&tstamp);
 	if (sys_leap != LEAP_NOTINSYNC)
 		get_systime(&tstamp);
 	return (tstamp.l_ui);
 }
 
 
 /*
  * asn2ntp - convert ASN1_TIME time structure to NTP time in seconds.
  */
 u_long
 asn2ntp	(
 	ASN1_TIME *asn1time	/* pointer to ASN1_TIME structure */
 	)
 {
 	char	*v;		/* pointer to ASN1_TIME string */
 	struct	tm tm;		/* used to convert to NTP time */
 
 	/*
 	 * Extract time string YYMMDDHHMMSSZ from ASN1 time structure.
 	 * Note that the YY, MM, DD fields start with one, the HH, MM,
 	 * SS fiels start with zero and the Z character should be 'Z'
 	 * for UTC. Also note that years less than 50 map to years
 	 * greater than 100. Dontcha love ASN.1? Better than MIL-188.
 	 */
 	if (asn1time->length > 13)
 		return ((u_long)(~0));	/* We can't use -1 here. It's invalid */
 
 	v = (char *)asn1time->data;
 	tm.tm_year = (v[0] - '0') * 10 + v[1] - '0';
 	if (tm.tm_year < 50)
 		tm.tm_year += 100;
 	tm.tm_mon = (v[2] - '0') * 10 + v[3] - '0' - 1;
 	tm.tm_mday = (v[4] - '0') * 10 + v[5] - '0';
 	tm.tm_hour = (v[6] - '0') * 10 + v[7] - '0';
 	tm.tm_min = (v[8] - '0') * 10 + v[9] - '0';
 	tm.tm_sec = (v[10] - '0') * 10 + v[11] - '0';
 	tm.tm_wday = 0;
 	tm.tm_yday = 0;
 	tm.tm_isdst = 0;
 	return (timegm(&tm) + JAN_1970);
 }
 
 
 /*
- * bigdig() - compute a BIGNUM MD5 hash of a BIGNUM number.
+ * bighash() - compute a BIGNUM MD5 hash of a BIGNUM number.
  */
 static int
 bighash(
 	BIGNUM	*bn,		/* BIGNUM * from */
 	BIGNUM	*bk		/* BIGNUM * to */
 	)
 {
 	EVP_MD_CTX ctx;		/* message digest context */
 	u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */
 	u_char	*ptr;		/* a BIGNUM as binary string */
 	u_int	len;
 
 	len = BN_num_bytes(bn);
 	ptr = emalloc(len);
 	BN_bn2bin(bn, ptr);
 	EVP_DigestInit(&ctx, EVP_md5());
 	EVP_DigestUpdate(&ctx, ptr, len);
 	EVP_DigestFinal(&ctx, dgst, &len);
 	BN_bin2bn(dgst, len, bk);
 
-	/* XXX MEMLEAK? free ptr? */
+	free(ptr);		/* binary copy of bn; already digested */
 
 	return (1);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines implement the Schnorr (IFF) identity scheme  *
  *								       *
  ***********************************************************************
  *
  * The Schnorr (IFF) identity scheme is intended for use when
  * the ntp-genkeys program does not generate the certificates used in
  * the protocol and the group key cannot be conveyed in the certificate
  * itself. For this purpose, new generations of IFF values must be
  * securely transmitted to all members of the group before use. The
  * scheme is self contained and independent of new generations of host
  * keys, sign keys and certificates.
  *
  * The IFF identity scheme is based on DSA cryptography and algorithms
  * described in Stinson p. 285. The IFF values hide in a DSA cuckoo
  * structure, but only the primes and generator are used. The p is a
  * 512-bit prime, q a 160-bit prime that divides p - 1 and is a qth root
  * of 1 mod p; that is, g^q = 1 mod p. The TA rolls primvate random
  * group key b disguised as a DSA structure member, then computes public
  * key g^(q - b). These values are shared only among group members and
  * never revealed in messages. Alice challenges Bob to confirm identity
  * using the protocol described below.
  *
  * How it works
  *
  * The scheme goes like this. Both Alice and Bob have the public primes
  * p, q and generator g. The TA gives private key b to Bob and public
  * key v = g^(q - a) mod p to Alice.
  *
  * Alice rolls new random challenge r and sends to Bob in the IFF
  * request message. Bob rolls new random k, then computes y = k + b r
  * mod q and x = g^k mod p and sends (y, hash(x)) to Alice in the
  * response message. Besides making the response shorter, the hash makes
  * it effectivey impossible for an intruder to solve for b by observing
  * a number of these messages.
  * 
  * Alice receives the response and computes g^y v^r mod p. After a bit
  * of algebra, this simplifies to g^k. If the hash of this result
  * matches hash(x), Alice knows that Bob has the group key b. The signed
  * response binds this knowledge to Bob's private key and the public key
  * previously received in his certificate.
  *
  * crypto_alice - construct Alice's challenge in IFF scheme
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  */
 static int
 crypto_alice(
 	struct peer *peer,	/* peer pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* IFF parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;
 	u_int	len;
 
 	/*
 	 * The identity parameters must have correct format and content.
 	 */
 	if (peer->ident_pkey == NULL)
 		return (XEVNT_ID);
 
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_alice: defective key");
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Roll new random r (0 < r < q). The OpenSSL library has a bug
 	 * omitting BN_rand_range, so we have to do it the hard way.
 	 */
 	bctx = BN_CTX_new();
 	len = BN_num_bytes(dsa->q);
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = BN_new();
 	BN_rand(peer->iffval, len * 8, -1, 1);	/* r */
 	BN_mod(peer->iffval, peer->iffval, dsa->q, bctx);
 	BN_CTX_free(bctx);
 
 	/*
 	 * Sign and send to Bob. The filestamp is from the local file.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(peer->fstamp);
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	BN_bn2bin(peer->iffval, vp->ptr);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_bob - construct Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_ID	bad or missing group key
- * XEVNT_ERR	protocol error
+ * XEVNT_ERR	protocol error (XEVNT_LEN if a length is bad)
  * XEVNT_PER	host expired certificate
  */
 static int
 crypto_bob(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* IFF parameters */
 	DSA_SIG	*sdsa;		/* DSA signature context fake */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	BIGNUM	*bn, *bk, *r;
 	u_char	*ptr;
-	u_int	len;
+	u_int	len;		/* extension field length */
+	u_int	vallen = 0;	/* value length */
 
 	/*
 	 * If the IFF parameters are not valid, something awful
 	 * happened or we are being tormented.
 	 */
 	if (iffpar_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_bob: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	dsa = iffpar_pkey->pkey.dsa;
 
 	/*
 	 * Extract r from the challenge.
 	 */
-	len = ntohl(ep->vallen);
-	if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) {
+	vallen = ntohl(ep->vallen);
+	len = ntohl(ep->opcode) & 0x0000ffff;
+	if (vallen == 0 || len < VALUE_LEN || len - VALUE_LEN < vallen)
+		return XEVNT_LEN;
+	if ((r = BN_bin2bn((u_char *)ep->pkt, vallen, NULL)) == NULL) {
 		msyslog(LOG_ERR, "crypto_bob %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Bob rolls random k (0 < k < q), computes y = k + b r mod q
 	 * and x = g^k mod p, then sends (y, hash(x)) to Alice.
 	 */
 	bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new();
 	sdsa = DSA_SIG_new();
-	BN_rand(bk, len * 8, -1, 1);		/* k */
+	BN_rand(bk, vallen * 8, -1, 1);		/* k */
 	BN_mod_mul(bn, dsa->priv_key, r, dsa->q, bctx); /* b r mod q */
 	BN_add(bn, bn, bk);
 	BN_mod(bn, bn, dsa->q, bctx);		/* k + b r mod q */
 	sdsa->r = BN_dup(bn);
 	BN_mod_exp(bk, dsa->g, bk, dsa->p, bctx); /* g^k mod p */
 	bighash(bk, bk);
 	sdsa->s = BN_dup(bk);
 	BN_CTX_free(bctx);
 	BN_free(r); BN_free(bn); BN_free(bk);
 
 	/*
 	 * Encode the values in ASN.1 and sign.
 	 */
-	tstamp = crypto_time();
-	memset(vp, 0, sizeof(struct value));
-	vp->tstamp = htonl(tstamp);
-	vp->fstamp = htonl(if_fstamp);
-	len = i2d_DSA_SIG(sdsa, NULL);
-	if (len <= 0) {
+	vallen = i2d_DSA_SIG(sdsa, NULL);
+	if (vallen == 0) {
 		msyslog(LOG_ERR, "crypto_bob %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		DSA_SIG_free(sdsa);
 		return (XEVNT_ERR);
 	}
-	vp->vallen = htonl(len);
-	ptr = emalloc(len);
+	if (vallen > MAX_VALLEN) {
+		msyslog(LOG_ERR, "crypto_bob: signature is too big: %u",
+		    vallen);
+		DSA_SIG_free(sdsa);
+		return (XEVNT_LEN);
+	}
+	memset(vp, 0, sizeof(struct value));
+	tstamp = crypto_time();
+	vp->tstamp = htonl(tstamp);
+	vp->fstamp = htonl(if_fstamp);
+	vp->vallen = htonl(vallen);
+	ptr = emalloc(vallen);
 	vp->ptr = ptr;
 	i2d_DSA_SIG(sdsa, &ptr);
 	DSA_SIG_free(sdsa);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
+	/* XXX: more validation to make sure the sign fits... */
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
-	EVP_SignUpdate(&ctx, vp->ptr, len);
-	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
-		vp->siglen = htonl(len);
+	EVP_SignUpdate(&ctx, vp->ptr, vallen);
+	if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey))
+		vp->siglen = htonl(vallen);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_iff - verify Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_FSP	bad filestamp
  */
 int
 crypto_iff(
 	struct exten *ep,	/* extension pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	DSA	*dsa;		/* IFF parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	DSA_SIG	*sdsa;		/* DSA parameters */
 	BIGNUM	*bn, *bk;
 	u_int	len;
 	const u_char	*ptr;
 	int	temp;
 
 	/*
 	 * If the IFF parameters are not valid or no challenge was sent,
 	 * something awful happened or we are being tormented.
 	 */
 	if (peer->ident_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_iff: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	if (ntohl(ep->fstamp) != peer->fstamp) {
 		msyslog(LOG_INFO, "crypto_iff: invalid filestamp %u",
 		    ntohl(ep->fstamp));
 		return (XEVNT_FSP);
 	}
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_iff: defective key");
 		return (XEVNT_PUB);
 	}
 	if (peer->iffval == NULL) {
 		msyslog(LOG_INFO, "crypto_iff: missing challenge");
 		return (XEVNT_ID);
 	}
 
 	/*
-	 * Extract the k + b r and g^k values from the response.
+	 * Extract k + b r and g^k; allocate only after a good parse.
 	 */
-	bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new();
 	len = ntohl(ep->vallen);
 	ptr = (const u_char *)ep->pkt;
 	if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) {
 		msyslog(LOG_ERR, "crypto_iff %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
+	bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new();
 
 	/*
 	 * Compute g^(k + b r) g^(q - b)r mod p.
 	 */
 	BN_mod_exp(bn, dsa->pub_key, peer->iffval, dsa->p, bctx);
 	BN_mod_exp(bk, dsa->g, sdsa->r, dsa->p, bctx);
 	BN_mod_mul(bn, bn, bk, dsa->p, bctx);
 
 	/*
 	 * Verify the hash of the result matches hash(x).
 	 */
 	bighash(bn, bn);
 	temp = BN_cmp(bn, sdsa->s);
 	BN_free(bn); BN_free(bk); BN_CTX_free(bctx);
 	BN_free(peer->iffval);
 	peer->iffval = NULL;
 	DSA_SIG_free(sdsa);
 	if (temp == 0)
 		return (XEVNT_OK);
 
 	else
 		return (XEVNT_ID);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines implement the Guillou-Quisquater (GQ)        *
  * identity scheme                                                     *
  *								       *
  ***********************************************************************
  *
  * The Guillou-Quisquater (GQ) identity scheme is intended for use when
  * the ntp-genkeys program generates the certificates used in the
  * protocol and the group key can be conveyed in a certificate extension
  * field. The scheme is self contained and independent of new
  * generations of host keys, sign keys and certificates.
  *
  * The GQ identity scheme is based on RSA cryptography and algorithms
  * described in Stinson p. 300 (with errors). The GQ values hide in a
  * RSA cuckoo structure, but only the modulus is used. The 512-bit
  * public modulus is n = p q, where p and q are secret large primes. The
  * TA rolls random group key b disguised as a RSA structure member.
  * Except for the public key, these values are shared only among group
  * members and never revealed in messages.
  *
  * When rolling new certificates, Bob recomputes the private and
  * public keys. The private key u is a random roll, while the public key
  * is the inverse obscured by the group key v = (u^-1)^b. These values
  * replace the private and public keys normally generated by the RSA
  * scheme. Alice challenges Bob to confirm identity using the protocol
  * described below.
  *
  * How it works
  *
  * The scheme goes like this. Both Alice and Bob have the same modulus n
  * and some random b as the group key. These values are computed and
  * distributed in advance via secret means, although only the group key
  * b is truly secret. Each has a private random private key u and public
  * key (u^-1)^b, although not necessarily the same ones. Bob and Alice
  * can regenerate the key pair from time to time without affecting
  * operations. The public key is conveyed on the certificate in an
  * extension field; the private key is never revealed.
  *
  * Alice rolls new random challenge r and sends to Bob in the GQ
  * request message. Bob rolls new random k, then computes y = k u^r mod
  * n and x = k^b mod n and sends (y, hash(x)) to Alice in the response
  * message. Besides making the response shorter, the hash makes it
  * effectivey impossible for an intruder to solve for b by observing
  * a number of these messages.
  * 
  * Alice receives the response and computes y^b v^r mod n. After a bit
  * of algebra, this simplifies to k^b. If the hash of this result
  * matches hash(x), Alice knows that Bob has the group key b. The signed
  * response binds this knowledge to Bob's private key and the public key
  * previously received in his certificate.
  *
  * crypto_alice2 - construct Alice's challenge in GQ scheme
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_alice2(
 	struct peer *peer,	/* peer pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	RSA	*rsa;		/* GQ parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;
 	u_int	len;
 
 	/*
 	 * The identity parameters must have correct format and content.
 	 */
 	if (peer->ident_pkey == NULL)
 		return (XEVNT_ID);
 
 	if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_alice2: defective key");
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Roll new random r (0 < r < n). The OpenSSL library has a bug
 	 * omitting BN_rand_range, so we have to do it the hard way.
 	 */
 	bctx = BN_CTX_new();
 	len = BN_num_bytes(rsa->n);
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = BN_new();
 	BN_rand(peer->iffval, len * 8, -1, 1);	/* r mod n */
 	BN_mod(peer->iffval, peer->iffval, rsa->n, bctx);
 	BN_CTX_free(bctx);
 
 	/*
 	 * Sign and send to Bob. The filestamp is from the local file.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(peer->fstamp);
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	BN_bn2bin(peer->iffval, vp->ptr);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_bob2 - construct Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_ID	bad or missing group key
- * XEVNT_ERR	protocol error
+ * XEVNT_ERR	protocol error (XEVNT_LEN if a length is bad)
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_bob2(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	RSA	*rsa;		/* GQ parameters */
 	DSA_SIG	*sdsa;		/* DSA parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	BIGNUM	*r, *k, *g, *y;
 	u_char	*ptr;
 	u_int	len;
 
 	/*
 	 * If the GQ parameters are not valid, something awful
 	 * happened or we are being tormented.
 	 */
 	if (gqpar_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_bob2: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	rsa = gqpar_pkey->pkey.rsa;
 
-	/*
-	 * Extract r from the challenge.
-	 */
 	len = ntohl(ep->vallen);
+	if (len == 0 || (ntohl(ep->opcode) & 0x0000ffff) < VALUE_LEN ||
+	    (ntohl(ep->opcode) & 0x0000ffff) - VALUE_LEN < len)
+		return (XEVNT_LEN);	/* r must fit inside the field */
 	if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) {
 		msyslog(LOG_ERR, "crypto_bob2 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Bob rolls random k (0 < k < n), computes y = k u^r mod n and
 	 * x = k^b mod n, then sends (y, hash(x)) to Alice. 
 	 */
 	bctx = BN_CTX_new(); k = BN_new(); g = BN_new(); y = BN_new();
 	sdsa = DSA_SIG_new();
 	BN_rand(k, len * 8, -1, 1);		/* k */
 	BN_mod(k, k, rsa->n, bctx);
 	BN_mod_exp(y, rsa->p, r, rsa->n, bctx); /* u^r mod n */
 	BN_mod_mul(y, k, y, rsa->n, bctx);	/* k u^r mod n */
 	sdsa->r = BN_dup(y);
 	BN_mod_exp(g, k, rsa->e, rsa->n, bctx); /* k^b mod n */
 	bighash(g, g);
 	sdsa->s = BN_dup(g);
 	BN_CTX_free(bctx);
 	BN_free(r); BN_free(k); BN_free(g); BN_free(y);
  
 	/*
 	 * Encode the values in ASN.1 and sign.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(gq_fstamp);
 	len = i2d_DSA_SIG(sdsa, NULL);
 	if (len <= 0) {
 		msyslog(LOG_ERR, "crypto_bob2 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		DSA_SIG_free(sdsa);
 		return (XEVNT_ERR);
 	}
 	vp->vallen = htonl(len);
 	ptr = emalloc(len);
 	vp->ptr = ptr;
 	i2d_DSA_SIG(sdsa, &ptr);
 	DSA_SIG_free(sdsa);
 	vp->siglen = 0;
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_gq - verify Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group keys
  * XEVNT_ERR	protocol error
  * XEVNT_FSP	bad filestamp
  */
 int
 crypto_gq(
 	struct exten *ep,	/* extension pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	RSA	*rsa;		/* GQ parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	DSA_SIG	*sdsa;		/* RSA signature context fake */
 	BIGNUM	*y, *v;
 	const u_char	*ptr;
 	u_int	len;
 	int	temp;
 
 	/*
 	 * If the GQ parameters are not valid or no challenge was sent,
 	 * something awful happened or we are being tormented.
 	 */
 	if (peer->ident_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_gq: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	if (ntohl(ep->fstamp) != peer->fstamp) {
 		msyslog(LOG_INFO, "crypto_gq: invalid filestamp %u",
 		    ntohl(ep->fstamp));
 		return (XEVNT_FSP);
 	}
 	if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_gq: defective key");
 		return (XEVNT_PUB);
 	}
 	if (peer->iffval == NULL) {
 		msyslog(LOG_INFO, "crypto_gq: missing challenge");
 		return (XEVNT_ID);
 	}
 
 	/*
 	 * Extract the y = k u^r and hash(x = k^b) values from the
-	 * response.
+	 * response; allocate only after a good parse.
 	 */
-	bctx = BN_CTX_new(); y = BN_new(); v = BN_new();
 	len = ntohl(ep->vallen);
 	ptr = (const u_char *)ep->pkt;
 	if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) {
 		msyslog(LOG_ERR, "crypto_gq %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
+	bctx = BN_CTX_new(); y = BN_new(); v = BN_new();
 
 	/*
 	 * Compute v^r y^b mod n.
 	 */
 	BN_mod_exp(v, peer->grpkey, peer->iffval, rsa->n, bctx);
 						/* v^r mod n */
 	BN_mod_exp(y, sdsa->r, rsa->e, rsa->n, bctx); /* y^b mod n */
 	BN_mod_mul(y, v, y, rsa->n, bctx);	/* v^r y^b mod n */
 
 	/*
 	 * Verify the hash of the result matches hash(x).
 	 */
 	bighash(y, y);
 	temp = BN_cmp(y, sdsa->s);
 	BN_CTX_free(bctx); BN_free(y); BN_free(v);
 	BN_free(peer->iffval);
 	peer->iffval = NULL;
 	DSA_SIG_free(sdsa);
 	if (temp == 0)
 		return (XEVNT_OK);
 
 	else
 		return (XEVNT_ID);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines implement the Mu-Varadharajan (MV) identity  *
  * scheme                                                              *
  *								       *
  ***********************************************************************
  */
 /*
  * The Mu-Varadharajan (MV) cryptosystem was originally intended when
  * servers broadcast messages to clients, but clients never send
  * messages to servers. There is one encryption key for the server and a
  * separate decryption key for each client. It operated something like a
  * pay-per-view satellite broadcasting system where the session key is
  * encrypted by the broadcaster and the decryption keys are held in a
  * tamperproof set-top box.
  *
  * The MV parameters and private encryption key hide in a DSA cuckoo
  * structure which uses the same parameters, but generated in a
  * different way. The values are used in an encryption scheme similar to
  * El Gamal cryptography and a polynomial formed from the expansion of
  * product terms (x - x[j]), as described in Mu, Y., and V.
  * Varadharajan: Robust and Secure Broadcasting, Proc. Indocrypt 2001,
  * 223-231. The paper has significant errors and serious omissions.
  *
  * Let q be the product of n distinct primes s'[j] (j = 1...n), where
  * each s'[j] has m significant bits. Let p be a prime p = 2 * q + 1, so
  * that q and each s'[j] divide p - 1 and p has M = n * m + 1
  * significant bits. The elements x mod q of Zq with the elements 2 and
  * the primes removed form a field Zq* valid for polynomial arithetic.
  * Let g be a generator of Zp; that is, gcd(g, p - 1) = 1 and g^q = 1
  * mod p. We expect M to be in the 500-bit range and n relatively small,
  * like 25, so the likelihood of a randomly generated element of x mod q
  * of Zq colliding with a factor of p - 1 is very small and can be
  * avoided. Associated with each s'[j] is an element s[j] such that s[j]
  * s'[j] = s'[j] mod q. We find s[j] as the quotient (q + s'[j]) /
  * s'[j]. These are the parameters of the scheme and they are expensive
  * to compute.
  *
  * We set up an instance of the scheme as follows. A set of random
  * values x[j] mod q (j = 1...n), are generated as the zeros of a
  * polynomial of order n. The product terms (x - x[j]) are expanded to
  * form coefficients a[i] mod q (i = 0...n) in powers of x. These are
  * used as exponents of the generator g mod p to generate the private
  * encryption key A. The pair (gbar, ghat) of public server keys and the
  * pairs (xbar[j], xhat[j]) (j = 1...n) of private client keys are used
  * to construct the decryption keys. The devil is in the details.
  *
  * The distinguishing characteristic of this scheme is the capability to
  * revoke keys. Included in the calculation of E, gbar and ghat is the
  * product s = prod(s'[j]) (j = 1...n) above. If the factor s'[j] is
  * subsequently removed from the product and E, gbar and ghat
  * recomputed, the jth client will no longer be able to compute E^-1 and
  * thus unable to decrypt the block.
  *
  * How it works
  *
  * The scheme goes like this. Bob has the server values (p, A, q, gbar,
  * ghat) and Alice the client values (p, xbar, xhat).
  *
  * Alice rolls new random challenge r (0 < r < p) and sends to Bob in
  * the MV request message. Bob rolls new random k (0 < k < q), encrypts
  * y = A^k mod p (a permutation) and sends (hash(y), gbar^k, ghat^k) to
  * Alice.
  * 
  * Alice receives the response and computes the decryption key (the
  * inverse permutation) from previously obtained (xbar, xhat) and
  * (gbar^k, ghat^k) in the message. She computes the inverse, which is
  * unique by reasons explained in the ntp-keygen.c program sources. If
  * the hash of this result matches hash(y), Alice knows that Bob has the
  * group key b. The signed response binds this knowledge to Bob's
  * private key and the public key previously received in his
  * certificate.
  *
  * crypto_alice3 - construct Alice's challenge in MV scheme
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_alice3(
 	struct peer *peer,	/* peer pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* MV parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int	len;		/* octets in the modulus p */
 	u_int	rlen;		/* octets actually occupied by r */
 
 	/*
 	 * The identity parameters must have correct format and content.
 	 */
 	if (peer->ident_pkey == NULL)
 		return (XEVNT_ID);
 
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_alice3: defective key");
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Roll new random r and reduce it mod p. The OpenSSL library
 	 * has a bug omitting BN_rand_range, so we have to do it the
 	 * hard way. The challenge is remembered in peer->iffval so
 	 * that crypto_mv() can check Bob's response against it.
 	 */
 	bctx = BN_CTX_new();
 	len = BN_num_bytes(dsa->p);
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = BN_new();
 	BN_rand(peer->iffval, len * 8, -1, 1);	/* r */
 	BN_mod(peer->iffval, peer->iffval, dsa->p, bctx);
 	BN_CTX_free(bctx);
 
 	/*
 	 * Sign and send to Bob. The filestamp is from the local file.
 	 *
 	 * BN_bn2bin() emits only the significant octets of r, which can
 	 * be fewer than len after the reduction mod p. Right-justify r
 	 * in a zeroed buffer so that the len octets put on the wire are
 	 * a big-endian encoding of exactly r and no uninitialized heap
 	 * is transmitted.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(peer->fstamp);
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	memset(vp->ptr, 0, len);
 	rlen = BN_num_bytes(peer->iffval);
 	BN_bn2bin(peer->iffval, vp->ptr + (len - rlen));
 	vp->siglen = 0;
 
 	/*
 	 * A zero timestamp means the value is sent unsigned.
 	 */
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	/*
 	 * The signature covers the 12 octets starting at the timestamp
 	 * followed by the value itself. len is reused to receive the
 	 * signature length.
 	 */
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_bob3 - construct Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_ID	bad or missing group key
  * XEVNT_ERR	protocol error
  * XEVNT_PER	host certificate expired
  */
 static int
 crypto_bob3(
 	struct exten *ep,	/* extension pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	DSA	*dsa;		/* MV parameters */
 	DSA	*sdsa;		/* DSA signature context fake */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	EVP_MD_CTX ctx;		/* signature context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	BIGNUM	*r, *k, *u;
 	u_char	*ptr;
 	u_int	len;
 	int	derlen;		/* signed result of i2d_DSAparams() */
 
 	/*
 	 * If the MV parameters are not valid, something awful
 	 * happened or we are being tormented.
 	 */
 	if (mvpar_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_bob3: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	dsa = mvpar_pkey->pkey.dsa;
 
 	/*
 	 * Extract r from the challenge.
 	 */
 	len = ntohl(ep->vallen);
 	if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) {
 		msyslog(LOG_ERR, "crypto_bob3 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 
 	/*
 	 * Bob rolls random k (0 < k < q), making sure it is not a
 	 * factor of q. He then computes y = A^k r and sends (hash(y),
 	 * gbar^k, ghat^k) to Alice. The three results are parked in the
 	 * p, q and g members of a scratch DSA structure only so they
 	 * can be DER-encoded with i2d_DSAparams() below.
 	 */
 	bctx = BN_CTX_new(); k = BN_new(); u = BN_new();
 	sdsa = DSA_new();
 	sdsa->p = BN_new(); sdsa->q = BN_new(); sdsa->g = BN_new();
 	while (1) {
 		BN_rand(k, BN_num_bits(dsa->q), 0, 0);
 		BN_mod(k, k, dsa->q, bctx);
 		BN_gcd(u, k, dsa->q, bctx);
 		if (BN_is_one(u))
 			break;
 	}
 	BN_mod_exp(u, dsa->g, k, dsa->p, bctx); /* A r */
 	BN_mod_mul(u, u, r, dsa->p, bctx);
 	bighash(u, sdsa->p);
 	BN_mod_exp(sdsa->q, dsa->priv_key, k, dsa->p, bctx); /* gbar */
 	BN_mod_exp(sdsa->g, dsa->pub_key, k, dsa->p, bctx); /* ghat */
 	BN_CTX_free(bctx); BN_free(k); BN_free(r); BN_free(u);
 
 	/*
 	 * Encode the values in ASN.1 and sign. i2d_DSAparams() reports
 	 * failure with a negative value, so the result must be tested
 	 * as a signed quantity before it is stored in the unsigned
 	 * length.
 	 */
 	tstamp = crypto_time();
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = htonl(mv_fstamp);
 	derlen = i2d_DSAparams(sdsa, NULL);
 	if (derlen <= 0) {
 		msyslog(LOG_ERR, "crypto_bob3 %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		DSA_free(sdsa);
 		return (XEVNT_ERR);
 	}
 	len = derlen;
 	vp->vallen = htonl(len);
 	ptr = emalloc(len);
 	vp->ptr = ptr;
 	i2d_DSAparams(sdsa, &ptr);	/* advances ptr, not vp->ptr */
 	DSA_free(sdsa);
 	vp->siglen = 0;
 
 	/*
 	 * A zero timestamp means the value is sent unsigned.
 	 */
 	if (tstamp == 0)
 		return (XEVNT_OK);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 	return (XEVNT_OK);
 }
 
 
 /*
  * crypto_mv - verify Bob's response to Alice's challenge
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_PUB	bad or missing public key
  * XEVNT_ID	bad or missing group key
  * XEVNT_ERR	protocol error
  * XEVNT_FSP	bad filestamp
  */
 int
 crypto_mv(
 	struct exten *ep,	/* extension pointer */
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	DSA	*dsa;		/* MV parameters */
 	DSA	*sdsa;		/* DSA parameters */
 	BN_CTX	*bctx;		/* BIGNUM context */
 	BIGNUM	*u, *v;
 	u_int	len;
 	const u_char	*ptr;
 	int	temp;
 
 	/*
 	 * If the MV parameters are not valid or no challenge was sent,
 	 * something awful happened or we are being tormented.
 	 */
 	if (peer->ident_pkey == NULL) {
 		msyslog(LOG_INFO, "crypto_mv: scheme unavailable");
 		return (XEVNT_ID);
 	}
 	if (ntohl(ep->fstamp) != peer->fstamp) {
 		msyslog(LOG_INFO, "crypto_mv: invalid filestamp %u",
 		    ntohl(ep->fstamp));
 		return (XEVNT_FSP);
 	}
 	if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) {
 		msyslog(LOG_INFO, "crypto_mv: defective key");
 		return (XEVNT_PUB);
 	}
 	if (peer->iffval == NULL) {
 		msyslog(LOG_INFO, "crypto_mv: missing challenge");
 		return (XEVNT_ID);
 	}
 
 	/*
 	 * Extract the (hash(y), gbar, ghat) values from the response.
 	 * Decode before allocating any scratch values so that a
 	 * malformed response cannot leak them on the error return.
 	 */
 	len = ntohl(ep->vallen);
 	ptr = (const u_char *)ep->pkt;
 	if ((sdsa = d2i_DSAparams(NULL, &ptr, len)) == NULL) {
 		msyslog(LOG_ERR, "crypto_mv %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_ERR);
 	}
 	bctx = BN_CTX_new(); u = BN_new(); v = BN_new();
 
 	/*
 	 * Compute (gbar^xhat ghat^xbar)^-1 mod p.
 	 */
 	BN_mod_exp(u, sdsa->q, dsa->pub_key, dsa->p, bctx);
 	BN_mod_exp(v, sdsa->g, dsa->priv_key, dsa->p, bctx);
 	BN_mod_mul(u, u, v, dsa->p, bctx);
 	BN_mod_inverse(u, u, dsa->p, bctx);
 	BN_mod_mul(v, u, peer->iffval, dsa->p, bctx);
 
 	/*
 	 * The result should match the hash of r mod p. The challenge
 	 * is discarded whether or not the comparison succeeds.
 	 */
 	bighash(v, v);
 	temp = BN_cmp(v, sdsa->p);
 	BN_CTX_free(bctx); BN_free(u); BN_free(v);
 	BN_free(peer->iffval);
 	peer->iffval = NULL;
 	DSA_free(sdsa);
 	if (temp == 0)
 		return (XEVNT_OK);
 
 	else
 		return (XEVNT_ID);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines are used to manipulate certificates          *
  *								       *
  ***********************************************************************
  */
 /*
  * cert_parse - parse x509 certificate and create info/value structures.
  *
  * The server certificate includes the version number, issuer name,
  * subject name, public key and valid date interval. If the issuer name
  * is the same as the subject name, the certificate is self signed and
  * valid only if the server is configured as trustable. If the names are
  * different, another issuer has signed the server certificate and
  * vouched for it. In this case the server certificate is valid if
  * verified by the issuer public key.
  *
  * Returns certificate info/value pointer if valid, NULL if not.
  */
 struct cert_info *		/* certificate information structure */
 cert_parse(
 	u_char	*asn1cert,	/* X509 certificate */
 	u_int	len,		/* certificate length */
 	tstamp_t fstamp		/* filestamp */
 	)
 {
 	X509	*cert;		/* X509 certificate */
 	X509_EXTENSION *ext;	/* X509v3 extension */
 	struct cert_info *ret;	/* certificate info/value */
 	BIO	*bp;
 	char	pathbuf[MAXFILENAME];
 	u_char	*uptr;
 	char	*ptr;
 	int	temp, cnt, i;
 
 	/*
 	 * Decode ASN.1 objects and construct certificate structure.
 	 */
 	uptr = asn1cert;
 	if ((cert = d2i_X509(NULL, &uptr, len)) == NULL) {
 		msyslog(LOG_ERR, "cert_parse %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (NULL);
 	}
 
 	/*
 	 * Extract version, subject name and public key.
 	 */
 	ret = emalloc(sizeof(struct cert_info));
 	memset(ret, 0, sizeof(struct cert_info));
 	if ((ret->pkey = X509_get_pubkey(cert)) == NULL) {
 		msyslog(LOG_ERR, "cert_parse %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 	ret->version = X509_get_version(cert);
 	X509_NAME_oneline(X509_get_subject_name(cert), pathbuf,
 	    MAXFILENAME - 1);
 	ptr = strstr(pathbuf, "CN=");
 	if (ptr == NULL) {
 		msyslog(LOG_INFO, "cert_parse: invalid subject %s",
 		    pathbuf);
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 
 	/*
 	 * Only the text following "CN=" is kept as the subject name.
 	 */
 	ret->subject = emalloc(strlen(ptr) + 1);
 	strcpy(ret->subject, ptr + 3);
 
 	/*
 	 * Extract remaining objects. Note that the NTP serial number is
 	 * the NTP seconds at the time of signing, but this might not be
 	 * the case for other authority. We don't bother to check the
 	 * objects at this time, since the real crunch can happen only
 	 * when the time is valid but not yet certificated.
 	 */
 	ret->nid = OBJ_obj2nid(cert->cert_info->signature->algorithm);
 	ret->digest = (const EVP_MD *)EVP_get_digestbynid(ret->nid);
 	ret->serial =
 	    (u_long)ASN1_INTEGER_get(X509_get_serialNumber(cert));
 	X509_NAME_oneline(X509_get_issuer_name(cert), pathbuf,
 	    MAXFILENAME);
 	if ((ptr = strstr(pathbuf, "CN=")) == NULL) {
 		msyslog(LOG_INFO, "cert_parse: invalid issuer %s",
 		    pathbuf);
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 	ret->issuer = emalloc(strlen(ptr) + 1);
 	strcpy(ret->issuer, ptr + 3);
 	ret->first = asn2ntp(X509_get_notBefore(cert));
 	ret->last = asn2ntp(X509_get_notAfter(cert));
 
 	/*
 	 * Extract extension fields. These are ad hoc ripoffs of
 	 * currently assigned functions and will certainly be changed
 	 * before prime time.
 	 */
 	cnt = X509_get_ext_count(cert);
 	for (i = 0; i < cnt; i++) {
 		ext = X509_get_ext(cert, i);
 		temp = OBJ_obj2nid(ext->object);
 		switch (temp) {
 
 		/*
 		 * If a key_usage field is present, we decode whether
 		 * this is a trusted or private certificate. This is
 		 * dorky; all we want is to compare NIDs, but OpenSSL
 		 * insists on BIO text strings.
 		 */
 		case NID_ext_key_usage:
 			bp = BIO_new(BIO_s_mem());
 			X509V3_EXT_print(bp, ext, 0, 0);
 			BIO_gets(bp, pathbuf, MAXFILENAME);
 			BIO_free(bp);
 #ifdef DEBUG
 			if (debug)
 				printf("cert_parse: %s: %s\n",
 				    OBJ_nid2ln(temp), pathbuf);
 #endif
 			if (strcmp(pathbuf, "Trust Root") == 0)
 				ret->flags |= CERT_TRUST;
 			else if (strcmp(pathbuf, "Private") == 0)
 				ret->flags |= CERT_PRIV;
 			break;
 
 		/*
 		 * If a NID_subject_key_identifier field is present, it
 		 * contains the GQ public key. The first two octets of
 		 * the extension value are skipped, so a value that is
 		 * not longer than that carries no key and is ignored
 		 * rather than producing a zero or negative length. A
 		 * repeated extension replaces the key saved earlier.
 		 */
 		case NID_subject_key_identifier:
 			if (ext->value->length <= 2) {
 				msyslog(LOG_INFO,
 				    "cert_parse: invalid subject key identifier length %d",
 				    ext->value->length);
 				break;
 			}
 			if (ret->grpkey != NULL)
 				free(ret->grpkey);
 			ret->grplen = ext->value->length - 2;
 			ret->grpkey = emalloc(ret->grplen);
 			memcpy(ret->grpkey, &ext->value->data[2],
 			    ret->grplen);
 			break;
 		}
 	}
 
 	/*
 	 * If certificate is self signed, verify signature. X509_verify
 	 * returns 1 on success, 0 on a bad signature and a negative
 	 * value on error, so anything other than a positive result is
 	 * a failure.
 	 */
 	if (strcmp(ret->subject, ret->issuer) == 0) {
 		if (X509_verify(cert, ret->pkey) <= 0) {
 			msyslog(LOG_INFO,
 			    "cert_parse: signature not verified %s",
 			    pathbuf);
 			cert_free(ret);
 			X509_free(cert);
 			return (NULL);
 		}
 	}
 
 	/*
 	 * Verify certificate valid times. Note that certificates cannot
 	 * be retroactive.
 	 */
 	if (ret->first > ret->last || ret->first < fstamp) {
 		msyslog(LOG_INFO,
 		    "cert_parse: invalid certificate %s first %u last %u fstamp %u",
 		    ret->subject, ret->first, ret->last, fstamp);
 		cert_free(ret);
 		X509_free(cert);
 		return (NULL);
 	}
 
 	/*
 	 * Build the value structure to sign and send later. It holds a
 	 * private copy of the DER encoding handed in by the caller.
 	 */
 	ret->cert.fstamp = htonl(fstamp);
 	ret->cert.vallen = htonl(len);
 	ret->cert.ptr = emalloc(len);
 	memcpy(ret->cert.ptr, asn1cert, len);
 #ifdef DEBUG
 	if (debug > 1)
 		X509_print_fp(stdout, cert);
 #endif
 	X509_free(cert);
 	return (ret);
 }
 
 
 /*
  * cert_sign - sign x509 certificate request and update value structure.
  *
  * The certificate request includes a copy of the host certificate,
  * which includes the version number, subject name and public key of the
  * host. The resulting certificate includes these values plus the
  * serial number, issuer name and valid interval of the server. The
  * valid interval extends from the current time to the same time one
  * year hence. This may extend the life of the signed certificate beyond
  * that of the signer certificate.
  *
  * It is convenient to use the NTP seconds of the current time as the
  * serial number. In the value structure the timestamp is the current
  * time and the filestamp is taken from the extension field. Note this
  * routine is called only when the client clock is synchronized to a
  * proventic source, so timestamp comparisons are valid.
  *
  * The host certificate is valid from the time it was generated for a
  * period of one year. A signed certificate is valid from the time of
  * signature for a period of one year, but only the host certificate (or
  * sign certificate if used) is actually used to encrypt and decrypt
  * signatures. The signature trail is built from the client via the
  * intermediate servers to the trusted server. Each signature on the
  * trail must be valid at the time of signature, but it could happen
  * that a signer certificate expire before the signed certificate, which
  * remains valid until its expiration.
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_TSP	clock not yet synchronized (no valid timestamp)
  * XEVNT_PUB	bad or missing public key
  * XEVNT_CRT	bad or missing certificate
  * XEVNT_VFY	certificate not verified
  * XEVNT_PER	host certificate expired
  */
 static int
 cert_sign(
 	struct exten *ep,	/* extension field pointer */
 	struct value *vp	/* value pointer */
 	)
 {
 	X509	*req;		/* X509 certificate request */
 	X509	*cert;		/* X509 certificate */
 	X509_EXTENSION *ext;	/* certificate extension */
 	ASN1_INTEGER *serial;	/* serial number */
 	X509_NAME *subj;	/* distinguished (common) name */
 	EVP_PKEY *pkey;		/* public key */
 	EVP_MD_CTX ctx;		/* message digest context */
 	tstamp_t tstamp;	/* NTP timestamp */
 	u_int	len;
 	u_char	*ptr;
 	int	i, temp;
 
 	/*
 	 * Decode ASN.1 objects and construct certificate structure.
 	 * Make sure the system clock is synchronized to a proventic
 	 * source.
 	 */
 	tstamp = crypto_time();
 	if (tstamp == 0)
 		return (XEVNT_TSP);
 
 	if (tstamp < cinfo->first || tstamp > cinfo->last)
 		return (XEVNT_PER);
 
 	ptr = (u_char *)ep->pkt;
 	if ((req = d2i_X509(NULL, &ptr, ntohl(ep->vallen))) == NULL) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (XEVNT_CRT);
 	}
 	/*
 	 * Extract public key and check for errors.
 	 */
 	if ((pkey = X509_get_pubkey(req)) == NULL) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		X509_free(req);
 		return (XEVNT_PUB);
 	}
 
 	/*
 	 * Generate X509 certificate signed by this server. For this
 	 * purpose the issuer name is the server name. Also copy any
 	 * extensions that might be present.
 	 *
 	 * X509_set_serialNumber() and X509_set_pubkey() do not take
 	 * ownership of their arguments, so the serial number and the
 	 * key reference obtained from X509_get_pubkey() are released
 	 * here once the certificate has its own.
 	 */
 	cert = X509_new();
 	X509_set_version(cert, X509_get_version(req));
 	serial = ASN1_INTEGER_new();
 	ASN1_INTEGER_set(serial, tstamp);
 	X509_set_serialNumber(cert, serial);
 	ASN1_INTEGER_free(serial);
 	X509_gmtime_adj(X509_get_notBefore(cert), 0L);
 	X509_gmtime_adj(X509_get_notAfter(cert), YEAR);
 	subj = X509_get_issuer_name(cert);
 	X509_NAME_add_entry_by_txt(subj, "commonName", MBSTRING_ASC,
 	    (u_char *)sys_hostname, strlen(sys_hostname), -1, 0);
 	subj = X509_get_subject_name(req);
 	X509_set_subject_name(cert, subj);
 	X509_set_pubkey(cert, pkey);
 	EVP_PKEY_free(pkey);
 	temp = X509_get_ext_count(req);
 	for (i = 0; i < temp; i++) {
 		ext = X509_get_ext(req, i);
 		X509_add_ext(cert, ext, -1);
 	}
 	X509_free(req);
 
 	/*
 	 * Sign and verify the certificate. X509_verify returns 1 on
 	 * success, 0 on a bad signature and a negative value on error;
 	 * only a positive result counts as verified.
 	 */
 	X509_sign(cert, sign_pkey, sign_digest);
 	if (X509_verify(cert, sign_pkey) <= 0) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		X509_free(cert);
 		return (XEVNT_VFY);
 	}
 	temp = i2d_X509(cert, NULL);
 	if (temp <= 0) {
 		msyslog(LOG_ERR, "cert_sign %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		X509_free(cert);
 		return (XEVNT_CRT);
 	}
 	len = temp;
 
 	/*
 	 * Build and sign the value structure. We have to sign it here,
 	 * since the response has to be returned right away. This is a
 	 * clogging hazard. The filestamp is copied from the request as
 	 * is, already in network byte order.
 	 */
 	memset(vp, 0, sizeof(struct value));
 	vp->tstamp = htonl(tstamp);
 	vp->fstamp = ep->fstamp;
 	vp->vallen = htonl(len);
 	vp->ptr = emalloc(len);
 	ptr = vp->ptr;
 	i2d_X509(cert, &ptr);	/* advances ptr, not vp->ptr */
 	vp->siglen = 0;
 	vp->sig = emalloc(sign_siglen);
 	EVP_SignInit(&ctx, sign_digest);
 	EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12);
 	EVP_SignUpdate(&ctx, vp->ptr, len);
 	if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
 		vp->siglen = htonl(len);
 #ifdef DEBUG
 	if (debug > 1)
 		X509_print_fp(stdout, cert);
 #endif
 	X509_free(cert);
 	return (XEVNT_OK);
 }
 
 
 /*
  * cert_valid - verify certificate with given public key
  *
  * This is pretty ugly, as the certificate has to be verified in the
  * OpenSSL X509 structure, not in the DER format in the info/value
  * structure.
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_VFY	certificate not verified
  */
 int
 cert_valid(
 	struct cert_info *cinf,	/* certificate information structure */
 	EVP_PKEY *pkey		/* public key */
 	)
 {
 	X509	*cert;		/* X509 certificate */
 	u_char	*ptr;
 	int	rval;		/* X509_verify() result */
 
 	/*
 	 * A certificate already marked as signed is not checked again.
 	 */
 	if (cinf->flags & CERT_SIGN)
 		return (XEVNT_OK);
 
 	/*
 	 * Rebuild the X509 structure from the saved DER encoding and
 	 * verify it. X509_verify returns 1 on success, 0 on a bad
 	 * signature and a negative value on error, so only a positive
 	 * result is accepted. The decoded certificate is released on
 	 * every path.
 	 */
 	ptr = (u_char *)cinf->cert.ptr;
 	cert = d2i_X509(NULL, &ptr, ntohl(cinf->cert.vallen));
 	if (cert == NULL)
 		return (XEVNT_VFY);
 
 	rval = X509_verify(cert, pkey);
 	X509_free(cert);
 	if (rval <= 0)
 		return (XEVNT_VFY);
 
 	return (XEVNT_OK);
 }
 
 
 /*
  * cert_install - install certificate in certificate list
  *
  * This routine encodes an extension field into a certificate info/value
  * structure. It searches the certificate list for duplicates and
  * expunges whichever is older. It then searches the list for other
  * certificates that might be verified by this latest one. Finally, it
  * inserts this certificate first on the list.
  *
  * Returns
  * XEVNT_OK	success
  * XEVNT_FSP	bad or missing filestamp
  * XEVNT_CRT	bad or missing certificate
  */
 int
 cert_install(
 	struct exten *ep,	/* cert info/value */
 	struct peer *peer	/* peer structure */
 	)
 {
 	struct cert_info *cp, *xp, *yp, **zp;
 
 	/*
 	 * Parse and validate the signed certificate. If valid,
 	 * construct the info/value structure; otherwise, scamper home.
 	 */
 	if ((cp = cert_parse((u_char *)ep->pkt, ntohl(ep->vallen),
 	    ntohl(ep->fstamp))) == NULL)
 		return (XEVNT_CRT);
 
 	/*
 	 * Scan certificate list looking for another certificate with
 	 * the same subject and issuer. If another is found with the
 	 * same or older filestamp, unlink it and return the goodies to
 	 * the heap. If another is found with a later filestamp, discard
 	 * the new one and leave the building.
 	 *
 	 * Make a note to study this issue again. An earlier certificate
 	 * with a long lifetime might be overtaken by a later
 	 * certificate with a short lifetime, thus invalidating the
 	 * earlier signature. However, we gotta find a way to leak old
 	 * stuff from the cache, so we do it anyway.
 	 *
 	 * zp always points at the link field (or list head) that
 	 * refers to xp, so the old entry can be spliced out in place.
 	 */
 	zp = &cinfo;
 	for (xp = cinfo; xp != NULL; xp = xp->link) {
 		if (strcmp(cp->subject, xp->subject) == 0 &&
 		    strcmp(cp->issuer, xp->issuer) == 0) {
 			if (ntohl(xp->cert.fstamp) <=
 			    ntohl(cp->cert.fstamp)) {
 				*zp = xp->link;
 				cert_free(xp);
 			} else {
 				cert_free(cp);
 				return (XEVNT_FSP);
 			}
 			break;
 		}
 		zp = &xp->link;
 	}
 	cp->link = cinfo;
 	cinfo = cp;
 
 	/*
 	 * Scan the certificate list to see if Y is signed by X. This is
 	 * independent of order.
 	 */
 	for (yp = cinfo; yp != NULL; yp = yp->link) {
 		for (xp = cinfo; xp != NULL; xp = xp->link) {
 
 			/*
 			 * If the issuer of certificate Y matches the
 			 * subject of certificate X, verify the
 			 * signature of Y using the public key of X. If
 			 * so, X signs Y.
 			 */
 			if (strcmp(yp->issuer, xp->subject) != 0 ||
 			    (xp->flags & CERT_ERROR))
 				continue;
 
 			if (cert_valid(yp, xp->pkey) != XEVNT_OK) {
 				yp->flags |= CERT_ERROR;
 				continue;
 			}
 
 			/*
 			 * The signature Y is valid only if it begins
 			 * during the lifetime of X; however, it is not
 			 * necessarily an error, since some other
 			 * certificate might sign Y.
 			 */
 			if (yp->first < xp->first || yp->first >
 			    xp->last)
 				continue;
 
 			yp->flags |= CERT_SIGN;
 
 			/*
 			 * If X is trusted, then Y is trusted. Note that
 			 * we might stumble over a self-signed
 			 * certificate that is not trusted, at least
 			 * temporarily. This can happen when a dude
 			 * first comes up, but has not synchronized the
 			 * clock and had its certificate signed by its
 			 * server. In case of broken certificate trail,
 			 * this might result in a loop that could
 			 * persist until timeout.
 			 */
 			if (!(xp->flags & (CERT_TRUST | CERT_VALID)))
 				continue;
 
 			yp->flags |= CERT_VALID;
 
 			/*
 			 * If subject Y matches the server subject name,
 			 * then Y has completed the certificate trail.
 			 * Save the group key and light the valid bit.
 			 */
 			if (strcmp(yp->subject, peer->subject) != 0)
 				continue;
 
 			if (yp->grpkey != NULL) {
 				if (peer->grpkey != NULL)
 					BN_free(peer->grpkey);
 				peer->grpkey = BN_bin2bn(yp->grpkey,
 				     yp->grplen, NULL);
 			}
 			peer->crypto |= CRYPTO_FLAG_VALID;
 
 			/*
 			 * If the server has an identity scheme,
 			 * fetch the identity credentials. If not, the
 			 * identity is verified only by the trusted
 			 * certificate. The next signature will set the
 			 * server proventic.
 			 */
 			if (peer->crypto & (CRYPTO_FLAG_GQ |
 			    CRYPTO_FLAG_IFF | CRYPTO_FLAG_MV))
 				continue;
 
 			peer->crypto |= CRYPTO_FLAG_VRFY;
 		}
 	}
 
 	/*
 	 * That was awesome. Now update the timestamps and signatures.
 	 */
 	crypto_update();
 	return (XEVNT_OK);
 }
 
 
 /*
  * cert_free - release a certificate info/value structure
  *
  * Gives back the public key, the subject and issuer names, the group
  * key and the embedded certificate value, then the structure itself.
  * The pointer must not be used after this call.
  */
 void
 cert_free(
 	struct cert_info *cinf	/* certificate info/value structure */
 	)
 {
 	/*
 	 * The key belongs to OpenSSL and goes back through its own
 	 * release routine.
 	 */
 	if (cinf->pkey != NULL)
 		EVP_PKEY_free(cinf->pkey);
 
 	/*
 	 * free() accepts a null pointer, so members that were never
 	 * filled in need no special handling.
 	 */
 	free(cinf->subject);
 	free(cinf->issuer);
 	free(cinf->grpkey);
 
 	value_free(&cinf->cert);
 	free(cinf);
 }
 
 
 /*
  ***********************************************************************
  *								       *
  * The following routines are used only at initialization time         *
  *								       *
  ***********************************************************************
  */
 /*
  * crypto_key - load cryptographic parameters and keys from files
  *
  * This routine loads a PEM-encoded public/private key pair and extracts
  * the filestamp from the file name.
  *
  * Returns public key pointer if valid, NULL if not. Side effect updates
  * the filestamp if valid.
  */
 static EVP_PKEY *
 crypto_key(
 	char	*cp,		/* file name */
 	tstamp_t *fstamp	/* filestamp */
 	)
 {
 	FILE	*str;		/* file handle */
 	EVP_PKEY *pkey = NULL;	/* public/private key */
 	char	filename[MAXFILENAME]; /* name of key file */
 	char	linkname[MAXFILENAME]; /* filestamp buffer */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	char	*ptr;
 
 	/*
 	 * Open the key file. If the first character of the file name is
 	 * not '/', prepend the keys directory string. If something goes
 	 * wrong, abandon ship. Both forms are bounded by the size of
 	 * the buffer so an overlong configured name cannot overrun it.
 	 */
 	if (*cp == '/')
 		snprintf(filename, MAXFILENAME, "%s", cp);
 	else
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp);
 	str = fopen(filename, "r");
 	if (str == NULL)
 		return (NULL);
 
 	/*
 	 * Read the filestamp, which is contained in the first line. It
 	 * is the decimal number following the last '.' on that line.
 	 */
 	if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) {
 		msyslog(LOG_ERR, "crypto_key: no data %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if ((ptr = strrchr(ptr, '.')) == NULL) {
 		msyslog(LOG_ERR, "crypto_key: no filestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if (sscanf(++ptr, "%u", fstamp) != 1) {
 		msyslog(LOG_ERR, "crypto_key: invalid timestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 
 	/*
 	 * Read and decrypt PEM-encoded private key.
 	 */
 	pkey = PEM_read_PrivateKey(str, NULL, NULL, passwd);
 	fclose(str);
 	if (pkey == NULL) {
 		msyslog(LOG_ERR, "crypto_key %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		return (NULL);
 	}
 
 	/*
 	 * Leave tracks in the cryptostats. The first two characters of
 	 * the first line are skipped in the report.
 	 */
 	if ((ptr = strrchr(linkname, '\n')) != NULL)
 		*ptr = '\0';
 	snprintf(statstr, NTP_MAXSTRLEN, "%s mod %d", &linkname[2],
 	    EVP_PKEY_size(pkey) * 8);
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_key: %s\n", statstr);
 	if (debug > 1) {
 		if (pkey->type == EVP_PKEY_DSA)
 			DSA_print_fp(stdout, pkey->pkey.dsa, 0);
 		else
 			RSA_print_fp(stdout, pkey->pkey.rsa, 0);
 	}
 #endif
 	return (pkey);
 }
 
 
 /*
  * crypto_cert - load certificate from file
  *
  * This routine loads a X.509 RSA or DSA certificate from a file and
  * constructs a info/cert value structure for this machine. The
  * structure includes a filestamp extracted from the file name. Later
  * the certificate can be sent to another machine by request.
  *
  * Returns certificate info/value pointer if valid, NULL if not.
  */
 static struct cert_info *	/* certificate information */
 crypto_cert(
 	char	*cp		/* file name */
 	)
 {
 	struct cert_info *ret; /* certificate information */
 	FILE	*str;		/* file handle */
 	char	filename[MAXFILENAME]; /* name of certificate file */
 	char	linkname[MAXFILENAME]; /* filestamp buffer */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t fstamp;	/* filestamp */
 	long	len;
 	char	*ptr;
 	char	*name, *header;
 	u_char	*data;
 
 	/*
 	 * Open the certificate file. If the first character of the file
 	 * name is not '/', prepend the keys directory string. If
 	 * something goes wrong, abandon ship.
 	 */
 	if (*cp == '/')
 		strcpy(filename, cp);
 	else
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp);
 	str = fopen(filename, "r");
 	if (str == NULL)
 		return (NULL);
 
 	/*
 	 * Read the filestamp, which is contained in the first line.
 	 */
 	if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) {
 		msyslog(LOG_ERR, "crypto_cert: no data %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if ((ptr = strrchr(ptr, '.')) == NULL) {
 		msyslog(LOG_ERR, "crypto_cert: no filestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 	if (sscanf(++ptr, "%u", &fstamp) != 1) {
 		msyslog(LOG_ERR, "crypto_cert: invalid filestamp %s\n",
 		    filename);
 		(void)fclose(str);
 		return (NULL);
 	}
 
 	/*
 	 * Read PEM-encoded certificate and install.
 	 */
 	if (!PEM_read(str, &name, &header, &data, &len)) {
 		msyslog(LOG_ERR, "crypto_cert %s\n",
 		    ERR_error_string(ERR_get_error(), NULL));
 		(void)fclose(str);
 		return (NULL);
 	}
 	free(header);
 	if (strcmp(name, "CERTIFICATE") !=0) {
 		msyslog(LOG_INFO, "crypto_cert: wrong PEM type %s",
 		    name);
 		free(name);
 		free(data);
 		(void)fclose(str);
 		return (NULL);
 	}
 	free(name);
 
 	/*
 	 * Parse certificate and generate info/value structure.
 	 */
 	ret = cert_parse(data, len, fstamp);
 	free(data);
 	(void)fclose(str);
 	if (ret == NULL)
 		return (NULL);
 
 	if ((ptr = strrchr(linkname, '\n')) != NULL)
 		*ptr = '\0'; 
 	snprintf(statstr, NTP_MAXSTRLEN,
 	    "%s 0x%x len %lu", &linkname[2], ret->flags, len);
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_cert: %s\n", statstr);
 #endif
 	return (ret);
 }
 
 
 /*
  * crypto_tai - load leapseconds table from file
  *
  * This routine loads the ERTS leapsecond file in NIST text format,
  * converts to a value structure and extracts a filestamp from the file
  * name. The data are used to establish the TAI offset from UTC, which
  * is provided to the kernel if supported. Later the data can be sent to
  * another machine on request.
  */
 static void
 crypto_tai(
 	char	*cp		/* file name */
 	)
 {
 	FILE	*str;		/* file handle */
 	char	buf[NTP_MAXSTRLEN];	/* file line buffer */
 	u_int32	leapsec[MAX_LEAP]; /* NTP time at leaps */
 	int	offset;		/* offset at leap (s) */
 	char	filename[MAXFILENAME]; /* name of leapseconds file */
 	char	linkname[MAXFILENAME]; /* file link (for filestamp) */
 	char	statstr[NTP_MAXSTRLEN]; /* statistics for filegen */
 	tstamp_t fstamp;	/* filestamp */
 	u_int	len;
 	u_int32	*ptr;
 	char	*dp;
 	int	rval, i, j;
 
 	/*
 	 * Open the file and discard comment lines. If the first
 	 * character of the file name is not '/', prepend the keys
 	 * directory string. If the file is not found, not to worry; it
 	 * can be retrieved over the net. But, if it is found with
 	 * errors, we crash and burn. Both forms of the name are bounded
 	 * by the size of the buffer.
 	 */
 	if (*cp == '/')
 		snprintf(filename, MAXFILENAME, "%s", cp);
 	else
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp);
 	if ((str = fopen(filename, "r")) == NULL)
 		return;
 
 	/*
 	 * Extract filestamp if present. It is the decimal number after
 	 * the last '.' of the link target, or of the file name itself
 	 * when the file is not a symbolic link. If there is no '.' or
 	 * no number follows it, the filestamp is zero.
 	 */
 	rval = readlink(filename, linkname, MAXFILENAME - 1);
 	if (rval > 0) {
 		linkname[rval] = '\0';
 		dp = strrchr(linkname, '.');
 	} else {
 		dp = strrchr(filename, '.');
 	}
 	fstamp = 0;
 	if (dp != NULL && sscanf(dp + 1, "%u", &fstamp) != 1)
 		fstamp = 0;
 
 	/*
 	 * We are rather paranoid here, since an intruder might cause a
 	 * coredump by infiltrating naughty values. Empty lines and
 	 * comments are ignored. Other lines must begin with two
 	 * integers followed by junk or comments. The first integer is
 	 * the NTP seconds of leap insertion, the second is the offset
 	 * of TAI relative to UTC after that insertion. The second word
 	 * must equal the initial insertion of ten seconds on 1 January
 	 * 1972 plus one second for each succeeding insertion.
 	 *
 	 * The loop leaves dp NULL only when end of file was reached;
 	 * any other way out is treated as an error below.
 	 */
 	i = 0;
 	while (i < MAX_LEAP) {
 		dp = fgets(buf, NTP_MAXSTRLEN - 1, str);
 		if (dp == NULL)
 			break;
 
 		if (strlen(buf) < 1)
 			continue;
 
 		if (*buf == '#')
 			continue;
 
 		if (sscanf(buf, "%u %d", &leapsec[i], &offset) != 2)
 			continue;
 
 		if (i != offset - TAI_1972)
 			break;
 
 		i++;
 	}
 	fclose(str);
 	if (dp != NULL) {
 		msyslog(LOG_INFO,
 		    "crypto_tai: leapseconds file %s error %d", cp,
 		    rval);
 		exit (-1);
 	}
 
 	/*
 	 * A file with no usable entries provides no table. Leave the
 	 * leapseconds value untouched rather than building an empty
 	 * one and indexing before the start of the array below.
 	 */
 	if (i == 0) {
 		msyslog(LOG_INFO,
 		    "crypto_tai: leapseconds file %s has no entries",
 		    cp);
 		return;
 	}
 
 	/*
 	 * The extension field table entries consists of the NTP seconds
 	 * of leap insertion in network byte order.
 	 */
 	tai_leap.fstamp = htonl(fstamp);
 	len = i * sizeof(u_int32);
 	tai_leap.vallen = htonl(len);
 	ptr = emalloc(len);
 	tai_leap.ptr = (u_char *)ptr;
 	for (j = 0; j < i; j++)
 		*ptr++ = htonl(leapsec[j]);
 	crypto_flags |= CRYPTO_FLAG_TAI;
 	snprintf(statstr, NTP_MAXSTRLEN, "%s fs %u leap %u len %u", cp, fstamp,
 	   leapsec[i - 1], len);
 	record_crypto_stats(NULL, statstr);
 #ifdef DEBUG
 	if (debug)
 		printf("crypto_tai: %s\n", statstr);
 #endif
 }
 
 
 /*
  * crypto_setup - load keys, certificate and leapseconds table
  *
  * This routine loads the public/private host key and certificate. If
  * available, it loads the public/private sign key, which defaults to
  * the host key, and leapseconds table. The host key must be RSA, but
  * the sign key can be either RSA or DSA. In either case, the public key
  * on the certificate must agree with the sign key.
  */
 void
 crypto_setup(void)
 {
 	EVP_PKEY *pkey;		/* private/public key pair */
 	char	filename[MAXFILENAME]; /* file name buffer */
 	l_fp	seed;		/* crypto PRNG seed as NTP timestamp */
 	tstamp_t fstamp;	/* filestamp */
 	tstamp_t sstamp;	/* sign filestamp */
 	u_int	len, bytes;
 	u_char	*ptr;
 
 	/*
 	 * Initialize structures.
 	 */
 	if (!crypto_flags)
 		return;
 
 	gethostname(filename, MAXFILENAME);
 	bytes = strlen(filename) + 1;
 	sys_hostname = emalloc(bytes);
 	memcpy(sys_hostname, filename, bytes);
 	if (passwd == NULL)
 		passwd = sys_hostname;
 	memset(&hostval, 0, sizeof(hostval));
 	memset(&pubkey, 0, sizeof(pubkey));
 	memset(&tai_leap, 0, sizeof(tai_leap));
 
 	/*
 	 * Load required random seed file and seed the random number
 	 * generator. Be default, it is found in the user home
 	 * directory. The root home directory may be / or /root,
 	 * depending on the system. Wiggle the contents a bit and write
 	 * it back so the sequence does not repeat when we next restart.
 	 */
 	ERR_load_crypto_strings();
 	if (rand_file == NULL) {
 		if ((RAND_file_name(filename, MAXFILENAME)) != NULL) {
 			rand_file = emalloc(strlen(filename) + 1);
 			strcpy(rand_file, filename);
 		}
 	} else if (*rand_file != '/') {
 		snprintf(filename, MAXFILENAME, "%s/%s", keysdir,
 		    rand_file);
 		free(rand_file);
 		rand_file = emalloc(strlen(filename) + 1);
 		strcpy(rand_file, filename);
 	}
 	if (rand_file == NULL) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: random seed file not specified");
 		exit (-1);
 	}
 	if ((bytes = RAND_load_file(rand_file, -1)) == 0) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: random seed file %s not found\n",
 		    rand_file);
 		exit (-1);
 	}
 	arc4random_buf(&seed, sizeof(l_fp));
 	RAND_seed(&seed, sizeof(l_fp));
 	RAND_write_file(rand_file);
 	OpenSSL_add_all_algorithms();
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "crypto_setup: OpenSSL version %lx random seed file %s bytes read %d\n",
 		    SSLeay(), rand_file, bytes);
 #endif
 
 	/*
 	 * Load required host key from file "ntpkey_host_<hostname>". It
 	 * also becomes the default sign key.
 	 */
 	if (host_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_host_%s",
 		    sys_hostname);
 		host_file = emalloc(strlen(filename) + 1);
 		strcpy(host_file, filename);
 	}
 	pkey = crypto_key(host_file, &fstamp);
 	if (pkey == NULL) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: host key file %s not found or corrupt",
 		    host_file);
 		exit (-1);
 	}
 	host_pkey = pkey;
 	sign_pkey = pkey;
 	sstamp = fstamp;
 	hostval.fstamp = htonl(fstamp);
 	if (host_pkey->type != EVP_PKEY_RSA) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: host key is not RSA key type");
 		exit (-1);
 	}
 	hostval.vallen = htonl(strlen(sys_hostname));
 	hostval.ptr = (u_char *)sys_hostname;
 	
 	/*
 	 * Construct public key extension field for agreement scheme.
 	 */
 	len = i2d_PublicKey(host_pkey, NULL);
 	ptr = emalloc(len);
 	pubkey.ptr = ptr;
 	i2d_PublicKey(host_pkey, &ptr);
 	pubkey.vallen = htonl(len);
 	pubkey.fstamp = hostval.fstamp;
 
 	/*
 	 * Load optional sign key from file "ntpkey_sign_<hostname>". If
 	 * loaded, it becomes the sign key.
 	 */
 	if (sign_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_sign_%s",
 		    sys_hostname);
 		sign_file = emalloc(strlen(filename) + 1);
 		strcpy(sign_file, filename);
 	}
 	pkey = crypto_key(sign_file, &fstamp);
 	if (pkey != NULL) {
 		sign_pkey = pkey;
 		sstamp = fstamp;
 	}
 	sign_siglen = EVP_PKEY_size(sign_pkey);
 
 	/*
 	 * Load optional IFF parameters from file
 	 * "ntpkey_iff_<hostname>".
 	 */
 	if (iffpar_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_iff_%s",
 		    sys_hostname);
 		iffpar_file = emalloc(strlen(filename) + 1);
 		strcpy(iffpar_file, filename);
 	}
 	iffpar_pkey = crypto_key(iffpar_file, &if_fstamp);
 	if (iffpar_pkey != NULL)
 		crypto_flags |= CRYPTO_FLAG_IFF;
 
 	/*
 	 * Load optional GQ parameters from file "ntpkey_gq_<hostname>".
 	 */
 	if (gqpar_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_gq_%s",
 		    sys_hostname);
 		gqpar_file = emalloc(strlen(filename) + 1);
 		strcpy(gqpar_file, filename);
 	}
 	gqpar_pkey = crypto_key(gqpar_file, &gq_fstamp);
 	if (gqpar_pkey != NULL)
 		crypto_flags |= CRYPTO_FLAG_GQ;
 
 	/*
 	 * Load optional MV parameters from file "ntpkey_mv_<hostname>".
 	 */
 	if (mvpar_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_mv_%s",
 		    sys_hostname);
 		mvpar_file = emalloc(strlen(filename) + 1);
 		strcpy(mvpar_file, filename);
 	}
 	mvpar_pkey = crypto_key(mvpar_file, &mv_fstamp);
 	if (mvpar_pkey != NULL)
 		crypto_flags |= CRYPTO_FLAG_MV;
 
 	/*
 	 * Load required certificate from file "ntpkey_cert_<hostname>".
 	 */
 	if (cert_file == NULL) {
 		snprintf(filename, MAXFILENAME, "ntpkey_cert_%s",
 		    sys_hostname);
 		cert_file = emalloc(strlen(filename) + 1);
 		strcpy(cert_file, filename);
 	}
 	if ((cinfo = crypto_cert(cert_file)) == NULL) {
 		msyslog(LOG_ERR,
 		    "certificate file %s not found or corrupt",
 		    cert_file);
 		exit (-1);
 	}
 
 	/*
 	 * The subject name must be the same as the host name, unless
 	 * the certificate is private, in which case it may have come
 	 * from another host.
 	 */
 	if (!(cinfo->flags & CERT_PRIV) && strcmp(cinfo->subject,
 	    sys_hostname) != 0) {
 		msyslog(LOG_ERR,
 		    "crypto_setup: certificate %s not for this host",
 		    cert_file);
 		cert_free(cinfo);
 		exit (-1);
 	}
 
 	/*
 	 * It the certificate is trusted, the subject must be the same
 	 * as the issuer, in other words it must be self signed.
 	 */
 	if (cinfo->flags & CERT_TRUST && strcmp(cinfo->subject,
 	    cinfo->issuer) != 0) {
 		if (cert_valid(cinfo, sign_pkey) != XEVNT_OK) {
 			msyslog(LOG_ERR,
 			    "crypto_setup: certificate %s is trusted, but not self signed.",
 			    cert_file);
 			cert_free(cinfo);
 			exit (-1);
 		}
 	}
 	sign_digest = cinfo->digest;
 	if (cinfo->flags & CERT_PRIV)
 		crypto_flags |= CRYPTO_FLAG_PRIV;
 	crypto_flags |= cinfo->nid << 16;
 
 	/*
 	 * Load optional leapseconds table from file "ntpkey_leap". If
 	 * the file is missing or defective, the values can later be
 	 * retrieved from a server.
 	 */
 	if (leap_file == NULL)
 		leap_file = "ntpkey_leap";
 	crypto_tai(leap_file);
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "crypto_setup: flags 0x%x host %s signature %s\n",
 		    crypto_flags, sys_hostname, OBJ_nid2ln(cinfo->nid));
 #endif
 }
 
 
 /*
  * crypto_config - configure data from crypto configuration command.
  *
  * For the file name and password items a fresh heap copy of cp is
  * stored in the matching variable. For the identity item the scheme
  * named by cp (compared without regard to case) is ORed into
  * ident_scheme. Every call, whatever the item, sets CRYPTO_FLAG_ENAB
  * in crypto_flags.
  */
 void
 crypto_config(
 	int	item,		/* configuration item */
 	char	*cp		/* file name */
 	)
 {
 	char	**slot = NULL;	/* variable that receives the copy */
 
 	switch (item) {
 
 	case CRYPTO_CONF_RAND:		/* random seed file name */
 		slot = &rand_file;
 		break;
 
 	case CRYPTO_CONF_PW:		/* private key password */
 		slot = &passwd;
 		break;
 
 	case CRYPTO_CONF_PRIV:		/* host file name */
 		slot = &host_file;
 		break;
 
 	case CRYPTO_CONF_SIGN:		/* sign key file name */
 		slot = &sign_file;
 		break;
 
 	case CRYPTO_CONF_IFFPAR:	/* iff parameters file name */
 		slot = &iffpar_file;
 		break;
 
 	case CRYPTO_CONF_GQPAR:		/* gq parameters file name */
 		slot = &gqpar_file;
 		break;
 
 	case CRYPTO_CONF_MVPAR:		/* mv parameters file name */
 		slot = &mvpar_file;
 		break;
 
 	case CRYPTO_CONF_CERT:		/* certificate file name */
 		slot = &cert_file;
 		break;
 
 	case CRYPTO_CONF_LEAP:		/* leapseconds file name */
 		slot = &leap_file;
 		break;
 
 	case CRYPTO_CONF_IDENT:		/* identity scheme */
 		if (strcasecmp(cp, "iff") == 0)
 			ident_scheme |= CRYPTO_FLAG_IFF;
 		else if (strcasecmp(cp, "gq") == 0)
 			ident_scheme |= CRYPTO_FLAG_GQ;
 		else if (strcasecmp(cp, "mv") == 0)
 			ident_scheme |= CRYPTO_FLAG_MV;
 		break;
 	}
 
 	/*
 	 * Items that carry a string get their own copy of it. Any value
 	 * previously held is overwritten without being released.
 	 */
 	if (slot != NULL) {
 		*slot = emalloc(strlen(cp) + 1);
 		strcpy(*slot, cp);
 	}
 	crypto_flags |= CRYPTO_FLAG_ENAB;
 }
 # else
 /*
  * The only definition compiled when OPENSSL is not defined.
  * NOTE(review): presumably present so the file is not an empty
  * translation unit in that configuration -- confirm.
  */
 int ntp_crypto_bs_pubkey;
 # endif /* OPENSSL */
Index: stable/9/contrib/ntp/ntpd/ntp_proto.c
===================================================================
--- stable/9/contrib/ntp/ntpd/ntp_proto.c	(revision 281230)
+++ stable/9/contrib/ntp/ntpd/ntp_proto.c	(revision 281231)
@@ -1,3451 +1,3461 @@
 /*
  * ntp_proto.c - NTP version 4 protocol machinery
  *
  * ATTENTION: Get approval from Dave Mills on all changes to this file!
  *
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include "ntpd.h"
 #include "ntp_stdlib.h"
 #include "ntp_unixtime.h"
 #include "ntp_control.h"
 #include "ntp_string.h"
 
 #include <stdio.h>
 
 #if defined(VMS) && defined(VMS_LOCALUNIT)	/*wjm*/
 #include "ntp_refclock.h"
 #endif
 
 #if defined(__FreeBSD__) && __FreeBSD__ >= 3
 #include <sys/sysctl.h>
 #endif
 
 /*
  * This macro defines the authentication state. If x is nonzero,
  * authentication is required and the macro is true only when y is
  * AUTH_OK; otherwise authentication is optional and the macro is true
  * when y is AUTH_OK or AUTH_NONE. Note that y may be evaluated more
  * than once.
  */
 #define	AUTH(x, y)	((x) ? (y) == AUTH_OK : (y) == AUTH_OK || \
 			    (y) == AUTH_NONE)
 
 /*
  * System variables are declared here. See Section 3.2 of the
  * specification.
  */
 u_char	sys_leap;		/* system leap indicator */
 u_char	sys_stratum;		/* stratum of system */
 s_char	sys_precision;		/* local clock precision (log2 s) */
 double	sys_rootdelay;		/* roundtrip delay to primary source */
 double	sys_rootdispersion;	/* dispersion to primary source */
 u_int32 sys_refid;		/* source/loop in network byte order */
 static	double sys_offset;	/* current local clock offset */
 l_fp	sys_reftime;		/* time we were last updated */
 struct	peer *sys_peer;		/* our current peer */
 struct	peer *sys_pps;		/* our PPS peer */
 struct	peer *sys_prefer;	/* our cherished peer */
 int	sys_kod;		/* kod credit */
 int	sys_kod_rate = 2;	/* max kod packets per second */
 #ifdef OPENSSL
 u_long	sys_automax;		/* maximum session key lifetime */
 #endif /* OPENSSL */
 
 /*
  * Nonspecified system state variables.
  */
 int	sys_bclient;		/* broadcast client enable */
 double	sys_bdelay;		/* broadcast client default delay */
 int	sys_calldelay;		/* modem callup delay (s) */
 int	sys_authenticate;	/* require authentication for config */
 l_fp	sys_authdelay;		/* authentication delay */
 static	u_long sys_authdly[2];	/* authentication delay shift reg */
 static	double sys_mindisp = MINDISPERSE; /* min disp increment (s) */
 static	double sys_maxdist = MAXDISTANCE; /* selection threshold (s) */
 double	sys_jitter;		/* system jitter (s) */
 static	int sys_hopper;		/* anticlockhop counter */
 static	int sys_maxhop = MAXHOP; /* anticlockhop counter threshold */
 int	leap_next;		/* leap consensus */
 keyid_t	sys_private;		/* private value for session seed */
 int	sys_manycastserver;	/* respond to manycast client pkts */
 int	peer_ntpdate;		/* active peers in ntpdate mode */
 int	sys_survivors;		/* truest of the truechimers */
 #ifdef OPENSSL
 char	*sys_hostname;		/* gethostname() name */
 #endif /* OPENSSL */
 
 /*
  * TOS and multicast mapping stuff
  */
 int	sys_floor = 0;		/* cluster stratum floor */
 int	sys_ceiling = STRATUM_UNSPEC; /* cluster stratum ceiling */
 int	sys_minsane = 1;	/* minimum candidates */
 int	sys_minclock = NTP_MINCLOCK; /* minimum survivors */
 int	sys_maxclock = NTP_MAXCLOCK; /* maximum candidates */
 int	sys_cohort = 0;		/* cohort switch */
 int	sys_orphan = STRATUM_UNSPEC + 1; /* orphan stratum */
 double	sys_orphandelay = 0;	/* orphan root delay */
 int	sys_beacon = BEACON;	/* manycast beacon interval */
 int	sys_ttlmax;		/* max ttl mapping vector index */
 u_char	sys_ttl[MAX_TTL];	/* ttl mapping vector */
 
 /*
  * Statistics counters
  */
 u_long	sys_stattime;		/* time since reset */
 u_long	sys_received;		/* packets received */
 u_long	sys_processed;		/* packets processed */
 u_long	sys_newversionpkt;	/* current version */
 u_long	sys_oldversionpkt;	/* recent version */
 u_long	sys_unknownversion;	/* invalid version */
 u_long	sys_restricted;		/* access denied */
 u_long	sys_badlength;		/* bad length or format */
 u_long	sys_badauth;		/* bad authentication */
 u_long	sys_limitrejected;	/* rate exceeded */
 
 /*
  * Prototypes for routines private to this file.
  */
 static	double	root_distance	P((struct peer *));
 static	void	clock_combine	P((struct peer **, int));
 static	void	peer_xmit	P((struct peer *));
 static	void	fast_xmit	P((struct recvbuf *, int, keyid_t,
 				    int));
 static	void	clock_update	P((void));
 static	int	default_get_precision	P((void));
 static	int	peer_unfit	P((struct peer *));
 
 
 /*
  * transmit - Transmit Procedure. See Section 3.4.2 of the
  *	specification.
  *
  * Runs one step of the poll state machine for the given association:
  * it updates the reach and unreach bookkeeping, decides whether a
  * burst starts or continues, calls peer_xmit() where the mode calls
  * for a packet, and hands the resulting hpoll value to poll_update().
  * An ephemeral or preemptable association whose unreach counter has
  * hit NTP_UNREACH is passed to peer_clear() and unpeer() instead, and
  * the routine returns without transmitting.
  */
 void
 transmit(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	int	hpoll;
 
 	/*
 	 * The polling state machine. There are two kinds of machines,
 	 * those that never expect a reply (broadcast and manycast
 	 * server modes) and those that do (all other modes). The dance
 	 * is intricate...
 	 */
 	/*
 	 * Orphan mode is active when enabled and when no servers less
 	 * than the orphan stratum are available. In this mode packets
 	 * are sent at the orphan stratum. An orphan with no other
 	 * synchronization source is an orphan parent. It assumes root
 	 * delay zero and reference ID the loopback address. All others
 	 * are orphan children with root delay randomized over a 1-s
 	 * range. The root delay is used by the election algorithm to
 	 * select the order of synchronization.
 	 */
 	hpoll = peer->hpoll;
 	if (sys_orphan < STRATUM_UNSPEC && sys_peer == NULL) {
 		sys_leap = LEAP_NOWARNING;
 		sys_stratum = sys_orphan;
 		sys_refid = htonl(LOOPBACKADR);
 		sys_rootdelay = 0;
 		sys_rootdispersion = 0;
 	}
 
 	/*
 	 * In broadcast mode the poll interval is never changed from
 	 * minpoll.
 	 */
 	if (peer->cast_flags & (MDF_BCAST | MDF_MCAST)) {
 		peer->outdate = current_time;
 		peer_xmit(peer);
 		poll_update(peer, hpoll);
 		return;
 	}
 
 	/*
 	 * In manycast mode we start with unity ttl. The ttl is
 	 * increased by one for each poll until either sys_maxclock
 	 * servers have been found or the maximum ttl is reached. When
 	 * sys_maxclock servers are found we stop polling until one or
 	 * more servers have timed out or until less than minpoll
 	 * associations turn up. In this case additional better servers
 	 * are dragged in and preempt the existing ones.
 	 *
 	 * (The test below compares sys_survivors with sys_minclock and
 	 * peer_preempt with sys_maxclock. When the unreach counter
 	 * exceeds sys_beacon, both it and the ttl are reset to zero
 	 * and a packet is sent.)
 	 */
 	if (peer->cast_flags & MDF_ACAST) {
 		peer->outdate = current_time;
 		if (peer->unreach > sys_beacon) {
 			peer->unreach = 0;
 			peer->ttl = 0;
 			peer_xmit(peer);
 		} else if (sys_survivors < sys_minclock ||
 		    peer_preempt < sys_maxclock) {
 			if (peer->ttl < sys_ttlmax)
 				peer->ttl++;
 			peer_xmit(peer);
 		}
 		peer->unreach++;
 		poll_update(peer, hpoll);
 		return;
 	}
 
 	/*
 	 * In unicast modes the dance is much more intricate. It is
 	 * designed to back off whenever possible to minimize network
 	 * traffic.
 	 */
 	if (peer->burst == 0) {
 		u_char oreach;
 
 		/*
 		 * Update the reachability status. If not heard for
 		 * three consecutive polls, stuff infinity in the clock
 		 * filter. 
 		 */
 		oreach = peer->reach;
 		peer->outdate = current_time;
 		if (peer == sys_peer)
 			sys_hopper++;
 		peer->reach <<= 1;
 		if (!(peer->reach & 0x07))
 			clock_filter(peer, 0., 0., MAXDISPERSE);
 		if (!peer->reach) {
 
 			/*
 			 * Here the peer is unreachable. If it was
 			 * previously reachable, raise a trap.
 			 */
 			if (oreach) {
 				report_event(EVNT_UNREACH, peer);
 				peer->timereachable = current_time;
 			}
 
 			/*
 			 * Send a burst if enabled, but only once after
 			 * a peer becomes unreachable. If the preempt
 			 * flag is dim, bump the unreach counter by one;
 			 * otherwise, bump it by three.
 			 */
 			if (peer->flags & FLAG_IBURST &&
 			    peer->unreach == 0) {
 				peer->burst = NTP_BURST;
 			}
 			if (!(peer->flags & FLAG_PREEMPT))	
 				peer->unreach++;
 			else
 				peer->unreach += 3;
 		} else {
 
 			/*
 			 * Here the peer is reachable. Set the poll
 			 * interval to the system poll interval. Send a
 			 * burst only if enabled and the peer is fit.
 			 *
 			 * Respond to the peer evaluation produced by
 			 * the selection algorithm. If less than the
 			 * outlyer level, up the unreach by three. If
 			 * there are excess associations, up the unreach
 			 * by two if not a candidate and by one if so.
 			 */
 			if (!(peer->flags & FLAG_PREEMPT)) {
 				peer->unreach = 0;
 			} else if (peer->status < CTL_PST_SEL_SELCAND) {
 				peer->unreach += 3;
 			} else if (peer_preempt > sys_maxclock) {
 				if (peer->status < CTL_PST_SEL_SYNCCAND)
 					peer->unreach += 2;
 				else
 					peer->unreach++;
 			} else {
 				peer->unreach = 0;
 			}
 			hpoll = sys_poll;
 			if (peer->flags & FLAG_BURST &&
 			    !peer_unfit(peer))
 				peer->burst = NTP_BURST;
 		}
 
 		/*
 		 * Watch for timeout. If ephemeral or preemptable, toss
 		 * the rascal; otherwise, bump the poll interval.
 		 */ 
 		if (peer->unreach >= NTP_UNREACH) {
 			if (peer->flags & FLAG_PREEMPT ||
 			    !(peer->flags & FLAG_CONFIG)) {
 				peer_clear(peer, "TIME");
 				unpeer(peer);
 				return;
 			} else {
 				hpoll++;
 			}
 		}
 	} else {
 		/* A burst is in progress; count this transmission off. */
 		peer->burst--;
 
 		/*
 		 * If a broadcast client at this point, the burst has
 		 * concluded, so we switch to client mode and purge the
 		 * keylist, since no further transmissions will be made.
 		 */
 		if (peer->burst == 0) {
 			if (peer->cast_flags & MDF_BCLNT) {
 				peer->hmode = MODE_BCLIENT;
 #ifdef OPENSSL
 				key_expire(peer);
 #endif /* OPENSSL */
 			}
 
 			/*
 			 * If ntpdate mode and the clock has not been
 			 * set and all peers have completed the burst,
 			 * we declare a successful failure.
 			 */
 			if (mode_ntpdate) {
 				peer_ntpdate--;
 				if (peer_ntpdate == 0) {
 					msyslog(LOG_NOTICE,
 					    "no reply; clock not set");
 					exit (0);
 				}
 			}
 		}
 	}
 
 	/*
 	 * Do not transmit if in broadcast client mode. 
 	 */
 	if (peer->hmode != MODE_BCLIENT)
 		peer_xmit(peer);
 	poll_update(peer, hpoll);
 }
 
 
 /*
  * receive - Receive Procedure.  See section 3.4.3 in the specification.
  */
 void
 receive(
 	struct recvbuf *rbufp
 	)
 {
 	register struct peer *peer;	/* peer structure pointer */
 	register struct pkt *pkt;	/* receive packet pointer */
 	int	hisversion;		/* packet version */
 	int	hisleap;		/* packet leap indicator */
 	int	hismode;		/* packet mode */
 	int	hisstratum;		/* packet stratum */
 	int	restrict_mask;		/* restrict bits */
 	int	has_mac;		/* length of MAC field */
 	int	authlen;		/* offset of MAC field */
 	int	is_authentic = 0;	/* cryptosum ok */
 	keyid_t	skeyid = 0;		/* key ID */
 	struct sockaddr_storage *dstadr_sin; /* active runway */
 	struct peer *peer2;		/* aux peer structure pointer */
 	l_fp	p_org;			/* origin timestamp */
 	l_fp	p_rec;			/* receive timestamp */
 	l_fp	p_xmt;			/* transmit timestamp */
 #ifdef OPENSSL
 	keyid_t tkeyid = 0;		/* temporary key ID */
 	keyid_t	pkeyid = 0;		/* previous key ID */
 	struct autokey *ap;		/* autokey structure pointer */
 	int	rval;			/* cookie snatcher */
 #endif /* OPENSSL */
 	int retcode = AM_NOMATCH;
 	int	at_listhead;
 
 	/*
 	 * Monitor the packet and get restrictions. Note that the packet
 	 * length for control and private mode packets must be checked
 	 * by the service routines. Note that no statistics counters are
 	 * recorded for restrict violations, since these counters are in
 	 * the restriction routine. Note the careful distinctions here
 	 * between a packet with a format error and a packet that is
 	 * simply discarded without prejudice. Some restrictions have to
 	 * be handled later in order to generate a kiss-of-death packet.
 	 */
 	/*
 	 * Bogus port check is before anything, since it probably
 	 * reveals a clogging attack.
 	 */
 	sys_received++;
 	if (SRCPORT(&rbufp->recv_srcadr) == 0) {
 		sys_badlength++;
 		return;				/* bogus port */
 	}
 	at_listhead = ntp_monitor(rbufp);
 	restrict_mask = restrictions(&rbufp->recv_srcadr, at_listhead);
 #ifdef DEBUG
 	if (debug > 1)
 		printf("receive: at %ld %s<-%s flags %x restrict %03x\n",
 		    current_time, stoa(&rbufp->dstadr->sin),
 		    stoa(&rbufp->recv_srcadr),
 		    rbufp->dstadr->flags, restrict_mask);
 #endif
 	if (restrict_mask & RES_IGNORE) {
 		sys_restricted++;
 		return;				/* ignore everything */
 	}
 	pkt = &rbufp->recv_pkt;
 	hisversion = PKT_VERSION(pkt->li_vn_mode);
 	hisleap = PKT_LEAP(pkt->li_vn_mode);
 	hismode = (int)PKT_MODE(pkt->li_vn_mode);
 	hisstratum = PKT_TO_STRATUM(pkt->stratum);
 	if (hismode == MODE_PRIVATE) {
 		if (restrict_mask & RES_NOQUERY) {
 			sys_restricted++;
 			return;			/* no query private */
 		}
 		process_private(rbufp, ((restrict_mask &
 		    RES_NOMODIFY) == 0));
 		return;
 	}
 	if (hismode == MODE_CONTROL) {
 		if (restrict_mask & RES_NOQUERY) {
 			sys_restricted++;
 			return;			/* no query control */
 		}
 		process_control(rbufp, restrict_mask);
 		return;
 	}
 	if (restrict_mask & RES_DONTSERVE) {
 		sys_restricted++;
 		return;				/* no time */
 	}
 	if (rbufp->recv_length < LEN_PKT_NOMAC) {
 		sys_badlength++;
 		return;				/* runt packet */
 	}
 	
 	/*
 	 * Version check must be after the query packets, since they
 	 * intentionally use early version.
 	 */
 	if (hisversion == NTP_VERSION) {
 		sys_newversionpkt++;		/* new version */
 	} else if (!(restrict_mask & RES_VERSION) && hisversion >=
 	    NTP_OLDVERSION) {
 		sys_oldversionpkt++;		/* previous version */
 	} else {
 		sys_unknownversion++;
 		return;				/* old version */
 	}
 
 	/*
 	 * Figure out his mode and validate the packet. This has some
 	 * legacy raunch that probably should be removed. In very early
 	 * NTP versions mode 0 was equivalent to what later versions
 	 * would interpret as client mode.
 	 */
 	if (hismode == MODE_UNSPEC) {
 		if (hisversion == NTP_OLDVERSION) {
 			hismode = MODE_CLIENT;
 		} else {
 			sys_badlength++;
 			return;                 /* invalid mode */
 		}
 	}
 
 	/*
 	 * Parse the extension field if present. We figure out whether
 	 * an extension field is present by measuring the MAC size. If
 	 * the number of words following the packet header is 0, no MAC
 	 * is present and the packet is not authenticated. If 1, the
 	 * packet is a crypto-NAK; if 3, the packet is authenticated
 	 * with DES; if 5, the packet is authenticated with MD5. If 2 or
 	 * 4, the packet is a runt and discarded forthwith. If greater
 	 * than 5, an extension field is present, so we subtract the
 	 * length of the field and go around again.
 	 */
 	authlen = LEN_PKT_NOMAC;
 	has_mac = rbufp->recv_length - authlen;
 	while (has_mac > 0) {
 		int temp;
 
-		if (has_mac % 4 != 0 || has_mac < 0) {
+		if (has_mac % 4 != 0 || has_mac < MIN_MAC_LEN) {
 			sys_badlength++;
 			return;			/* bad MAC length */
 		}
 		if (has_mac == 1 * 4 || has_mac == 3 * 4 || has_mac ==
 		    MAX_MAC_LEN) {
 			skeyid = ntohl(((u_int32 *)pkt)[authlen / 4]);
 			break;
 
 		} else if (has_mac > MAX_MAC_LEN) {
 			temp = ntohl(((u_int32 *)pkt)[authlen / 4]) &
 			    0xffff;
 			if (temp < 4 || temp > NTP_MAXEXTEN || temp % 4
 			    != 0) {
 				sys_badlength++;
 				return;		/* bad MAC length */
 			}
 			authlen += temp;
 			has_mac -= temp;
 		} else {
 			sys_badlength++;
 			return;			/* bad MAC length */
 		}
 	}
+	/*
+	 * If has_mac is < 0 we had a malformed packet.
+	 */
+	if (has_mac < 0) {
+		sys_badlength++;
+		return;		/* bad length */
+	}
 #ifdef OPENSSL
 	pkeyid = tkeyid = 0;
 #endif /* OPENSSL */
 
 	/*
 	 * We have tossed out as many buggy packets as possible early in
 	 * the game to reduce the exposure to a clogging attack. Now we
 	 * have to burn some cycles to find the association and
 	 * authenticate the packet if required. Note that we burn only
 	 * MD5 cycles, again to reduce exposure. There may be no
 	 * matching association and that's okay.
 	 *
 	 * More on the autokey mambo. Normally the local interface is
 	 * found when the association was mobilized with respect to a
 	 * designated remote address. We assume packets arriving from
 	 * the remote address arrive via this interface and the local
 	 * address used to construct the autokey is the unicast address
 	 * of the interface. However, if the sender is a broadcaster,
 	 * the interface broadcast address is used instead.
 	 & Notwithstanding this technobabble, if the sender is a
 	 * multicaster, the broadcast address is null, so we use the
 	 * unicast address anyway. Don't ask.
 	 */
 	peer = findpeer(&rbufp->recv_srcadr, rbufp->dstadr,  hismode,
 	    &retcode);
 	dstadr_sin = &rbufp->dstadr->sin;
 	NTOHL_FP(&pkt->org, &p_org);
 	NTOHL_FP(&pkt->rec, &p_rec);
 	NTOHL_FP(&pkt->xmt, &p_xmt);
 
 	/*
 	 * Authentication is conditioned by three switches:
 	 *
 	 * NOPEER  (RES_NOPEER) do not mobilize an association unless
 	 *         authenticated
 	 * NOTRUST (RES_DONTTRUST) do not allow access unless
 	 *         authenticated (implies NOPEER)
 	 * enable  (sys_authenticate) master NOPEER switch, by default
 	 *         on
 	 *
 	 * The NOPEER and NOTRUST can be specified on a per-client basis
 	 * using the restrict command. The enable switch if on implies
 	 * NOPEER for all clients. There are four outcomes:
 	 *
 	 * NONE    The packet has no MAC.
 	 * OK      the packet has a MAC and authentication succeeds
 	 * ERROR   the packet has a MAC and authentication fails
 	 * CRYPTO  crypto-NAK. The MAC has four octets only.
 	 *
 	 * Note: The AUTH(x, y) macro is used to filter outcomes. If x
 	 * is zero, acceptable outcomes of y are NONE and OK. If x is
 	 * one, the only acceptable outcome of y is OK.
 	 */
 	if (has_mac == 0) {
 		is_authentic = AUTH_NONE; /* not required */
 #ifdef DEBUG
 		if (debug)
 			printf("receive: at %ld %s<-%s mode %d code %d auth %d\n",
 			    current_time, stoa(dstadr_sin),
 			    stoa(&rbufp->recv_srcadr), hismode, retcode,
 			    is_authentic);
 #endif
 	} else if (has_mac == 4) {
 			is_authentic = AUTH_CRYPTO; /* crypto-NAK */
 #ifdef DEBUG
 		if (debug)
 			printf(
 			    "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n",
 			    current_time, stoa(dstadr_sin),
 			    stoa(&rbufp->recv_srcadr), hismode, retcode,
 			    skeyid, authlen, has_mac, is_authentic);
 #endif
 	} else {
 #ifdef OPENSSL
 		/*
 		 * For autokey modes, generate the session key
 		 * and install in the key cache. Use the socket
 		 * broadcast or unicast address as appropriate.
 		 */
 		if (skeyid > NTP_MAXKEY) {
 		
 			/*
 			 * More on the autokey dance (AKD). A cookie is
 			 * constructed from public and private values.
 			 * For broadcast packets, the cookie is public
 			 * (zero). For packets that match no
 			 * association, the cookie is hashed from the
 			 * addresses and private value. For server
 			 * packets, the cookie was previously obtained
 			 * from the server. For symmetric modes, the
 			 * cookie was previously constructed using an
 			 * agreement protocol; however, should PKI be
 			 * unavailable, we construct a fake agreement as
 			 * the EXOR of the peer and host cookies.
 			 *
 			 * hismode	ephemeral	persistent
 			 * =======================================
 			 * active	0		cookie#
 			 * passive	0%		cookie#
 			 * client	sys cookie	0%
 			 * server	0%		sys cookie
 			 * broadcast	0		0
 			 *
 			 * # if unsync, 0
 			 * % can't happen
 			 */
 			if (hismode == MODE_BROADCAST) {
 
 				/*
 				 * For broadcaster, use the interface
 				 * broadcast address when available;
 				 * otherwise, use the unicast address
 				 * found when the association was
 				 * mobilized. However, if this is from
 				 * the wildcard interface, game over.
 				 */
 				if (crypto_flags && rbufp->dstadr ==
 				    any_interface) {
 					sys_restricted++;
 					return;	     /* no wildcard */
 				}
 				pkeyid = 0;
 				if (!SOCKNUL(&rbufp->dstadr->bcast))
 					dstadr_sin =
 					    &rbufp->dstadr->bcast;
 			} else if (peer == NULL) {
 				pkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin, 0,
 				    sys_private, 0);
 			} else {
 				pkeyid = peer->pcookie;
 			}
 
 			/*
 			 * The session key includes both the public
 			 * values and cookie. In case of an extension
 			 * field, the cookie used for authentication
 			 * purposes is zero. Note the hash is saved for
 			 * use later in the autokey mambo.
 			 */
 			if (authlen > LEN_PKT_NOMAC && pkeyid != 0) {
 				session_key(&rbufp->recv_srcadr,
 				    dstadr_sin, skeyid, 0, 2);
 				tkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin,
 				    skeyid, pkeyid, 0);
 			} else {
 				tkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin,
 				    skeyid, pkeyid, 2);
 			}
 
 		}
 #endif /* OPENSSL */
 
 		/*
 		 * Compute the cryptosum. Note a clogging attack may
 		 * succeed in bloating the key cache. If an autokey,
 		 * purge it immediately, since we won't be needing it
 		 * again. If the packet is authentic, it can mobilize an
 		 * association. Note that there is no key zero.
 		 */
 		if (!authdecrypt(skeyid, (u_int32 *)pkt, authlen,
 		    has_mac)) {
 			is_authentic = AUTH_ERROR;
 			sys_badauth++;
 			return;
 		} else {
 			is_authentic = AUTH_OK;
 		}
 #ifdef OPENSSL
 		if (skeyid > NTP_MAXKEY)
 			authtrust(skeyid, 0);
 #endif /* OPENSSL */
 #ifdef DEBUG
 		if (debug)
 			printf(
 			    "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n",
 			    current_time, stoa(dstadr_sin),
 			    stoa(&rbufp->recv_srcadr), hismode, retcode,
 			    skeyid, authlen, has_mac, is_authentic);
 #endif
 	}
 
 	/*
 	 * The association matching rules are implemented by a set of
 	 * routines and an association table. A packet matching an
 	 * association is processed by the peer process for that
 	 * association. If there are no errors, an ephemeral association
 	 * is mobilized: a broadcast packet mobilizes a broadcast client
 	 * aassociation; a manycast server packet mobilizes a manycast
 	 * client association; a symmetric active packet mobilizes a
 	 * symmetric passive association.
 	 */
 	switch (retcode) {
 
 	/*
 	 * This is a client mode packet not matching any association. If
 	 * an ordinary client, simply toss a server mode packet back
 	 * over the fence. If a manycast client, we have to work a
 	 * little harder.
 	 */
 	case AM_FXMIT:
 
 		/*
 		 * The vanilla case is when this is not a multicast
 		 * interface. If authentication succeeds, return a
 		 * server mode packet; if not and the key ID is nonzero,
 		 * return a crypto-NAK.
 		 */
 		if (!(rbufp->dstadr->flags & INT_MCASTOPEN)) {
 			if (AUTH(restrict_mask & RES_DONTTRUST,
 			   is_authentic))
 				fast_xmit(rbufp, MODE_SERVER, skeyid,
 				    restrict_mask);
 			else if (is_authentic == AUTH_ERROR)
 				fast_xmit(rbufp, MODE_SERVER, 0,
 				    restrict_mask);
 			return;			/* hooray */
 		}
 
 		/*
 		 * This must be manycast. Do not respond if not
 		 * configured as a manycast server.
 		 */
 		if (!sys_manycastserver) {
 			sys_restricted++;
 			return;			/* not enabled */
 		}
 
 		/*
 		 * Do not respond if unsynchronized or stratum is below
 		 * the floor or at or above the ceiling.
 		 */
 		if (sys_leap == LEAP_NOTINSYNC || sys_stratum <
 		    sys_floor || sys_stratum >= sys_ceiling)
 			return;			/* bad stratum */
 
 		/*
 		 * Do not respond if our stratum is greater than the
 		 * manycaster or it has already synchronized to us.
 		 */
 		if (sys_peer == NULL || hisstratum < sys_stratum ||
 		    (sys_cohort && hisstratum == sys_stratum) ||
 		    rbufp->dstadr->addr_refid == pkt->refid)
 			return;			/* no help */
 
 		/*
 		 * Respond only if authentication succeeds. Don't do a
 		 * crypto-NAK, as that would not be useful.
 		 */
 		if (AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
 			fast_xmit(rbufp, MODE_SERVER, skeyid,
 			    restrict_mask);
 
 		return;				/* hooray */
 
 	/*
 	 * This is a server mode packet returned in response to a client
 	 * mode packet sent to a multicast group address. The origin
 	 * timestamp is a good nonce to reliably associate the reply
 	 * with what was sent. If there is no match, that's curious and
 	 * could be an intruder attempting to clog, so we just ignore
 	 * it.
 	 *
 	 * If the packet is authentic and the manycast association is
 	 * found, we mobilize a client association and copy pertinent
 	 * variables from the manycast association to the new client
 	 * association. If not, just ignore the packet.
 	 *
 	 * There is an implosion hazard at the manycast client, since
 	 * the manycast servers send the server packet immediately. If
 	 * the guy is already here, don't fire up a duplicate.
 	 */
 	case AM_MANYCAST:
 		if (!AUTH(sys_authenticate | (restrict_mask &
 		    (RES_NOPEER | RES_DONTTRUST)), is_authentic))
 			return;			/* bad auth */
 
 		if ((peer2 = findmanycastpeer(rbufp)) == NULL) {
 			sys_restricted++;
 			return;			/* not enabled */
 		}
 		if ((peer = newpeer(&rbufp->recv_srcadr,
 		    rbufp->dstadr, MODE_CLIENT,
 		    hisversion, NTP_MINDPOLL, NTP_MAXDPOLL,
 		    FLAG_IBURST | FLAG_PREEMPT, MDF_UCAST | MDF_ACLNT,
 		    0, skeyid)) == NULL)
 			return;			/* system error */
 
 		/*
 		 * We don't need these, but it warms the billboards.
 		 */
 		peer->ttl = peer2->ttl;
 		break;
 
 	/*
 	 * This is the first packet received from a broadcast server. If
 	 * the packet is authentic and we are enabled as broadcast
 	 * client, mobilize a broadcast client association. We don't
 	 * kiss any frogs here.
 	 */
 	case AM_NEWBCL:
 		if (!AUTH(sys_authenticate | (restrict_mask &
 		    (RES_NOPEER | RES_DONTTRUST)), is_authentic))
 			return;			/* bad auth */
 
 		/*
 		 * Do not respond if unsynchronized or stratum is below
 		 * the floor or at or above the ceiling.
 		 */
 		if (hisleap == LEAP_NOTINSYNC || hisstratum <
 		    sys_floor || hisstratum >= sys_ceiling)
 			return;			/* bad stratum */
 
 		switch (sys_bclient) {
 
 		/*
 		 * If not enabled, just skedaddle.
 		 */
 		case 0:
 			sys_restricted++;
 			return;			/* not enabled */
 
 		/*
 		 * Execute the initial volley in order to calibrate the
 		 * propagation delay and run the Autokey protocol, if
 		 * enabled.
 		 */
 		case 1:
 			if ((peer = newpeer(&rbufp->recv_srcadr,
 			    rbufp->dstadr, MODE_CLIENT, hisversion,
 			    NTP_MINDPOLL, NTP_MAXDPOLL, FLAG_MCAST |
 			    FLAG_IBURST, MDF_BCLNT, 0, skeyid)) ==
 			    NULL)
 				return;		/* system error */
 #ifdef OPENSSL
 			if (skeyid > NTP_MAXKEY)
 				crypto_recv(peer, rbufp);
 #endif /* OPENSSL */
 			return;			/* hooray */
 
 
 		/*
 		 * Do not execute the initial volley.
 		 */
 		case 2:
 #ifdef OPENSSL
 			/*
 			 * If a two-way exchange is not possible,
 			 * neither is Autokey.
 			 */
 			if (skeyid > NTP_MAXKEY) {
 				msyslog(LOG_INFO,
 				    "receive: autokey requires two-way communication");
 				return;		/* no autokey */
 			}
 #endif /* OPENSSL */
 			if ((peer = newpeer(&rbufp->recv_srcadr,
 			    rbufp->dstadr, MODE_BCLIENT, hisversion,
 			    NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_BCLNT, 0,
 			    skeyid)) == NULL)
 				return;		/* system error */
 		}
 		break;
 
 	/*
 	 * This is the first packet received from a symmetric active
 	 * peer. If the packet is authentic and the first he sent,
 	 * mobilize a passive association. If not, kiss the frog.
 	 */
 	case AM_NEWPASS:
 
 		/*
 		 * If the inbound packet is correctly authenticated and
 		 * enabled, a symmetric passive association is
 		 * mobilized. If not but correctly authenticated, a
 		 * symmetric active response is sent. If authentication
 		 * fails, send a crypto-NAK packet. 
 		 */
 		if (!AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
 		    {
 			if (is_authentic == AUTH_ERROR)
 				fast_xmit(rbufp, MODE_ACTIVE, 0,
 				    restrict_mask);
 			return;			/* bad auth */
 		}
 		if (!AUTH(sys_authenticate | (restrict_mask &
 		    RES_NOPEER), is_authentic)) {
 			fast_xmit(rbufp, MODE_ACTIVE, skeyid,
 			    restrict_mask);
 			return;			/* hooray */
 		}
 
 		/*
 		 * Do not respond if stratum is below the floor.
 		 */
 		if (hisstratum < sys_floor)
 			return;			/* bad stratum */
 
 		if ((peer = newpeer(&rbufp->recv_srcadr,
 		    rbufp->dstadr, MODE_PASSIVE, hisversion,
 		    NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_UCAST, 0,
 		    skeyid)) == NULL)
 			return;			/* system error */
 		break;
 
 	/*
 	 * Process regular packet. Nothing special.
 	 */
 	case AM_PROCPKT:
 		break;
 
 	/*
 	 * A passive packet matches a passive association. This is
 	 * usually the result of reconfiguring a client on the fly. As
 	 * this association might be legitimate and this packet an
 	 * attempt to deny service, just ignore it.
 	 */
 	case AM_ERR:
 		return;
 
 	/*
 	 * For everything else there is the bit bucket.
 	 */
 	default:
 		return;
 	}
 	peer->flash &= ~PKT_TEST_MASK;
 
 	/*
 	 * Next comes a rigorous schedule of timestamp checking. If the
 	 * transmit timestamp is zero, the server is horribly broken.
 	 */
 	if (L_ISZERO(&p_xmt)) {
 		return;				/* read rfc1305 */
 
 	/*
 	 * If the transmit timestamp duplicates a previous one, the
 	 * packet is a replay. This prevents the bad guys from replaying
 	 * the most recent packet, authenticated or not.
 	 */
 	} else if (L_ISEQU(&peer->org, &p_xmt)) {
 		peer->flash |= TEST1;
 		peer->oldpkt++;
 		return;				/* duplicate packet */
 	
 
 	/*
 	 * If this is a broadcast mode packet, skip further checking.
 	 */
 	} else if (hismode != MODE_BROADCAST) {
 		if (L_ISZERO(&p_org))
 			peer->flash |= TEST3;	/* protocol unsynch */
 		else if (!L_ISEQU(&p_org, &peer->xmt))
 			peer->flash |= TEST2;	/* bogus packet */
 	}
 
 	/*
-	 * Update the origin and destination timestamps. If
-	 * unsynchronized or bogus abandon ship. If the crypto machine
+	 * If unsynchronized or bogus abandon ship. If the crypto machine
 	 * breaks, light the crypto bit and plaint the log.
 	 */
-	peer->org = p_xmt;
-	peer->rec = rbufp->recv_time;
 	if (peer->flash & PKT_TEST_MASK) {
 #ifdef OPENSSL
 		if (crypto_flags && (peer->flags & FLAG_SKEY)) {
 			rval = crypto_recv(peer, rbufp);
 			if (rval != XEVNT_OK) {
 				peer_clear(peer, "CRYP");
 				peer->flash |= TEST9; /* crypto error */
 			}
 		}
 #endif /* OPENSSL */
 		return;				/* unsynch */
 	}
 
 	/*
 	 * The timestamps are valid and the receive packet matches the
 	 * last one sent. If the packet is a crypto-NAK, the server
 	 * might have just changed keys. We reset the association
 	 * and restart the protocol.
 	 */
 	if (is_authentic == AUTH_CRYPTO) {
 		peer_clear(peer, "AUTH");
 		return;				/* crypto-NAK */
 
 	/* 
 	 * If the association is authenticated, the key ID is nonzero
 	 * and received packets must be authenticated. This is designed
 	 * to avoid a bait-and-switch attack, which was possible in past
 	 * versions. If symmetric modes, return a crypto-NAK. The peer
 	 * should restart the protocol.
 	 */
-	} else if (!AUTH(peer->keyid || (restrict_mask & RES_DONTTRUST),
-	    is_authentic)) {
+	} else if (!AUTH(peer->keyid || has_mac ||
+	    (restrict_mask & RES_DONTTRUST), is_authentic)) {
 		peer->flash |= TEST5;
-		if (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE)
+		if (has_mac &&
+		    (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE))
 			fast_xmit(rbufp, MODE_ACTIVE, 0, restrict_mask);
 		return;				/* bad auth */
 	}
 
 	/*
 	 * That was hard and I am sweaty, but the packet is squeaky
 	 * clean. Get on with real work.
+	 *
+	 * Update the origin and destination timestamps.
 	 */
+	peer->org = p_xmt;
+	peer->rec = rbufp->recv_time;
+
 	peer->received++;
 	peer->timereceived = current_time;
 	if (is_authentic == AUTH_OK)
 		peer->flags |= FLAG_AUTHENTIC;
 	else
 		peer->flags &= ~FLAG_AUTHENTIC;
 #ifdef OPENSSL
 	/*
 	 * More autokey dance. The rules of the cha-cha are as follows:
 	 *
 	 * 1. If there is no key or the key is not auto, do nothing.
 	 *
 	 * 2. If this packet is in response to the one just previously
 	 *    sent or from a broadcast server, do the extension fields.
 	 *    Otherwise, assume bogosity and bail out.
 	 *
 	 * 3. If an extension field contains a verified signature, it is
 	 *    self-authenticated and we sit the dance.
 	 *
 	 * 4. If this is a server reply, check only to see that the
 	 *    transmitted key ID matches the received key ID.
 	 *
 	 * 5. Check to see that one or more hashes of the current key ID
 	 *    matches the previous key ID or ultimate original key ID
 	 *    obtained from the broadcaster or symmetric peer. If no
 	 *    match, sit the dance and wait for timeout.
 	 *
 	 * In case of crypto error, fire the orchestra and stop dancing.
 	 * This is considered a permanent error, so light the crypto bit
 	 * to suppress further requests. If preemptable or ephemeral,
 	 * scuttle the ship.
 	 */
 	if (crypto_flags && (peer->flags & FLAG_SKEY)) {
 		peer->flash |= TEST8;
 		rval = crypto_recv(peer, rbufp);
 		if (rval != XEVNT_OK) {
 			peer_clear(peer, "CRYP");
 			peer->flash |= TEST9;	/* crypto error */
 			if (peer->flags & FLAG_PREEMPT ||
 			    !(peer->flags & FLAG_CONFIG))
 				unpeer(peer);
 			return;
 
 		} else if (hismode == MODE_SERVER) {
 			if (skeyid == peer->keyid)
 				peer->flash &= ~TEST8;
 		} else if (!(peer->flash & TEST8)) {
 			peer->pkeyid = skeyid;
 		} else if ((ap = (struct autokey *)peer->recval.ptr) !=
 		    NULL) {
 			int i;
 
 			for (i = 0; ; i++) {
 				if (tkeyid == peer->pkeyid ||
 				    tkeyid == ap->key) {
 					peer->flash &= ~TEST8;
 					peer->pkeyid = skeyid;
 					break;
 				}
 				if (i > ap->seq)
 					break;
 				tkeyid = session_key(
 				    &rbufp->recv_srcadr, dstadr_sin,
 				    tkeyid, pkeyid, 0);
 			}
 		}
 		if (!(peer->crypto & CRYPTO_FLAG_PROV)) /* test 9 */
 			peer->flash |= TEST8;	/* not proventic */
 
 		/*
 		 * If the transmit queue is nonempty, clamp the host
 		 * poll interval to the packet poll interval.
 		 */
 		if (peer->cmmd != 0) {
 			peer->ppoll = pkt->ppoll;
 			poll_update(peer, peer->hpoll);
 		}
 	}
 #endif /* OPENSSL */
 
 	/*
 	 * The dance is complete and the flash bits have been lit. Toss
 	 * the packet over the fence for processing, which may light up
 	 * more flashers.
 	 */
 	process_packet(peer, pkt);
 
 	/*
 	 * Well, that was nice. If TEST4 is lit, either the crypto
 	 * machine jammed or a kiss-o'-death packet flew in, either of
 	 * which is fatal.
 	 */
 	if (peer->flash & TEST4) {
 		msyslog(LOG_INFO, "receive: fatal error %04x for %s",
 		    peer->flash, stoa(&peer->srcadr));
 		return;
 	}
 }
 
 
 /*
  * process_packet - Packet Procedure, a la Section 3.4.4 of the
  *	specification. Or almost, at least. If we're in here we have a
  *	reasonable expectation that we will be having a long term
  *	relationship with this host.
  *
  * peer	association the packet was matched to; its header variables,
  *	flash bits and reach register are updated here
  * pkt	the received packet; header fields are converted from network
  *	byte order before they are used
  *
  * The routine counts the packet, checks for a "DENY" kiss-o'-death,
  * records the raw timestamps, copies the header values into the peer
  * structure and runs header tests 6 and 7. If any bit of
  * PKT_TEST_MASK is lit in peer->flash the packet is dropped. Otherwise
  * the offset, delay and dispersion are computed and handed to
  * clock_filter(), and the resulting peer statistics are recorded.
  */
 void
 process_packet(
 	register struct peer *peer,
 	register struct pkt *pkt
 	)
 {
 	double	t34, t21;
 	double	p_offset, p_del, p_disp;
 	l_fp	p_rec, p_xmt, p_org, p_reftime;
 	l_fp	ci;
 	u_char	pmode, pleap, pstratum;
 
 	/*
 	 * Bump the system and per-peer counters, then pull the header
 	 * fields out of the packet in host byte order.
 	 */
 	sys_processed++;
 	peer->processed++;
 	p_del = FPTOD(NTOHS_FP(pkt->rootdelay));
 	p_disp = FPTOD(NTOHS_FP(pkt->rootdispersion));
 	NTOHL_FP(&pkt->reftime, &p_reftime);
 	NTOHL_FP(&pkt->rec, &p_rec);
 	NTOHL_FP(&pkt->xmt, &p_xmt);
 	pmode = PKT_MODE(pkt->li_vn_mode);
 	pleap = PKT_LEAP(pkt->li_vn_mode);
 	/*
 	 * For broadcast packets the origin timestamp in the packet is
 	 * not used; the time of last reception stands in for it.
 	 */
 	if (pmode != MODE_BROADCAST)
 		NTOHL_FP(&pkt->org, &p_org);
 	else
 		p_org = peer->rec;
 	pstratum = PKT_TO_STRATUM(pkt->stratum);
 
 	/*
 	 * Test for a kiss-o'-death packet: leap bits unsynchronized and
 	 * stratum unspecified. Only the "DENY" code is acted upon here;
 	 * it clears the association and lights TEST4.
 	 */
 	if (pleap == LEAP_NOTINSYNC && pstratum == STRATUM_UNSPEC) {
 		if (memcmp(&pkt->refid, "DENY", 4) == 0) {
 			peer_clear(peer, "DENY");
 			peer->flash |= TEST4;	/* access denied */
 		}
 	}
 
 	/*
 	 * Capture the header values. The raw timestamps are logged
 	 * before any of the header tests below are applied.
 	 */
 	record_raw_stats(&peer->srcadr, peer->dstadr ? &peer->dstadr->sin : NULL, &p_org,
 	    &p_rec, &p_xmt, &peer->rec);
 	peer->leap = pleap;
 	peer->stratum = min(pstratum, STRATUM_UNSPEC);
 	peer->pmode = pmode;
 	peer->ppoll = pkt->ppoll;
 	peer->precision = pkt->precision;
 	peer->rootdelay = p_del;
 	peer->rootdispersion = p_disp;
 	peer->refid = pkt->refid;		/* network byte order */
 	peer->reftime = p_reftime;
 
 	/*
 	 * Verify the server is synchronized; that is, the leap bits and
 	 * stratum are valid, the root delay and root dispersion are
 	 * valid and the reference timestamp is not later than the
 	 * transmit timestamp.
 	 */
 	if (pleap == LEAP_NOTINSYNC ||		/* test 6 */
 	    pstratum < sys_floor || pstratum >= sys_ceiling)
 		peer->flash |= TEST6;		/* peer not synch */
 	if (p_del < 0 || p_disp < 0 || p_del /	/* test 7 */
 	    2 + p_disp >= MAXDISPERSE || !L_ISHIS(&p_xmt, &p_reftime))
 		peer->flash |= TEST7;		/* bad header */
 
 	/*
 	 * If any tests fail at this point, the packet is discarded.
 	 * Note that some flashers may have already been set in the
 	 * receive() routine.
 	 */
 	if (peer->flash & PKT_TEST_MASK) {
 #ifdef DEBUG
 		if (debug)
 			printf("packet: flash header %04x\n",
 			    peer->flash);
 #endif
 		return;
 	}
 	/*
 	 * The packet is acceptable. If the reach register was zero,
 	 * report the reachability event and note the time. Then update
 	 * the poll schedule and set the low-order bit of the reach
 	 * register.
 	 */
 	if (!(peer->reach)) {
 		report_event(EVNT_REACH, peer);
 		peer->timereachable = current_time;
 	}
 	poll_update(peer, peer->hpoll);
 	peer->reach |= 1;
 
 	/*
 	 * For a client/server association, calculate the clock offset,
 	 * roundtrip delay and dispersion. The equations are reordered
 	 * from the spec for more efficient use of temporaries. For a
 	 * broadcast association, offset the last measurement by the
 	 * computed delay during the client/server volley. Note that
 	 * org has been set to the time of last reception. Note the
 	 * computation of dispersion includes the system precision plus
 	 * that due to the frequency error since the origin time.
 	 *
 	 * It is very important to respect the hazards of overflow. The
 	 * only permitted operation on raw timestamps is subtraction,
 	 * where the result is a signed quantity spanning from 68 years
 	 * in the past to 68 years in the future. To avoid loss of
 	 * precision, these calculations are done using 64-bit integer
 	 * arithmetic. However, the offset and delay calculations are
 	 * sums and differences of these first-order differences, which
 	 * if done using 64-bit integer arithmetic, would be valid over
 	 * only half that span. Since the typical first-order
 	 * differences are usually very small, they are converted to 64-
 	 * bit doubles and all remaining calculations done in floating-
 	 * point arithmetic. This preserves the accuracy while retaining
 	 * the 68-year span.
 	 *
 	 * Let t1 = p_org, t2 = p_rec, t3 = p_xmt, t4 = peer->rec:
 	 */
 	ci = p_xmt;			/* t3 - t4 */
 	L_SUB(&ci, &peer->rec);
 	LFPTOD(&ci, t34);
 	ci = p_rec;			/* t2 - t1 */
 	L_SUB(&ci, &p_org);
 	LFPTOD(&ci, t21);
 	/*
 	 * ci is left holding (t4 - t1) as a raw l_fp; it is converted
 	 * to a double only in the client/server branch below, where it
 	 * scales the frequency-error term of the dispersion.
 	 */
 	ci = peer->rec;			/* t4 - t1 */
 	L_SUB(&ci, &p_org);
 
 	/*
 	 * If running in a broadcast association, the clock offset is
 	 * (t1 - t0) corrected by the one-way delay, but we can't
 	 * measure that directly. Therefore, we start up in MODE_CLIENT
 	 * mode, set FLAG_MCAST and exchange eight messages to determine
 	 * the clock offset. When the last message is sent, we switch to
 	 * MODE_BCLIENT mode. The next broadcast message after that
 	 * computes the broadcast offset and clears FLAG_MCAST.
 	 */
 	if (pmode == MODE_BROADCAST) {
 		p_offset = t34;
 		if (peer->flags & FLAG_MCAST) {
 			/*
 			 * Refresh the broadcast delay estimate from the
 			 * offset last produced by the filter. While the
 			 * host is still in client mode the sample is
 			 * not passed on to the filter.
 			 */
 			peer->estbdelay = peer->offset - p_offset;
 			if (peer->hmode == MODE_CLIENT)
 				return;
 
 			peer->flags &= ~(FLAG_MCAST | FLAG_BURST);
 		}
 		p_offset += peer->estbdelay;
 		p_del = peer->delay;
 		p_disp = 0;
 	} else {
 		p_offset = (t21 + t34) / 2.;
 		p_del = t21 - t34;
 		LFPTOD(&ci, p_disp);
 		p_disp = LOGTOD(sys_precision) +
 		    LOGTOD(peer->precision) + clock_phi * p_disp;
 	}
 	/*
 	 * The delay handed to the filter is never smaller than
 	 * LOGTOD(sys_precision).
 	 */
 	p_del = max(p_del, LOGTOD(sys_precision));
 	clock_filter(peer, p_offset, p_del, p_disp);
 	record_peer_stats(&peer->srcadr, ctlpeerstatus(peer),
 	    peer->offset, peer->delay, peer->disp, peer->jitter);
 }
 
 
 /*
  * clock_update - Called at system process update intervals.
  *
  * Clamps sys_poll to the system peer's minpoll/maxpoll range,
  * reschedules the system peer's poll and, if the system peer has a
  * sample newer than sys_clocktime, passes sys_offset to local_clock().
  * The return code of local_clock() selects what happens next: exit on
  * panic (-1), flush all peers after a step (2), or refresh the system
  * variables after a slew (1). Any other code is ignored.
  *
  * sys_peer is dereferenced without a NULL check; the caller must have
  * a system peer selected.
  */
 static void
 clock_update(void)
 {
 	u_char	oleap;
 	u_char	ostratum;
 	double	dtemp;
 
 	/*
 	 * There must be a system peer at this point. If we just changed
 	 * the system peer, but have a newer sample from the old one,
 	 * wait until newer data are available.
 	 */
 	if (sys_poll < sys_peer->minpoll)
 		sys_poll = sys_peer->minpoll;
 	if (sys_poll > sys_peer->maxpoll)
 		sys_poll = sys_peer->maxpoll;
 	poll_update(sys_peer, sys_poll);
 	if (sys_peer->epoch <= sys_clocktime)
 		return;
 
 	/*
 	 * NOTE(review): current_time is printed with %ld here, while
 	 * poll_update() prints it with %lu - confirm which matches its
 	 * declared type.
 	 */
 #ifdef DEBUG
 	if (debug)
 		printf("clock_update: at %ld assoc %d \n", current_time,
 		    peer_associations);
 #endif
 	/*
 	 * Remember the old leap and stratum so changes can be reported
 	 * after the clock discipline has run.
 	 */
 	oleap = sys_leap;
 	ostratum = sys_stratum;
 	switch (local_clock(sys_peer, sys_offset)) {
 
 	/*
 	 * Clock exceeds panic threshold. Life as we know it ends.
 	 */
 	case -1:
 		report_event(EVNT_SYSFAULT, NULL);
 		exit (-1);
 		/* not reached */
 
 	/*
 	 * Clock was stepped. Flush all time values of all peers.
 	 * The system variables revert to the unsynchronized state and
 	 * the reference ID is set to "STEP".
 	 */
 	case 2:
 		clear_all();
 		sys_leap = LEAP_NOTINSYNC;
 		sys_stratum = STRATUM_UNSPEC;
 		sys_peer = NULL;
 		sys_rootdelay = 0;
 		sys_rootdispersion = 0;
 		memcpy(&sys_refid, "STEP", 4);
 		report_event(EVNT_CLOCKRESET, NULL);
 		break;
 
 	/*
 	 * Clock was slewed. Update the system stratum, leap bits, root
 	 * delay, root dispersion, reference ID and reference time. If
 	 * the leap changes, we gotta reroll the keys. Except for
 	 * reference clocks, the minimum dispersion increment is not
 	 * less than sys_mindisp.
 	 */
 	case 1:
 		sys_leap = leap_next;
 		sys_stratum = min(sys_peer->stratum + 1,
 		    STRATUM_UNSPEC);
 		sys_reftime = sys_peer->rec;
 
 		/*
 		 * In orphan mode the stratum defaults to the orphan
 		 * stratum. The root delay is set to a random value
 		 * generated at startup. The root dispersion is set from
 		 * the peer dispersion; the peer root dispersion is
 		 * ignored.
 		 *
 		 * NOTE(review): the orphan branch below assigns
 		 * sys_peer->delay to sys_rootdelay; no random value is
 		 * visible in this routine - confirm the comment above
 		 * against the intended behaviour.
 		 *
 		 * dtemp is the dispersion increment: peer dispersion,
 		 * plus clock_phi times the age of the last peer
 		 * update, plus system jitter and the absolute peer
 		 * offset.
 		 */
 		dtemp = sys_peer->disp + clock_phi * (current_time -
 		    sys_peer->update) + sys_jitter +
 		    fabs(sys_peer->offset);
 #ifdef REFCLOCK
 		if (!(sys_peer->flags & FLAG_REFCLOCK) && dtemp <
 		    sys_mindisp)
 			dtemp = sys_mindisp;
 #else
 		if (dtemp < sys_mindisp)
 			dtemp = sys_mindisp;
 #endif /* REFCLOCK */
 		if (sys_stratum >= sys_orphan) {
 			sys_stratum = sys_orphan;
 			sys_rootdelay = sys_peer->delay;
 			sys_rootdispersion = dtemp;
 		} else {
 			sys_rootdelay = sys_peer->delay +
 			    sys_peer->rootdelay;
 			sys_rootdispersion = dtemp +
 			    sys_peer->rootdispersion;
 		}
 		/*
 		 * Coming out of the unsynchronized state: report the
 		 * change and, with OPENSSL, expire all keys and
 		 * refresh the crypto values.
 		 */
 		if (oleap == LEAP_NOTINSYNC) {
 			report_event(EVNT_SYNCCHG, NULL);
 #ifdef OPENSSL
 			expire_all();
 			crypto_update();
 #endif /* OPENSSL */
 		}
 		break;
 	/*
 	 * Popcorn spike or step threshold exceeded. Pretend it never
 	 * happened.
 	 */
 	default:
 		break;
 	}
 	/*
 	 * Report a stratum change regardless of which case was taken.
 	 */
 	if (ostratum != sys_stratum)
 		report_event(EVNT_PEERSTCHG, NULL);
 }
 
 
 /*
  * poll_update - update peer poll interval
  *
  * peer	association whose hpoll and nextdate are recomputed
  * mpoll	requested host poll value; it is clamped to the peer's
  *	minpoll..maxpoll range unless FLAG_FIXPOLL is set, in which
  *	case minpoll is used
  *
  * With OPENSSL, a change of hpoll expires the peer's key list via
  * key_expire(). On return peer->nextdate is strictly later than
  * current_time, except when the routine returns early during a burst
  * (see below), in which case nextdate is left untouched.
  */
 void
 poll_update(
 	struct peer *peer,
 	int	mpoll
 	)
 {
 	int	hpoll;
 
 	/*
 	 * This routine figures out when the next poll should be sent.
 	 * That turns out to be wickedly complicated. The big problem is
 	 * that sometimes the time for the next poll is in the past.
 	 * Watch out for races here between the receive process and the
 	 * poll process. The key assertion is that, if nextdate equals
 	 * current_time, the call is from the poll process; otherwise,
 	 * it is from the receive process.
 	 *
 	 * First, bracket the poll interval according to the type of
 	 * association and options. If a fixed interval is configured,
 	 * use minpoll. This primarily is for reference clocks, but
 	 * works for any association.
 	 */
 	if (peer->flags & FLAG_FIXPOLL) {
 		hpoll = peer->minpoll;
 
 	/*
 	 * The ordinary case; clamp the poll interval between minpoll
 	 * and maxpoll.
 	 */
 	} else {
 		hpoll = max(min(peer->maxpoll, mpoll), peer->minpoll);
 	}
 #ifdef OPENSSL
 	/*
 	 * Bit of crass arrogance at this point. If the poll interval
 	 * has changed and we have a keylist, the lifetimes in the
 	 * keylist are probably bogus. In this case purge the keylist
 	 * and regenerate it later.
 	 */
 	if (hpoll != peer->hpoll)
 		key_expire(peer);
 #endif /* OPENSSL */
 	peer->hpoll = hpoll;
 
 	/*
 	 * Now we figure out if there is an override. If during the
 	 * crypto protocol and a message is pending, make it wait not
 	 * more than two seconds.
 	 *
 	 * The preprocessor conditionals below build a single if/else
 	 * chain: with OPENSSL the chain starts with the pending crypto
 	 * message test and continues with "else if (burst > 0)";
 	 * without OPENSSL it starts directly with "if (burst > 0)".
 	 */
 #ifdef OPENSSL
 	if (peer->cmmd != NULL && (sys_leap != LEAP_NOTINSYNC ||
 	    peer->crypto)) {
 		peer->nextdate = current_time + RESP_DELAY;
 
 	/*
 	 * If we get called from the receive routine while a burst is
 	 * pending, just slink away. If from the poll routine and a
 	 * reference clock or a pending crypto response, delay for one
 	 * second. If this is the first sent in a burst, wait for the
 	 * modem to come up. For others in the burst, delay two seconds.
 	 */
 	} else if (peer->burst > 0) {
 #else /* OPENSSL */
 	if (peer->burst > 0) {
 #endif /* OPENSSL */
 		/*
 		 * nextdate != current_time means the caller is the
 		 * receive process; leave the schedule alone.
 		 */
 		if (peer->nextdate != current_time)
 			return;
 #ifdef REFCLOCK
 		else if (peer->flags & FLAG_REFCLOCK)
 			peer->nextdate += RESP_DELAY;
 #endif /* REFCLOCK */
 		else if (peer->flags & (FLAG_IBURST | FLAG_BURST) &&
 		    peer->burst == NTP_BURST)
 			peer->nextdate += sys_calldelay;
 		else
 			peer->nextdate += BURST_DELAY;
 	/*
 	 * The ordinary case; use the minimum of the host and peer
 	 * intervals, but not less than minpoll. In other words,
 	 * oversampling is okay but undersampling is evil.
 	 */
 	} else {
 		peer->nextdate = peer->outdate +
 		    RANDPOLL(max(min(peer->ppoll, hpoll),
 		    peer->minpoll));
 	}
 
 	/*
 	 * If the time for the next poll has already happened, bring it
 	 * up to the next second after this one. This way the only way
 	 * to get nextdate == current_time is from the poll routine.
 	 */
 	if (peer->nextdate <= current_time)
 		peer->nextdate = current_time + 1;
 #ifdef DEBUG
 	if (debug > 1)
 		printf("poll_update: at %lu %s flags %04x poll %d burst %d last %lu next %lu\n",
 		    current_time, ntoa(&peer->srcadr), peer->flags,
 		    peer->hpoll, peer->burst, peer->outdate,
 		    peer->nextdate);
 #endif
 }
 
 /*
  * peer_crypto_clear - discard crypto information
  *
  * peer	association whose cryptographic state is released
  *
  * Without OPENSSL this routine only emits the debug trace. With
  * OPENSSL every owned object (public keys, subject/issuer names,
  * big numbers, value structures, pending command) is freed and its
  * pointer reset to NULL, the scalar crypto fields are zeroed and the
  * key list is expired. Every pointer is cleared after it is freed, so
  * a second call finds nothing left to release.
  */
 void
 peer_crypto_clear(
 		  struct peer *peer
 		  )
 {
 	/*
 	 * If cryptographic credentials have been acquired, toss them to
 	 * Valhalla. Note that autokeys are ephemeral, in that they are
 	 * tossed immediately upon use. Therefore, the keylist can be
 	 * purged anytime without needing to preserve random keys. Note
 	 * that, if the peer is purged, the cryptographic variables are
 	 * purged, too. This makes it much harder to sneak in some
 	 * unauthenticated data in the clock filter.
 	 */
 	DPRINTF(1, ("peer_crypto_clear: at %ld next %ld assoc ID %d\n",
 		    current_time, peer->nextdate, peer->associd));
 
 #ifdef OPENSSL
 	/* Forget the association ID and crypto status word. */
 	peer->assoc = 0;
 	peer->crypto = 0;
 
 	/* Public key. */
 	if (peer->pkey != NULL)
 		EVP_PKEY_free(peer->pkey);
 	peer->pkey = NULL;
 
 	/* The digest pointer is dropped, not freed (see note). */
 	peer->digest = NULL;	/* XXX MEMLEAK? check whether this needs to be freed in any way - never was freed */
 
 	/* Certificate subject and issuer names. */
 	if (peer->subject != NULL)
 		free(peer->subject);
 	peer->subject = NULL;
 
 	if (peer->issuer != NULL)
 		free(peer->issuer);
 	peer->issuer = NULL;
 
 	/* Previous key ID and cookie. */
 	peer->pkeyid = 0;
 
 	peer->pcookie = 0;
 
 	/* Identity key and its filestamp. */
 	if (peer->ident_pkey != NULL)
 		EVP_PKEY_free(peer->ident_pkey);
 	peer->ident_pkey = NULL;
 
 	memset(&peer->fstamp, 0, sizeof(peer->fstamp));
 
 	/* Big-number values held for the identity exchange. */
 	if (peer->iffval != NULL)
 		BN_free(peer->iffval);
 	peer->iffval = NULL;
 
 	if (peer->grpkey != NULL)
 		BN_free(peer->grpkey);
 	peer->grpkey = NULL;
 
 	/* Cookie and receive-autokey value structures. */
 	value_free(&peer->cookval);
 	value_free(&peer->recval);
 
 	/* Pending extension-field command, if any. */
 	if (peer->cmmd != NULL) {
 		free(peer->cmmd);
 		peer->cmmd = NULL;
 	}
 
 	/* Purge the key list, then the value structure in encrypt. */
 	key_expire(peer);
 
 	value_free(&peer->encrypt);
 #endif /* OPENSSL */
 }
 
 /*
  * peer_clear - clear peer filter registers.  See Section 3.4.8 of the spec.
  *
  * peer	association to reset
  * ident	tally code; exactly four bytes are copied into peer->refid
  *	and the string is also printed with %s in the debug trace, so
  *	it must be a NUL-terminated string of at least four characters
  *
  * Releases the crypto state, drops the peer as system peer if it was
  * one, zeroes the CLEAR_TO_ZERO region of the structure, restores the
  * nonzero defaults and schedules the next poll.
  */
 void
 peer_clear(
 	struct peer *peer,		/* peer structure */
 	char	*ident			/* tally lights */
 	)
 {
 	int	i;
 
 	/*
 	 * Crypto state must be released before the memset below, which
 	 * wipes the CLEAR_TO_ZERO region of the structure.
 	 */
 	peer_crypto_clear(peer);
 
 	if (peer == sys_peer)
 		sys_peer = NULL;
 
 	/*
 	 * Wipe the association clean and initialize the nonzero values.
 	 */
 	memset(CLEAR_TO_ZERO(peer), 0, LEN_CLEAR_TO_ZERO);
 	peer->estbdelay = sys_bdelay;
 	peer->ppoll = peer->maxpoll;
 	peer->hpoll = peer->minpoll;
 	peer->disp = MAXDISPERSE;
 	peer->jitter = LOGTOD(sys_precision);
 	for (i = 0; i < NTP_SHIFT; i++) {
 		peer->filter_order[i] = i;
 		peer->filter_disp[i] = MAXDISPERSE;
 	}
 	/*
 	 * Leap, stratum and reference ID are reset for every
 	 * association except reference clocks (when REFCLOCK is
 	 * compiled in), whose values are left as they are.
 	 */
 #ifdef REFCLOCK
 	if (!(peer->flags & FLAG_REFCLOCK)) {
 		peer->leap = LEAP_NOTINSYNC;
 		peer->stratum = STRATUM_UNSPEC;
 		memcpy(&peer->refid, ident, 4);
 	}
 #else
 	peer->leap = LEAP_NOTINSYNC;
 	peer->stratum = STRATUM_UNSPEC;
 	memcpy(&peer->refid, ident, 4);
 #endif /* REFCLOCK */
 
 	/*
 	 * During initialization use the association count to spread out
 	 * the polls at one-second intervals. Otherwise, randomize over
 	 * the minimum poll interval in order to avoid broadcast
 	 * implosion. A symmetric passive association instead waits
 	 * RESP_DELAY.
 	 */
 	peer->nextdate = peer->update = peer->outdate = current_time;
 	if (initializing)
 		peer->nextdate += peer_associations;
 	else if (peer->hmode == MODE_PASSIVE)
 		peer->nextdate += RESP_DELAY;
 	else
 		peer->nextdate += (ntp_random() & ((1 << NTP_MINDPOLL) -
 		    1));
 
 	DPRINTF(1, ("peer_clear: at %ld next %ld assoc ID %d refid %s\n",
 		    current_time, peer->nextdate, peer->associd, ident));
 }
 
 
 /*
  * clock_filter - add incoming clock sample to filter register and run
  *		  the filter procedure to find the best sample.
  *
  * The new sample is stored in the NTP_SHIFT-stage shift register, the
  * dispersion of the older samples is aged, and the samples are ranked
  * by a distance metric. peer->disp and peer->jitter are always
  * recomputed; peer->offset and peer->delay are updated when at least
  * one acceptable sample remains. peer->epoch advances and
  * clock_select() may be called only if the chosen sample is newer than
  * the last one used and is not judged a popcorn spike.
  */
 void
 clock_filter(
 	struct peer *peer,		/* peer structure pointer */
 	double	sample_offset,		/* clock offset */
 	double	sample_delay,		/* roundtrip delay */
 	double	sample_disp		/* dispersion */
 	)
 {
 	double	dst[NTP_SHIFT];		/* distance vector */
 	int	ord[NTP_SHIFT];		/* index vector */
 	int	i, j, k, m;
 	double	dtemp, etemp;
 
 	/*
 	 * Shift the new sample into the register and discard the oldest
 	 * one. The new offset and delay come directly from the
 	 * timestamp calculations. The dispersion grows from the last
 	 * outbound packet or reference clock update to the present time
 	 * and increased by the sum of the peer precision and the system
 	 * precision. The delay can sometimes swing negative due to
 	 * frequency skew, so it is clamped non-negative.
 	 */
 	j = peer->filter_nextpt;
 	peer->filter_offset[j] = sample_offset;
 	peer->filter_delay[j] = max(0, sample_delay);
 	peer->filter_disp[j] = sample_disp;
 	peer->filter_epoch[j] = current_time;
 	j = (j + 1) % NTP_SHIFT;
 	peer->filter_nextpt = j;
 
 	/*
 	 * Update dispersions since the last update and at the same
 	 * time initialize the distance and index lists. The distance
 	 * list uses a compound metric. If the sample is valid and
 	 * younger than the minimum Allan intercept, use delay;
 	 * otherwise, use biased dispersion.
 	 *
 	 * j starts at the oldest stage and walks forward, while i
 	 * counts down, so that index 0 of dst[]/ord[] ends up holding
 	 * the sample just inserted. That newest sample (i == 0) is the
 	 * only one whose dispersion is not aged by dtemp.
 	 */
 	dtemp = clock_phi * (current_time - peer->update);
 	peer->update = current_time;
 	for (i = NTP_SHIFT - 1; i >= 0; i--) {
 		if (i != 0)
 			peer->filter_disp[j] += dtemp;
 		if (peer->filter_disp[j] >= MAXDISPERSE)
 			peer->filter_disp[j] = MAXDISPERSE;
 		if (peer->filter_disp[j] >= MAXDISPERSE)
 			dst[i] = MAXDISPERSE;
 		else if (peer->update - peer->filter_epoch[j] >
 		    allan_xpt)
 			dst[i] = sys_maxdist + peer->filter_disp[j];
 		else
 			dst[i] = peer->filter_delay[j];
 		ord[i] = j;
 		j++; j %= NTP_SHIFT;
 	}
 
 	/*
 	 * If the clock discipline has stabilized, sort the samples in
 	 * both lists by distance. Note, we do not displace a higher
 	 * distance sample by a lower distance one unless lower by at
 	 * least the precision.
 	 */
 	if (state == 4) {
 		for (i = 1; i < NTP_SHIFT; i++) {
 			for (j = 0; j < i; j++) {
 				if (dst[j] > dst[i] +
 				    LOGTOD(sys_precision)) {
 					k = ord[j];
 					ord[j] = ord[i];
 					ord[i] = k;
 					etemp = dst[j];
 					dst[j] = dst[i];
 					dst[i] = etemp;
 				}
 			}
 		}
 	}
 
 	/*
 	 * Copy the index list to the association structure so ntpq
 	 * can see it later. Prune the distance list to samples less
 	 * than max distance, but keep at least two valid samples for
 	 * jitter calculation.
 	 *
 	 * m counts the samples that pass; nothing is removed from the
 	 * lists themselves.
 	 */
 	m = 0;
 	for (i = 0; i < NTP_SHIFT; i++) {
 		peer->filter_order[i] = (u_char) ord[i];
 		if (dst[i] >= MAXDISPERSE || (m >= 2 && dst[i] >=
 		    sys_maxdist))
 			continue;
 		m++;
 	}
 
 	/*
 	 * Compute the dispersion and jitter. The dispersion is weighted
 	 * exponentially by NTP_FWEIGHT (0.5) so it is normalized close
 	 * to 1.0. The jitter is the RMS differences relative to the
 	 * lowest delay sample. If no acceptable samples remain in the
 	 * shift register, quietly tiptoe home leaving only the
 	 * dispersion.
 	 *
 	 * k is the register index of the first entry in the (possibly
 	 * sorted) list; only the first m list entries contribute to
 	 * the jitter sum.
 	 */
 	peer->disp = peer->jitter = 0;
 	k = ord[0];
 	for (i = NTP_SHIFT - 1; i >= 0; i--) {
 		j = ord[i];
 		peer->disp = NTP_FWEIGHT * (peer->disp +
 		    peer->filter_disp[j]);
 		if (i < m)
 			peer->jitter += DIFF(peer->filter_offset[j],
 			    peer->filter_offset[k]);
 	}
 
 	/*
 	 * If no acceptable samples remain in the shift register,
 	 * quietly tiptoe home leaving only the dispersion. Otherwise,
 	 * save the offset, delay and jitter. Note the jitter must not
 	 * be less than the precision.
 	 */
 	if (m == 0)
 		return;
 
 	/*
 	 * etemp is the change in offset relative to the previous
 	 * filter output; it feeds the popcorn spike test below.
 	 */
 	etemp = fabs(peer->offset - peer->filter_offset[k]);
 	peer->offset = peer->filter_offset[k];
 	peer->delay = peer->filter_delay[k];
 	if (m > 1)
 		peer->jitter /= m - 1;
 	peer->jitter = max(SQRT(peer->jitter), LOGTOD(sys_precision));
 
 	/*
 	 * A new sample is useful only if it is younger than the last
 	 * one used. Note the order is FIFO if the clock discipline has
 	 * not stabilized.
 	 */
 	if (peer->filter_epoch[k] <= peer->epoch) {
 #ifdef DEBUG
 		if (debug)
 			printf("clock_filter: discard %lu\n",
 			    peer->epoch - peer->filter_epoch[k]);
 #endif
 		return;
 	}
 
 	/*
 	 * If the difference between the last offset and the current one
 	 * exceeds the jitter by CLOCK_SGATE and the interval since the
 	 * last update is less than twice the system poll interval,
 	 * consider the update a popcorn spike and ignore it.
 	 *
 	 * NOTE(review): the debug trace prints dtemp (the dispersion
 	 * increment computed above), although the test compares etemp
 	 * against peer->jitter - confirm which value was meant.
 	 */
 	if (etemp > CLOCK_SGATE * peer->jitter && m > 1 &&
 	    peer->filter_epoch[k] - peer->epoch < 2. *
 	    ULOGTOD(sys_poll)) {
 #ifdef DEBUG
 		if (debug)
 			printf("clock_filter: popcorn %.6f %.6f\n",
 			    etemp, dtemp);
 #endif
 		return;
 	}
 
 	/*
 	 * The mitigated sample statistics are saved for later
 	 * processing. If not in a burst, tickle the select. The select
 	 * is also run during a burst while the system is still
 	 * unsynchronized.
 	 */
 	peer->epoch = peer->filter_epoch[k];
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "clock_filter: n %d off %.6f del %.6f dsp %.6f jit %.6f, age %lu\n",
 		    m, peer->offset, peer->delay, peer->disp,
 		    peer->jitter, current_time - peer->epoch);
 #endif
 	if (peer->burst == 0 || sys_leap == LEAP_NOTINSYNC)
 		clock_select();
 }
 
 
 /*
  * clock_select - find the pick-of-the-litter clock
  *
  * LOCKCLOCK: (1) If the local clock is the prefer peer, it will always
  * be enabled, even if declared falseticker, (2) only the prefer peer can
  * be selected as the system peer, (3) if the external source is down,
  * the system leap bits are set to 11 and the stratum set to infinity.
  */
 void
 clock_select(void)
 {
 	struct peer *peer;
 	int	i, j, k, n;
 	int	nlist, nl3;
 
 	int	allow, osurv;
 	double	d, e, f, g;
 	double	high, low;
 	double	synch[NTP_MAXASSOC], error[NTP_MAXASSOC];
 	struct peer *osys_peer;
 	struct peer *typeacts = NULL;
 	struct peer *typelocal = NULL;
 	struct peer *typesystem = NULL;
 
 	static int list_alloc = 0;
 	static struct endpoint *endpoint = NULL;
 	static int *indx = NULL;
 	static struct peer **peer_list = NULL;
 	static u_int endpoint_size = 0;
 	static u_int indx_size = 0;
 	static u_int peer_list_size = 0;
 
 	/*
 	 * Initialize and create endpoint, index and peer lists big
 	 * enough to handle all associations.
 	 */
 	osys_peer = sys_peer;
 	sys_peer = NULL;
 	sys_pps = NULL;
 	sys_prefer = NULL;
 	osurv = sys_survivors;
 	sys_survivors = 0;
 #ifdef LOCKCLOCK
 	sys_leap = LEAP_NOTINSYNC;
 	sys_stratum = STRATUM_UNSPEC;
 	memcpy(&sys_refid, "DOWN", 4);
 #endif /* LOCKCLOCK */
 	nlist = 0;
 	for (n = 0; n < NTP_HASH_SIZE; n++)
 		nlist += peer_hash_count[n];
 	if (nlist > list_alloc) {
 		if (list_alloc > 0) {
 			free(endpoint);
 			free(indx);
 			free(peer_list);
 		}
 		while (list_alloc < nlist) {
 			list_alloc += 5;
 			endpoint_size += 5 * 3 * sizeof(*endpoint);
 			indx_size += 5 * 3 * sizeof(*indx);
 			peer_list_size += 5 * sizeof(*peer_list);
 		}
 		endpoint = (struct endpoint *)emalloc(endpoint_size);
 		indx = (int *)emalloc(indx_size);
 		peer_list = (struct peer **)emalloc(peer_list_size);
 	}
 
 	/*
 	 * Initially, we populate the island with all the rifraff peers
 	 * that happen to be lying around. Those with seriously
 	 * defective clocks are immediately booted off the island. Then,
 	 * the falsetickers are culled and put to sea. The truechimers
 	 * remaining are subject to repeated rounds where the most
 	 * unpopular at each round is kicked off. When the population
 	 * has dwindled to sys_minclock, the survivors split a million
 	 * bucks and collectively crank the chimes.
 	 */
 	nlist = nl3 = 0;	/* none yet */
 	for (n = 0; n < NTP_HASH_SIZE; n++) {
 		for (peer = peer_hash[n]; peer != NULL; peer =
 		    peer->next) {
 			peer->flags &= ~FLAG_SYSPEER;
 			peer->status = CTL_PST_SEL_REJECT;
 
 			/*
 			 * Leave the island immediately if the peer is
 			 * unfit to synchronize.
 			 */
 			if (peer_unfit(peer))
 				continue;
 
 			/*
 			 * Don't allow the local clock or modem drivers
 			 * in the kitchen at this point, unless the
 			 * prefer peer. Do that later, but only if
 			 * nobody else is around. These guys are all
 			 * configured, so we never throw them away.
 			 */
 #ifdef REFCLOCK
 			if (peer->refclktype == REFCLK_LOCALCLOCK
 #if defined(VMS) && defined(VMS_LOCALUNIT)
 			/* wjm: VMS_LOCALUNIT taken seriously */
 			    && REFCLOCKUNIT(&peer->srcadr) !=
 			    VMS_LOCALUNIT
 #endif	/* VMS && VMS_LOCALUNIT */
 				) {
 				typelocal = peer;
 #ifndef LOCKCLOCK
 				if (!(peer->flags & FLAG_PREFER))
 					continue; /* no local clock */
 #endif /* LOCKCLOCK */
 			}
 			if (peer->sstclktype == CTL_SST_TS_TELEPHONE) {
 				typeacts = peer;
 				if (!(peer->flags & FLAG_PREFER))
 					continue; /* no acts */
 			}
 #endif /* REFCLOCK */
 
 			/*
 			 * If we get this far, the peer can stay on the
 			 * island, but does not yet have the immunity
 			 * idol.
 			 */
 			peer->status = CTL_PST_SEL_SANE;
 			peer_list[nlist++] = peer;
 
 			/*
 			 * Insert each interval endpoint on the sorted
 			 * list.
 			 */
 			e = peer->offset;	 /* Upper end */
 			f = root_distance(peer);
 			e = e + f;
 			for (i = nl3 - 1; i >= 0; i--) {
 				if (e >= endpoint[indx[i]].val)
 					break;
 
 				indx[i + 3] = indx[i];
 			}
 			indx[i + 3] = nl3;
 			endpoint[nl3].type = 1;
 			endpoint[nl3++].val = e;
 
 			e = e - f;		/* Center point */
 			for (; i >= 0; i--) {
 				if (e >= endpoint[indx[i]].val)
 					break;
 
 				indx[i + 2] = indx[i];
 			}
 			indx[i + 2] = nl3;
 			endpoint[nl3].type = 0;
 			endpoint[nl3++].val = e;
 
 			e = e - f;		/* Lower end */
 			for (; i >= 0; i--) {
 				if (e >= endpoint[indx[i]].val)
 					break;
 
 				indx[i + 1] = indx[i];
 			}
 			indx[i + 1] = nl3;
 			endpoint[nl3].type = -1;
 			endpoint[nl3++].val = e;
 		}
 	}
 #ifdef DEBUG
 	if (debug > 2)
 		for (i = 0; i < nl3; i++)
 			printf("select: endpoint %2d %.6f\n",
 			   endpoint[indx[i]].type,
 			   endpoint[indx[i]].val);
 #endif
 	/*
 	 * This is the actual algorithm that cleaves the truechimers
 	 * from the falsetickers. The original algorithm was described
 	 * in Keith Marzullo's dissertation, but has been modified for
 	 * better accuracy.
 	 *
 	 * Briefly put, we first assume there are no falsetickers, then
 	 * scan the candidate list first from the low end upwards and
 	 * then from the high end downwards. The scans stop when the
 	 * number of intersections equals the number of candidates less
 	 * the number of falsetickers. If this doesn't happen for a
 	 * given number of falsetickers, we bump the number of
 	 * falsetickers and try again. If the number of falsetickers
 	 * becomes equal to or greater than half the number of
 	 * candidates, the Albanians have won the Byzantine wars and
 	 * correct synchronization is not possible.
 	 *
 	 * Here, nlist is the number of candidates and allow is the
 	 * number of falsetickers. Upon exit, the truechimers are the
 	 * susvivors with offsets not less than low and not greater than
 	 * high. There may be none of them.
 	 */
 	low = 1e9;
 	high = -1e9;
 	for (allow = 0; 2 * allow < nlist; allow++) {
 		int	found;
 
 		/*
 		 * Bound the interval (low, high) as the largest
 		 * interval containing points from presumed truechimers.
 		 */
 		found = 0;
 		n = 0;
 		for (i = 0; i < nl3; i++) {
 			low = endpoint[indx[i]].val;
 			n -= endpoint[indx[i]].type;
 			if (n >= nlist - allow)
 				break;
 			if (endpoint[indx[i]].type == 0)
 				found++;
 		}
 		n = 0;
 		for (j = nl3 - 1; j >= 0; j--) {
 			high = endpoint[indx[j]].val;
 			n += endpoint[indx[j]].type;
 			if (n >= nlist - allow)
 				break;
 			if (endpoint[indx[j]].type == 0)
 				found++;
 		}
 
 		/*
 		 * If the number of candidates found outside the
 		 * interval is greater than the number of falsetickers,
 		 * then at least one truechimer is outside the interval,
 		 * so go around again. This is what makes this algorithm
 		 * different than Marzullo's.
 		 */
 		if (found > allow)
 			continue;
 
 		/*
 		 * If an interval containing truechimers is found, stop.
 		 * If not, increase the number of falsetickers and go
 		 * around again.
 		 */
 		if (high > low)
 			break;
 	}
 
 	/*
 	 * Clustering algorithm. Construct candidate list in order first
 	 * by stratum then by root distance, but keep only the best
 	 * NTP_MAXASSOC of them. Scan the list to find falsetickers, who
 	 * leave the island immediately. The TRUE peer is always a
 	 * truechimer. We must leave at least one peer to collect the
 	 * million bucks. If in orphan mode, rascals found with lower
 	 * stratum are guaranteed a seat on the bus.
 	 */
 	j = 0;
 	for (i = 0; i < nlist; i++) {
 		peer = peer_list[i];
 		if (nlist > 1 && (peer->offset <= low || peer->offset >=
 		    high) && !(peer->flags & FLAG_TRUE) &&
 		    !(sys_stratum >= sys_orphan && peer->stratum <
 		    sys_orphan))
 			continue;
 
 		peer->status = CTL_PST_SEL_DISTSYSPEER;
 
 		/*
 		 * The order metric is formed from the stratum times
 		 * max distance (1.) plus the root distance. It strongly
 		 * favors the lowest stratum, but a higher stratum peer
 		 * can capture the clock if the low stratum dominant
 		 * hasn't been heard for awhile.
 		 */
 		d = root_distance(peer) + peer->stratum * sys_maxdist;
 		if (j >= NTP_MAXASSOC) {
 			if (d >= synch[j - 1])
 				continue;
 			else
 				j--;
 		}
 		for (k = j; k > 0; k--) {
 			if (d >= synch[k - 1])
 				break;
 
 			peer_list[k] = peer_list[k - 1];
 			error[k] = error[k - 1];
 			synch[k] = synch[k - 1];
 		}
 		peer_list[k] = peer;
 		error[k] = peer->jitter;
 		synch[k] = d;
 		j++;
 	}
 	nlist = j;
 
 	/*
 	 * If no survivors remain at this point, check if the local
 	 * clock or modem drivers have been found. If so, nominate one
 	 * of them as the only survivor. Otherwise, give up and leave
 	 * the island to the rats.
 	 */
 	if (nlist == 0) {
 		if (typeacts != 0) {
 			typeacts->status = CTL_PST_SEL_DISTSYSPEER;
 			peer_list[0] = typeacts;
 			nlist = 1;
 		} else if (typelocal != 0) {
 			typelocal->status = CTL_PST_SEL_DISTSYSPEER;
 			peer_list[0] = typelocal;
 			nlist = 1;
 		} else {
 			if (osys_peer != NULL) {
 				NLOG(NLOG_SYNCSTATUS)
 				    msyslog(LOG_INFO,
 				    "no servers reachable");
 				report_event(EVNT_PEERSTCHG, NULL);
 			}
 		}
 	}
 
 	/*
 	 * We can only trust the survivors if the number of candidates
 	 * sys_minsane is at least the number required to detect and
 	 * cast out one falsticker. For the Byzantine agreement
 	 * algorithm used here, that number is 4; however, the default
 	 * sys_minsane is 1 to speed initial synchronization. Careful
 	 * operators will tinker a higher value and use at least that
 	 * number of synchronization sources.
 	 */
 	if (nlist < sys_minsane)
 		return;
 
 	for (i = 0; i < nlist; i++)
 		peer_list[i]->status = CTL_PST_SEL_SELCAND;
 
 	/*
 	 * Now, vote outlyers off the island by select jitter weighted
 	 * by root distance. Continue voting as long as there are more
 	 * than sys_minclock survivors and the minimum select jitter is
 	 * greater than the maximum peer jitter. Stop if we are about to
 	 * discard a TRUE or PREFER  peer, who of course has the
 	 * immunity idol.
 	 */
 	while (1) {
 		d = 1e9;
 		e = -1e9;
 		f = g = 0;
 		k = 0;
 		for (i = 0; i < nlist; i++) {
 			if (error[i] < d)
 				d = error[i];
 			f = 0;
 			if (nlist > 1) {
 				for (j = 0; j < nlist; j++)
 					f += DIFF(peer_list[j]->offset,
 					    peer_list[i]->offset);
 				f = SQRT(f / (nlist - 1));
 			}
 			if (f * synch[i] > e) {
 				g = f;
 				e = f * synch[i];
 				k = i;
 			}
 		}
 		f = max(f, LOGTOD(sys_precision));
 		if (nlist <= sys_minclock || f <= d ||
 		    peer_list[k]->flags & (FLAG_TRUE | FLAG_PREFER))
 			break;
 #ifdef DEBUG
 		if (debug > 2)
 			printf(
 			    "select: drop %s select %.6f jitter %.6f\n",
 			    ntoa(&peer_list[k]->srcadr), g, d);
 #endif
 		for (j = k + 1; j < nlist; j++) {
 			peer_list[j - 1] = peer_list[j];
 			error[j - 1] = error[j];
 		}
 		nlist--;
 	}
 
 	/*
 	 * What remains is a list usually not greater than sys_minclock
 	 * peers. We want only a peer at the lowest stratum to become
 	 * the system peer, although all survivors are eligible for the
 	 * combining algorithm. Consider each peer in turn and OR the
 	 * leap bits on the assumption that, if some of them honk
 	 * nonzero bits, they must know what they are doing. Check for
 	 * prefer and pps peers at any stratum. Note that the head of
 	 * the list is at the lowest stratum and that unsynchronized
 	 * peers cannot survive this far.
 	 */
 	leap_next = 0;
 	for (i = 0; i < nlist; i++) {
 		peer = peer_list[i];
 		sys_survivors++;
 		leap_next |= peer->leap;
 		peer->status = CTL_PST_SEL_SYNCCAND;
 		if (peer->flags & FLAG_PREFER)
 			sys_prefer = peer;
 		if (peer == osys_peer)
 			typesystem = peer;
 #ifdef REFCLOCK
 		if (peer->refclktype == REFCLK_ATOM_PPS)
 			sys_pps = peer;
 #endif /* REFCLOCK */
 #if DEBUG
 		if (debug > 1)
 			printf("cluster: survivor %s metric %.6f\n",
 			    ntoa(&peer_list[i]->srcadr), synch[i]);
 #endif
 	}
 
 	/*
 	 * Anticlockhop provision. Keep the current system peer if it is
 	 * a survivor but not first in the list. But do that only HOPPER
 	 * times.
 	 */
 	if (osys_peer == NULL || typesystem == NULL || typesystem ==
 	    peer_list[0] || sys_hopper > sys_maxhop) {
 		typesystem = peer_list[0];
 		sys_hopper = 0;
 	} else {
 		peer->selbroken++;
 	}
 
 	/*
 	 * Mitigation rules of the game. There are several types of
 	 * peers that can be selected here: (1) orphan, (2) prefer peer
 	 * (flag FLAG_PREFER) (3) pps peers (type REFCLK_ATOM_PPS), (4)
 	 * the existing system peer, if any, and (5) the head of the
 	 * survivor list.
 	 */
 	if (typesystem->stratum >= sys_orphan) {
 
 		/*
 		 * If in orphan mode, choose the system peer. If the
 		 * lowest distance, we are the orphan parent and the
 		 * offset is zero.
 		 */
 		sys_peer = typesystem;
 		sys_peer->status = CTL_PST_SEL_SYSPEER;
 		if (sys_orphandelay < sys_peer->rootdelay) {
 			sys_offset = 0;
 			sys_refid = htonl(LOOPBACKADR);
 		} else {
 			sys_offset = sys_peer->offset;
 			sys_refid = addr2refid(&sys_peer->srcadr);
 		}
 		sys_jitter = LOGTOD(sys_precision);
 #ifdef DEBUG
 		if (debug > 1)
 			printf("select: orphan offset %.6f\n",
 			    sys_offset);
 #endif
 	} else if (sys_prefer) {
 
 		/*
 		 * If a pps peer is present, choose it; otherwise,
 		 * choose the prefer peer.
 		 */
 		if (sys_pps) {
 			sys_peer = sys_pps;
 			sys_peer->status = CTL_PST_SEL_PPS;
 			sys_offset = sys_peer->offset;
 			if (!pps_control)
 				NLOG(NLOG_SYSEVENT)
 				    msyslog(LOG_INFO,
 				    "pps sync enabled");
 			pps_control = current_time;
 #ifdef DEBUG
 			if (debug > 1)
 				printf("select: pps offset %.6f\n",
 				    sys_offset);
 #endif
 		} else {
 			sys_peer = sys_prefer;
 			sys_peer->status = CTL_PST_SEL_SYSPEER;
 			sys_offset = sys_peer->offset;
 #ifdef DEBUG
 			if (debug > 1)
 				printf("select: prefer offset %.6f\n",
 				    sys_offset);
 #endif
 		}
 		if (sys_peer->stratum == STRATUM_REFCLOCK ||
 		    sys_peer->stratum == STRATUM_UNSPEC)
 			sys_refid = sys_peer->refid;
 		else
 			sys_refid = addr2refid(&sys_peer->srcadr);
 		sys_jitter = sys_peer->jitter;
 	} else {
 
 		/*
 		 * Otherwise, choose the anticlockhopper.
 		 */ 
 		sys_peer = typesystem;
 		sys_peer->status = CTL_PST_SEL_SYSPEER;
 		clock_combine(peer_list, nlist);
 		if (sys_peer->stratum == STRATUM_REFCLOCK ||
 		    sys_peer->stratum == STRATUM_UNSPEC)
 			sys_refid = sys_peer->refid;
 		else
 			sys_refid = addr2refid(&sys_peer->srcadr);
 		sys_jitter = SQRT(SQUARE(sys_peer->jitter) +
 		    SQUARE(sys_jitter));
 #ifdef DEBUG
 		if (debug > 1)
 			printf("select: combine offset %.6f\n",
 			   sys_offset);
 #endif
 	}
 
 	/*
 	 * We have found the alpha male.
 	 */
 	sys_peer->flags |= FLAG_SYSPEER;
 	if (osys_peer != sys_peer) {
 		char *src;
 
 		report_event(EVNT_PEERSTCHG, NULL);
 
 #ifdef REFCLOCK
                 if (sys_peer->flags & FLAG_REFCLOCK)
                         src = refnumtoa(&sys_peer->srcadr);
                 else
 #endif /* REFCLOCK */
                         src = ntoa(&sys_peer->srcadr);
 		NLOG(NLOG_SYNCSTATUS)
 		    msyslog(LOG_INFO, "synchronized to %s, stratum %d",
 			src, sys_peer->stratum);
 	}
 	clock_update();
 }
 
 
 /*
  * clock_combine - compute system offset and jitter from selected peers
  *
  * Every survivor is weighted by the reciprocal of its root distance.
  * sys_offset receives the weighted mean of the survivor offsets.
  * sys_jitter receives the square root of the weighted mean of the
  * squared differences between each offset and that of the first
  * survivor.
  */
 static void
 clock_combine(
 	struct peer **peers,		/* survivor list */
 	int	npeers			/* number of survivors */
 	)
 {
 	int	n;			/* survivor index */
 	double	dist;			/* root distance of survivor n */
 	double	delta;			/* offset relative to peers[0] */
 	double	wsum;			/* sum of the weights */
 	double	osum;			/* weighted sum of offsets */
 	double	dsum;			/* weighted sum of squares */
 
 	wsum = 0;
 	osum = 0;
 	dsum = 0;
 	n = 0;
 	while (n < npeers) {
 		dist = root_distance(peers[n]);
 		delta = peers[n]->offset - peers[0]->offset;
 		wsum += 1. / dist;
 		osum += peers[n]->offset / dist;
 		dsum += SQUARE(delta) / dist;
 		n++;
 	}
 	sys_offset = osum / wsum;
 	sys_jitter = SQRT(dsum / wsum);
 }
 
 /*
  * root_distance - compute synchronization distance from peer to root
  *
  * Returns the sum of the peer root delay (taken as zero for a peer at
  * or above the orphan stratum), half of the larger of sys_mindisp and
  * the total round trip delay, the root and peer dispersions, the
  * dispersion accumulated at rate clock_phi since the last update, and
  * the peer jitter.
  */
 static double
 root_distance(
 	struct peer *peer
 	)
 {
 	double	rdelay;		/* root delay counted for this peer */
 
 	/*
 	 * Careful squeak here. The value returned must be greater than
 	 * the minimum root dispersion in order to avoid clockhop with
 	 * highly precise reference clocks. In orphan mode lose the peer
 	 * root delay, as that is used by the election algorithm.
 	 */
 	rdelay = (peer->stratum >= sys_orphan) ? 0 : peer->rootdelay;
 	return (rdelay + (max(sys_mindisp, rdelay + peer->delay) / 2 +
 	    peer->rootdispersion + peer->disp + clock_phi *
 	    (current_time - peer->update) + peer->jitter));
 }
 
 /*
  * peer_xmit - send packet for persistent association.
  *
  * peer	association to transmit for. Nothing is sent if it has no
  *		local interface (peer->dstadr is NULL).
  *
  * The header is built from the system variables and the peer's org
  * and rec timestamps. Without FLAG_AUTHENABLE the packet goes out
  * without a MAC. Otherwise a MAC is appended, and for autokey
  * associations (OPENSSL, FLAG_SKEY) extension fields may be included
  * as well. peer->xmt and peer->sent are updated on a successful send.
  * On a crypto or key error TEST9 is set in peer->flash and nothing
  * is sent.
  */
 static void
 peer_xmit(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	struct pkt xpkt;	/* transmit packet */
 	int	sendlen, authlen;
 	keyid_t	xkeyid = 0;	/* transmit key ID */
 	l_fp	xmt_tx;
 
 	if (!peer->dstadr)	/* don't bother with peers without interface */
 		return;
 
 	/*
 	 * This is deliciously complicated. There are three cases.
 	 *
 	 * case		leap	stratum	refid	delay	dispersion
 	 *
 	 * normal	system	system	system	system	system
 	 * orphan child	00	orphan	system	orphan	system
 	 * orphan parent 00	orphan	loopbk	0	0
 	 */
 	/*
 	 * This is a normal packet. Use the system variables.
 	 */
 	if (sys_stratum < sys_orphan) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap,
 		    peer->version, peer->hmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_stratum);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan child packet. The host is synchronized to an
 	 * orphan parent. Show leap synchronized, orphan stratum, system
 	 * reference ID, orphan root delay and system root dispersion.
 	 */
 	} else if (sys_peer != NULL) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    peer->version, peer->hmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan parent. Show leap synchronized, orphan
 	 * stratum, loopback reference ID and zero root delay and root
 	 * dispersion.
 	 */
 	} else {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    peer->version, peer->hmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = htonl(LOOPBACKADR);
 		xpkt.rootdelay = 0;
 		xpkt.rootdispersion = 0;
 	}
 	xpkt.ppoll = peer->hpoll;
 	xpkt.precision = sys_precision;
 	HTONL_FP(&sys_reftime, &xpkt.reftime);
 	HTONL_FP(&peer->org, &xpkt.org);
 	HTONL_FP(&peer->rec, &xpkt.rec);
 
 	/*
 	 * If the received packet contains a MAC, the transmitted packet
 	 * is authenticated and contains a MAC. If not, the transmitted
 	 * packet is not authenticated.
 	 *
 	 * It is most important when autokey is in use that the local
 	 * interface IP address be known before the first packet is
 	 * sent. Otherwise, it is not possible to compute a correct MAC
 	 * the recipient will accept. Thus, the I/O semantics have to do
 	 * a little more work. In particular, the wildcard interface
 	 * might not be usable.
 	 */
 	sendlen = LEN_PKT_NOMAC;
 	if (!(peer->flags & FLAG_AUTHENABLE)) {
 		get_systime(&peer->xmt);
 		HTONL_FP(&peer->xmt, &xpkt.xmt);
 		sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl],
 			&xpkt, sendlen);
 		peer->sent++;
 #ifdef DEBUG
 		if (debug)
 			printf("transmit: at %ld %s->%s mode %d\n",
 			       current_time, peer->dstadr ? stoa(&peer->dstadr->sin) : "-",
 			       stoa(&peer->srcadr), peer->hmode);
 #endif
 		return;
 	}
 
 	/*
 	 * The received packet contains a MAC, so the transmitted packet
 	 * must be authenticated. If autokey is enabled, fuss with the
 	 * various modes; otherwise, symmetric key cryptography is used.
 	 */
 #ifdef OPENSSL
 	if (crypto_flags && (peer->flags & FLAG_SKEY)) {
 		struct exten *exten;	/* extension field */
 
 		/*
 		 * The Public Key Dance (PKD): Cryptographic credentials
 		 * are contained in extension fields, each including a
 		 * 4-octet length/code word followed by a 4-octet
 		 * association ID and optional additional data. Optional
 		 * data includes a 4-octet data length field followed by
 		 * the data itself. Request messages are sent from a
 		 * configured association; response messages can be sent
 		 * from a configured association or can take the fast
 		 * path without ever matching an association. Response
 		 * messages have the same code as the request, but have
 		 * a response bit and possibly an error bit set. In this
 		 * implementation, a message may contain no more than
 		 * one command and no more than one response.
 		 *
 		 * Cryptographic session keys include both a public and
 		 * a private component. Request and response messages
 		 * using extension fields are always sent with the
 		 * private component set to zero. Packets without
 		 * extension fields include the private component when
 		 * the session key is generated.
 		 */
 		while (1) {
 
 			/*
 			 * Allocate and initialize a keylist if not
 			 * already done. Then, use the list in inverse
 			 * order, discarding keys once used. Keep the
 			 * latest key around until the next one, so
 			 * clients can use client/server packets to
 			 * compute propagation delay.
 			 *
 			 * Note that once a key is used from the list,
 			 * it is retained in the key cache until the
 			 * next key is used. This is to allow a client
 			 * to retrieve the encrypted session key
 			 * identifier to verify authenticity.
 			 *
 			 * If for some reason a key is no longer in the
 			 * key cache, a birthday has happened and the
 			 * pseudo-random sequence is probably broken. In
 			 * that case, purge the keylist and regenerate
 			 * it.
 			 */
 			if (peer->keynumber == 0)
 				make_keylist(peer, peer->dstadr);
 			else
 				peer->keynumber--;
 			xkeyid = peer->keylist[peer->keynumber];
 			if (authistrusted(xkeyid))
 				break;
 			else
 				key_expire(peer);
 		}
 		peer->keyid = xkeyid;
 		exten = NULL;
 		switch (peer->hmode) {
 
 		/*
 		 * In broadcast server mode the autokey values are
 		 * required by the broadcast clients. Push them when a
 		 * new keylist is generated; otherwise, push the
 		 * association message so the client can request them at
 		 * other times.
 		 */
 		case MODE_BROADCAST:
 			if (peer->flags & FLAG_ASSOC)
 				exten = crypto_args(peer, CRYPTO_AUTO |
 						    CRYPTO_RESP, NULL);
 			else
 				exten = crypto_args(peer, CRYPTO_ASSOC |
 						    CRYPTO_RESP, NULL);
 			break;
 
 		/*
 		 * In symmetric modes the digest, certificate, agreement
 		 * parameters, cookie and autokey values are required.
 		 * The leapsecond table is optional. But, a passive peer
 		 * will not believe the active peer until the latter has
 		 * synchronized, so the agreement must be postponed
 		 * until then. In any case, if a new keylist is
 		 * generated, the autokey values are pushed.
 		 *
 		 * If the crypto bit is lit, don't send requests.
 		 */
 		case MODE_ACTIVE:
 		case MODE_PASSIVE:
 			if (peer->flash & TEST9)
 				break;
 			/*
 			 * Parameter and certificate.
 			 */
 			if (!peer->crypto)
 				exten = crypto_args(peer, CRYPTO_ASSOC,
 						    sys_hostname);
 			else if (!(peer->crypto & CRYPTO_FLAG_VALID))
 				exten = crypto_args(peer, CRYPTO_CERT,
 						    peer->issuer);
 
 			/*
 			 * Identity. Note we have to sign the
 			 * certificate before the cookie to avoid a
 			 * deadlock when the passive peer is walking the
 			 * certificate trail. Awesome.
 			 */
 			else if (!(peer->crypto & CRYPTO_FLAG_VRFY))
 				exten = crypto_args(peer,
 						    crypto_ident(peer), NULL);
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 !(peer->crypto & CRYPTO_FLAG_SIGN))
 				exten = crypto_args(peer, CRYPTO_SIGN,
 						    sys_hostname);
 
 			/*
 			 * Autokey. We request the cookie only when the
 			 * server and client are synchronized and
 			 * signatures work both ways. On the other hand,
 			 * the active peer needs the autokey values
 			 * before then and when the passive peer is
 			 * waiting for the active peer to synchronize.
 			 * Any time we regenerate the key list, we offer
 			 * the autokey values without being asked.
 			 */
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 peer->leap != LEAP_NOTINSYNC &&
 				 !(peer->crypto & CRYPTO_FLAG_AGREE))
 				exten = crypto_args(peer, CRYPTO_COOK,
 						    NULL);
 			else if (peer->flags & FLAG_ASSOC)
 				exten = crypto_args(peer, CRYPTO_AUTO |
 						    CRYPTO_RESP, NULL);
 			else if (!(peer->crypto & CRYPTO_FLAG_AUTO))
 				exten = crypto_args(peer, CRYPTO_AUTO,
 						    NULL);
 
 			/*
 			 * Postamble. We trade leapseconds only when the
 			 * server and client are synchronized.
 			 */
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 peer->leap != LEAP_NOTINSYNC &&
 				 peer->crypto & CRYPTO_FLAG_TAI &&
 				 !(peer->crypto & CRYPTO_FLAG_LEAP))
 				exten = crypto_args(peer, CRYPTO_TAI,
 						    NULL);
 			break;
 
 		/*
 		 * In client mode the digest, certificate, agreement
 		 * parameters and cookie are required. The leapsecond
 		 * table is optional. If broadcast client mode, the
 		 * autokey values are required as well. In broadcast
 		 * client mode, these values must be acquired during the
 		 * client/server exchange to avoid having to wait until
 		 * the next key list regeneration. Otherwise, the poor
 		 * dude may die a lingering death until becoming
 		 * unreachable and attempting rebirth.
 		 *
 		 * If neither the server or client have the agreement
 		 * parameters, the protocol transmits the cookie in the
 		 * clear. If the server has the parameters, the client
 		 * requests them and the protocol blinds it using the
 		 * agreed key. It is a protocol error if the client has
 		 * the parameters but the server does not.
 		 *
 		 * If the crypto bit is lit, don't send requests.
 		 */
 		case MODE_CLIENT:
 			if (peer->flash & TEST9)
 				break;
 			/*
 			 * Parameter and certificate.
 			 */
 			if (!peer->crypto)
 				exten = crypto_args(peer, CRYPTO_ASSOC,
 						    sys_hostname);
 			else if (!(peer->crypto & CRYPTO_FLAG_VALID))
 				exten = crypto_args(peer, CRYPTO_CERT,
 						    peer->issuer);
 
 			/*
 			 * Identity
 			 */
 			else if (!(peer->crypto & CRYPTO_FLAG_VRFY))
 				exten = crypto_args(peer,
 						    crypto_ident(peer), NULL);
 
 			/*
 			 * Autokey
 			 */
 			else if (!(peer->crypto & CRYPTO_FLAG_AGREE))
 				exten = crypto_args(peer, CRYPTO_COOK,
 						    NULL);
 			else if (!(peer->crypto & CRYPTO_FLAG_AUTO) &&
 				 (peer->cast_flags & MDF_BCLNT))
 				exten = crypto_args(peer, CRYPTO_AUTO,
 						    NULL);
 
 			/*
 			 * Postamble. We can sign the certificate here,
 			 * since there is no chance of deadlock.
 			 */
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 !(peer->crypto & CRYPTO_FLAG_SIGN))
 				exten = crypto_args(peer, CRYPTO_SIGN,
 						    sys_hostname);
 			else if (sys_leap != LEAP_NOTINSYNC &&
 				 peer->crypto & CRYPTO_FLAG_TAI &&
 				 !(peer->crypto & CRYPTO_FLAG_LEAP))
 				exten = crypto_args(peer, CRYPTO_TAI,
 						    NULL);
 			break;
 		}
 
 		/*
 		 * Build the extension fields as directed. A response to
 		 * a request is always sent, even if an error. If an
 		 * error occurs when sending a request, the crypto
 		 * machinery broke or was misconfigured. In that case
 		 * light the crypto bit to suppress further requests.
 		 */
 		if (peer->cmmd != NULL) {
 			peer->cmmd->associd = htonl(peer->associd);
 			sendlen += crypto_xmit(&xpkt, &peer->srcadr,
 					       sendlen, peer->cmmd, 0);
 			free(peer->cmmd);
 			peer->cmmd = NULL;
 		}
 		if (exten != NULL) {
 			int ltemp = 0;
 
 			/*
 			 * An extension with a zero opcode is freed
 			 * without adding anything to the packet.
 			 */
 			if (exten->opcode != 0) {
 				ltemp = crypto_xmit(&xpkt,
 						       &peer->srcadr, sendlen, exten, 0);
 				if (ltemp == 0) {
 					peer->flash |= TEST9; /* crypto error */
 					free(exten);
 					return;
 				}
 			}
 			sendlen += ltemp;
 			free(exten);
 		}
 
 		/*
 		 * If extension fields are present, we must use a
 		 * private cookie value of zero. Don't send if the
 		 * crypto bit is set and no extension field is present,
 		 * but in that case give back the key. Most intricate.
 		 */
 		if (sendlen > LEN_PKT_NOMAC) {
 			session_key(&peer->dstadr->sin, &peer->srcadr,
 			    xkeyid, 0, 2);
 		} else if (peer->flash & TEST9) {
 			authtrust(xkeyid, 0);
 			return;
 		}
 	}
 #endif /* OPENSSL */
 
 	/*
 	 * Stash the transmit timestamp corrected for the encryption
 	 * delay. If autokey, give back the key, as we use keys only
 	 * once. Check for errors such as missing keys, buffer overflow,
 	 * etc.
 	 */
 	xkeyid = peer->keyid;
 	get_systime(&peer->xmt);
 	L_ADD(&peer->xmt, &sys_authdelay);
 	HTONL_FP(&peer->xmt, &xpkt.xmt);
 	authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen);
 	if (authlen == 0) {
 		msyslog(LOG_INFO, "transmit: %s key %u not found",
 		    stoa(&peer->srcadr), xkeyid);
 		peer->flash |= TEST9;		/* no key found */
 		return;
 	}
 	sendlen += authlen;
 #ifdef OPENSSL
 	/* Key IDs above NTP_MAXKEY are given back after a single use. */
 	if (xkeyid > NTP_MAXKEY)
 		authtrust(xkeyid, 0);
 #endif /* OPENSSL */
 	get_systime(&xmt_tx);
 	if (sendlen > sizeof(xpkt)) {
 		msyslog(LOG_ERR, "buffer overflow %u", sendlen);
 		exit (-1);
 	}
 	sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt,
 		sendlen);
 
 	/*
 	 * Calculate the encryption delay. Keep the minimum over
 	 * the latest two samples.
 	 */
 	L_SUB(&xmt_tx, &peer->xmt);
 	L_ADD(&xmt_tx, &sys_authdelay);
 	sys_authdly[1] = sys_authdly[0];
 	sys_authdly[0] = xmt_tx.l_uf;
 	if (sys_authdly[0] < sys_authdly[1])
 		sys_authdelay.l_uf = sys_authdly[0];
 	else
 		sys_authdelay.l_uf = sys_authdly[1];
 	peer->sent++;
 #ifdef OPENSSL
 #ifdef DEBUG
 	if (debug)
 		printf(
 			"transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d index %d\n",
 			current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-",
 			ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen -
 			authlen, authlen, peer->keynumber);
 #endif
 #else
 #ifdef DEBUG
 	if (debug)
 		printf(
 			"transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n",
 			current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-",
 			ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen -
 			authlen, authlen);
 #endif
 #endif /* OPENSSL */
 }
 
 
 /*
  * fast_xmit - Send packet for nonpersistent association. Note that
  * neither the source or destination can be a broadcast address.
  */
 static void
 fast_xmit(
 	struct recvbuf *rbufp,	/* receive packet pointer */
 	int	xmode,		/* transmit mode */
 	keyid_t	xkeyid,		/* transmit key ID */
 	int	mask		/* restrict mask */
 	)
 {
 	struct pkt xpkt;		/* transmit packet structure */
 	struct pkt *rpkt;		/* receive packet structure */
 	l_fp	xmt_ts;			/* timestamp */
 	l_fp	xmt_tx;			/* timestamp after authent */
 	int	sendlen, authlen;
 #ifdef OPENSSL
 	u_int32	temp32;
 #endif
 
 	/*
 	 * Initialize transmit packet header fields from the receive
 	 * buffer provided. We leave some fields intact as received. If
 	 * the gazinta was from a multicast address, the gazoutta must
 	 * go out another way.
 	 *
 	 * The root delay field is special. If the system stratum is
 	 * less than the orphan stratum, send the real root delay.
 	 * Otherwise, if there is a system peer, we are an orphan child,
 	 * so send the orphan delay. Otherwise, we must be an orphan
 	 * parent, so send zero.
 	 */
 	rpkt = &rbufp->recv_pkt;
 	if (rbufp->dstadr->flags & INT_MCASTOPEN)
 		rbufp->dstadr = findinterface(&rbufp->recv_srcadr);
 
 	/*
 	 * This is deliciously complicated. There are four cases.
 	 *
 	 * case		leap	stratum	refid	delay	dispersion
 	 *
 	 * KoD		11	16	KISS	system	system
 	 * normal	system	system	system	system	system
 	 * orphan child	00	orphan	system	orphan	system
 	 * orphan parent 00	orphan	loopbk	0	0
 	 */
 	/*
 	 * This is a kiss-of-death (KoD) packet. Show leap
 	 * unsynchronized, stratum zero, reference ID the four-character
 	 * kiss code and system root delay. Note the rate limit on these
 	 * packets. Once a second initialize a bucket counter. Every
 	 * packet sent decrements the counter until reaching zero. If
 	 * the counter is zero, drop the kiss.
 	 */
 	if (mask & RES_LIMITED) {
 		sys_limitrejected++;
 		if (sys_kod == 0 || !(mask & RES_DEMOBILIZE))
 			return;
 
 		sys_kod--;
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOTINSYNC,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_UNSPEC;
 		memcpy(&xpkt.refid, "RATE", 4);
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is a normal packet. Use the system variables.
 	 */
 	} else if (sys_stratum < sys_orphan) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_stratum);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan child packet. The host is synchronized to an
 	 * orphan parent. Show leap synchronized, orphan stratum, system
 	 * reference ID and orphan root delay.
 	 */
 	} else if (sys_peer != NULL) {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = sys_refid;
 		xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay));
 		xpkt.rootdispersion =
 		    HTONS_FP(DTOUFP(sys_rootdispersion));
 
 	/*
 	 * This is an orphan parent. Show leap synchronized, orphan
 	 * stratum, loopback reference ID and zero root delay.
 	 */
 	} else {
 		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING,
 		    PKT_VERSION(rpkt->li_vn_mode), xmode);
 		xpkt.stratum = STRATUM_TO_PKT(sys_orphan);
 		xpkt.refid = htonl(LOOPBACKADR);
 		xpkt.rootdelay = HTONS_FP(DTOFP(0));
 		xpkt.rootdispersion = HTONS_FP(DTOFP(0));
 	}
 	xpkt.ppoll = rpkt->ppoll;
 	xpkt.precision = sys_precision;
 	xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion));
 	HTONL_FP(&sys_reftime, &xpkt.reftime);
 	xpkt.org = rpkt->xmt;
 	HTONL_FP(&rbufp->recv_time, &xpkt.rec);
 
 	/*
 	 * If the received packet contains a MAC, the transmitted packet
 	 * is authenticated and contains a MAC. If not, the transmitted
 	 * packet is not authenticated.
 	 */
 	sendlen = LEN_PKT_NOMAC;
 	if (rbufp->recv_length == sendlen) {
 		get_systime(&xmt_ts);
 		HTONL_FP(&xmt_ts, &xpkt.xmt);
 		sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt,
 		    sendlen);
 #ifdef DEBUG
 		if (debug)
 			printf("transmit: at %ld %s->%s mode %d\n",
 			    current_time, stoa(&rbufp->dstadr->sin),
 			    stoa(&rbufp->recv_srcadr), xmode);
 #endif
 		return;
 	}
 
 	/*
 	 * The received packet contains a MAC, so the transmitted packet
 	 * must be authenticated. For symmetric key cryptography, use
 	 * the predefined and trusted symmetric keys to generate the
 	 * cryptosum. For autokey cryptography, use the server private
 	 * value to generate the cookie, which is unique for every
 	 * source-destination-key ID combination.
 	 */
 #ifdef OPENSSL
 	if (xkeyid > NTP_MAXKEY) {
 		keyid_t cookie;
 
 		/*
 		 * The only way to get here is a reply to a legitimate
 		 * client request message, so the mode must be
 		 * MODE_SERVER. If an extension field is present, there
 		 * can be only one and that must be a command. Do what
 		 * needs, but with private value of zero so the poor
 		 * jerk can decode it. If no extension field is present,
 		 * use the cookie to generate the session key.
 		 */
 		cookie = session_key(&rbufp->recv_srcadr,
 		    &rbufp->dstadr->sin, 0, sys_private, 0);
 		if (rbufp->recv_length >= (int)(sendlen + MAX_MAC_LEN +
 		    2 * sizeof(u_int32))) {
 			session_key(&rbufp->dstadr->sin,
 			    &rbufp->recv_srcadr, xkeyid, 0, 2);
 			temp32 = CRYPTO_RESP;
 			rpkt->exten[0] |= htonl(temp32);
 			sendlen += crypto_xmit(&xpkt,
 			    &rbufp->recv_srcadr, sendlen,
 			    (struct exten *)rpkt->exten, cookie);
 		} else {
 			session_key(&rbufp->dstadr->sin,
 			    &rbufp->recv_srcadr, xkeyid, cookie, 2);
 		}
 	}
 #endif /* OPENSSL */
 	get_systime(&xmt_ts);
 	L_ADD(&xmt_ts, &sys_authdelay);
 	HTONL_FP(&xmt_ts, &xpkt.xmt);
 	authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen);
 	sendlen += authlen;
 #ifdef OPENSSL
 	if (xkeyid > NTP_MAXKEY)
 		authtrust(xkeyid, 0);
 #endif /* OPENSSL */
 	get_systime(&xmt_tx);
 	if (sendlen > sizeof(xpkt)) {
 		msyslog(LOG_ERR, "buffer overflow %u", sendlen);
 		exit (-1);
 	}
 	sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen);
 
 	/*
 	 * Calculate the encryption delay. Keep the minimum over the
 	 * latest two samples.
 	 */
 	L_SUB(&xmt_tx, &xmt_ts);
 	L_ADD(&xmt_tx, &sys_authdelay);
 	sys_authdly[1] = sys_authdly[0];
 	sys_authdly[0] = xmt_tx.l_uf;
 	if (sys_authdly[0] < sys_authdly[1])
 		sys_authdelay.l_uf = sys_authdly[0];
 	else
 		sys_authdelay.l_uf = sys_authdly[1];
 #ifdef DEBUG
 	if (debug)
 		printf(
 		    "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n",
 		    current_time, ntoa(&rbufp->dstadr->sin),
 		    ntoa(&rbufp->recv_srcadr), xmode, xkeyid, sendlen -
 		    authlen, authlen);
 #endif
 }
 
 
 #ifdef OPENSSL
 /*
  * key_expire - purge the key list
  *
  * If the peer owns a key list, trust is revoked (authtrust(key, 0))
  * for every entry from index 0 through peer->keynumber inclusive and
  * the list is freed. The peer's send value is then released and the
  * key number reset to zero.
  */
 void
 key_expire(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	int idx;

 	if (peer->keylist != NULL) {
 		/* Revoke each session key before dropping the list. */
 		idx = 0;
 		while (idx <= peer->keynumber) {
 			authtrust(peer->keylist[idx], 0);
 			idx++;
 		}
 		free(peer->keylist);
 		peer->keylist = NULL;
 	}
 	value_free(&peer->sndval);
 	peer->keynumber = 0;
 #ifdef DEBUG
 	if (debug)
 		printf("key_expire: at %lu\n", current_time);
 #endif
 }
 #endif /* OPENSSL */
 
 
 /*
  * Determine if the peer is unfit for synchronization
  *
  * A peer is unfit for synchronization if
  * > TEST10 bad leap or stratum below floor or at or above ceiling
  * > TEST11 root distance exceeded
  * > TEST12 a direct or indirect synchronization loop would form
  * > TEST13 unreachable or noselect
  */
 int				/* FALSE if fit, TRUE if unfit */
 peer_unfit(
 	struct peer *peer	/* peer structure pointer */
 	)
 {
 	int	rval = 0;
 
 	/*
 	 * A stratum error occurs if (1) the server has never been
 	 * synchronized, (2) the server stratum is below the floor or
 	 * greater than or equal to the ceiling, (3) the system stratum
 	 * is below the orphan stratum and the server stratum is greater
 	 * than or equal to the orphan stratum.
 	 */
 	if (peer->leap == LEAP_NOTINSYNC || peer->stratum < sys_floor ||
 	    peer->stratum >= sys_ceiling || (sys_stratum < sys_orphan &&
 	    peer->stratum >= sys_orphan))
 		rval |= TEST10;		/* stratum out of bounds */
 
 	/*
 	 * A distance error occurs if the root distance is greater than
 	 * or equal to the distance threshold plus the increment due to
 	 * one poll interval.
 	 */
 	if (root_distance(peer) >= sys_maxdist + clock_phi *
 	    ULOGTOD(sys_poll))
 		rval |= TEST11;		/* distance exceeded */
 
 	/*
 	 * A loop error occurs if the remote peer is synchronized to the
 	 * local peer of if the remote peer is synchronized to the same
 	 * server as the local peer, but only if the remote peer is not
 	 * the orphan parent.
 	 */
 	if (peer->stratum > 1 && peer->refid != htonl(LOOPBACKADR) &&
 	    ((!peer->dstadr || peer->refid == peer->dstadr->addr_refid) ||
 	    peer->refid == sys_refid))
 		rval |= TEST12;		/* synch loop */
 
 	/*
 	 * An unreachable error occurs if the server is unreachable or
 	 * the noselect bit is set.
 	 */
 	if (!peer->reach || peer->flags & FLAG_NOSELECT)
 		rval |= TEST13;		/* unreachable */
 
 	peer->flash &= ~PEER_TEST_MASK;
 	peer->flash |= rval;
 	return (rval);
 }
 
 
 /*
  * Find the precision of this particular machine
  */
 #define MINSTEP 100e-9		/* minimum clock increment (s) */
 #define MAXSTEP 20e-3		/* maximum clock increment (s) */
 #define MINLOOPS 5		/* minimum number of step samples */
 
 /*
  * This routine calculates the system precision, defined as the minimum
  * of a sequence of differences between successive readings of the
  * system clock. However, if the system clock can be read more than once
  * during a tick interval, the difference can be zero or one LSB unit,
  * where the LSB corresponds to one nanosecond or one microsecond.
  * Conceivably, if some other process preempts this one and reads the
  * clock, the difference can be more than one LSB unit.
  *
  * For hardware clock frequencies of 10 MHz or less, we assume the
  * logical clock advances only at the hardware clock tick. For higher
  * frequencies, we assume the logical clock can advance no more than 100
  * nanoseconds between ticks.
  */
 int
 default_get_precision(void)
 {
 	l_fp	now;		/* current clock reading */
 	l_fp	prev;		/* previous clock reading */
 	l_fp	delta;		/* now - prev */
 	double	tick;		/* smallest accepted step (s) */
 	double	step;		/* delta as a double (s) */
 	int	samples;	/* accepted step samples so far */
 	int	log2p;		/* doublings needed to pass 1 s */

 	/*
 	 * Collect MINLOOPS clock steps and keep the smallest. Steps
 	 * shorter than MINSTEP are outliers and are not counted. If
 	 * no step is shorter than MAXSTEP, MAXSTEP is used.
 	 */
 	get_systime(&prev);
 	tick = MAXSTEP;
 	samples = 0;
 	while (samples < MINLOOPS) {
 		get_systime(&now);
 		delta = now;
 		L_SUB(&delta, &prev);
 		prev = now;
 		LFPTOD(&delta, step);
 		if (step >= MINSTEP) {
 			samples++;
 			if (step < tick)
 				tick = step;
 		}
 	}

 	/*
 	 * Find the nearest power of two: double the tick until it
 	 * exceeds one second, then step back once if the previous
 	 * power was the closer one.
 	 */
 	NLOG(NLOG_SYSEVENT)
 	    msyslog(LOG_INFO, "precision = %.3f usec", tick * 1e6);
 	log2p = 0;
 	while (tick <= 1) {
 		tick *= 2;
 		log2p++;
 	}
 	if (tick - 1. > 1. - tick / 2)
 		log2p--;
 	return (-log2p);
 }
 
 
 /*
  * kod_proto - called once per second to limit kiss-of-death packets
  *
  * Reloads the KoD bucket counter (sys_kod) from the configured rate
  * (sys_kod_rate).
  */
 void
 kod_proto(void)
 {
 	sys_kod = sys_kod_rate;
 }
 
 
 /*
  * init_proto - initialize the protocol module's data
  *
  * Sets the system (sys_*) variables to their startup values, measures
  * the clock precision, clears the protocol statistics, fills the TTL
  * table and sets the default enable switches.
  */
 void
 init_proto(void)
 {
 	l_fp	dummy;		/* receives a clock reading that is discarded */
 	int	i;		/* TTL table index */

 	/*
 	 * Fill in the sys_* stuff.  Default is don't listen to
 	 * broadcasting, authenticate.
 	 */
 	sys_leap = LEAP_NOTINSYNC;
 	sys_stratum = STRATUM_UNSPEC;
 	memcpy(&sys_refid, "INIT", 4);
 	sys_precision = (s_char)default_get_precision();
 	/* Initial jitter is derived from the precision just measured. */
 	sys_jitter = LOGTOD(sys_precision);
 	sys_rootdelay = 0;
 	/* Random orphan delay in the range [0, sys_maxdist). */
 	sys_orphandelay = (double)(ntp_random() & 0xffff) / 65536. *
 	    sys_maxdist;
 	sys_rootdispersion = 0;
 	L_CLR(&sys_reftime);
 	sys_peer = NULL;
 	sys_survivors = 0;
 	/* NOTE(review): result is unused; presumably called for a side effect - confirm */
 	get_systime(&dummy);
 	sys_manycastserver = 0;
 	sys_bclient = 0;
 	sys_bdelay = DEFBROADDELAY;
 	sys_calldelay = BURST_DELAY;
 	sys_authenticate = 1;
 	L_CLR(&sys_authdelay);
 	sys_authdly[0] = sys_authdly[1] = 0;
 	sys_stattime = 0;
 	/* proto_clr_stats() sets sys_stattime to current_time. */
 	proto_clr_stats();
 	/*
 	 * Spread the TTL table evenly over 0..255; sys_ttlmax ends up
 	 * as the last index filled.
 	 */
 	for (i = 0; i < MAX_TTL; i++) {
 		sys_ttl[i] = (u_char)((i * 256) / MAX_TTL);
 		sys_ttlmax = i;
 	}
 #ifdef OPENSSL
 	sys_automax = 1 << NTP_AUTOMAX;
 #endif /* OPENSSL */

 	/*
 	 * Default these to enable
 	 */
 	ntp_enable = 1;
 #ifndef KERNEL_FLL_BUG
 	kern_enable = 1;
 #endif
 	pps_enable = 0;
 	stats_control = 1;
 }
 
 
 /*
  * proto_config - configure the protocol module
  *
  * Sets the protocol variable selected by item. Depending on the item
  * the new setting is taken from value (integer switches), dvalue
  * (floating-point or numeric tunables) or svalue (multicast group
  * address); the other arguments are ignored for that item. An
  * unrecognized item is logged and otherwise ignored.
  */
 void
 proto_config(
 	int	item,
 	u_long	value,
 	double	dvalue,
 	struct sockaddr_storage* svalue
 	)
 {
 	/*
 	 * Figure out what he wants to change, then do it
 	 */
 	switch (item) {

 	/*
 	 * Turn on/off kernel discipline.
 	 */
 	case PROTO_KERNEL:
 		kern_enable = (int)value;
 		break;

 	/*
 	 * Turn on/off clock discipline.
 	 */
 	case PROTO_NTP:
 		ntp_enable = (int)value;
 		break;

 	/*
 	 * Turn on/off monitoring.
 	 */
 	case PROTO_MONITOR:
 		if (value)
 			mon_start(MON_ON);
 		else
 			mon_stop(MON_ON);
 		break;

 	/*
 	 * Turn on/off statistics.
 	 */
 	case PROTO_FILEGEN:
 		stats_control = (int)value;
 		break;

 	/*
 	 * Turn on/off enable broadcasts.
 	 */
 	case PROTO_BROADCLIENT:
 		sys_bclient = (int)value;
 		if (sys_bclient == 0)
 			io_unsetbclient();
 		else
 			io_setbclient();
 		break;

 	/*
 	 * Turn on/off PPS discipline.
 	 */
 	case PROTO_PPS:
 		pps_enable = (int)value;
 		break;

 	/*
 	 * Add multicast group address. The broadcast client switch is
 	 * turned on even when no address is supplied.
 	 */
 	case PROTO_MULTICAST_ADD:
 		if (svalue)
 		    io_multicast_add(*svalue);
 		sys_bclient = 1;
 		break;

 	/*
 	 * Delete multicast group address.
 	 */
 	case PROTO_MULTICAST_DEL:
 		if (svalue)
 		    io_multicast_del(*svalue);
 		break;

 	/*
 	 * Set default broadcast delay.
 	 */
 	case PROTO_BROADDELAY:
 		sys_bdelay = dvalue;
 		break;

 	/*
 	 * Set modem call delay.
 	 */
 	case PROTO_CALLDELAY:
 		sys_calldelay = (int)value;
 		break;

 	/*
 	 * Turn on/off authentication to mobilize ephemeral
 	 * associations.
 	 */
 	case PROTO_AUTHENTICATE:
 		sys_authenticate = (int)value;
 		break;

 	/*
 	 * Set minimum number of survivors.
 	 */
 	case PROTO_MINCLOCK:
 		sys_minclock = (int)dvalue;
 		break;

 	/*
 	 * Set maximum number of preemptable associations.
 	 */
 	case PROTO_MAXCLOCK:
 		sys_maxclock = (int)dvalue;
 		break;

 	/*
 	 * Set the minsane threshold (sys_minsane).
 	 */
 	case PROTO_MINSANE:
 		sys_minsane = (int)dvalue;
 		break;

 	/*
 	 * Set stratum floor.
 	 */
 	case PROTO_FLOOR:
 		sys_floor = (int)dvalue;
 		break;

 	/*
 	 * Set stratum ceiling.
 	 */
 	case PROTO_CEILING:
 		sys_ceiling = (int)dvalue;
 		break;

 	/*
 	 * Set orphan stratum.
 	 */
 	case PROTO_ORPHAN:
 		sys_orphan = (int)dvalue;
 		break;

 	/*
 	 * Set cohort switch.
 	 */
 	case PROTO_COHORT:
 		sys_cohort = (int)dvalue;
 		break;

 	/*
 	 * Set minimum dispersion increment.
 	 */
 	case PROTO_MINDISP:
 		sys_mindisp = dvalue;
 		break;

 	/*
 	 * Set maximum distance (select threshold).
 	 */
 	case PROTO_MAXDIST:
 		sys_maxdist = dvalue;
 		break;

 	/*
 	 * Set anticlockhop threshold.
 	 */
 	case PROTO_MAXHOP:
 		sys_maxhop = (int)dvalue;
 		break;

 	/*
 	 * Set adjtime() resolution (s).
 	 */
 	case PROTO_ADJ:
 		sys_tick = dvalue;
 		break;

 	/*
 	 * Set manycast beacon interval.
 	 */
 	case PROTO_BEACON:
 		sys_beacon = (int)dvalue;
 		break;

 #ifdef REFCLOCK
 	/*
 	 * Turn on/off refclock calibrate
 	 */
 	case PROTO_CAL:
 		cal_enable = (int)value;
 		break;
 #endif /* REFCLOCK */
 	default:

 		/*
 		 * Log this error. value is unsigned long, so it is
 		 * printed with %lu.
 		 */
 		msyslog(LOG_INFO,
 		    "proto_config: illegal item %d, value %lu", item,
 		    value);
 		break;
 	}
 }
 
 
 /*
  * proto_clr_stats - clear protocol stat counters
  *
  * Zeroes the packet counters and records the current time as the
  * moment of the reset.
  */
 void
 proto_clr_stats(void)
 {
 	/* Packets seen and handled. */
 	sys_processed = 0;
 	sys_received = 0;

 	/* Packets by protocol version. */
 	sys_unknownversion = 0;
 	sys_oldversionpkt = 0;
 	sys_newversionpkt = 0;

 	/* Packets rejected. */
 	sys_limitrejected = 0;
 	sys_badauth = 0;
 	sys_badlength = 0;
 	sys_restricted = 0;

 	/* Time of this reset. */
 	sys_stattime = current_time;
 }
Index: stable/9/sys/netinet/igmp.c
===================================================================
--- stable/9/sys/netinet/igmp.c	(revision 281230)
+++ stable/9/sys/netinet/igmp.c	(revision 281231)
@@ -1,3649 +1,3648 @@
 /*-
  * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 1988 Stephen Deering.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
  */
 
 /*
  * Internet Group Management Protocol (IGMP) routines.
  * [RFC1112, RFC2236, RFC3376]
  *
  * Written by Steve Deering, Stanford, May 1988.
  * Modified by Rosen Sharma, Stanford, Aug 1994.
  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
  *
  * MULTICAST Revision: 3.5.1.4
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/igmp.h>
 #include <netinet/igmp_var.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifndef KTR_IGMPV3
 #define KTR_IGMPV3 KTR_INET
 #endif
 
 static struct igmp_ifinfo *
 		igi_alloc_locked(struct ifnet *);
 static void	igi_delete_locked(const struct ifnet *);
 static void	igmp_dispatch_queue(struct ifqueue *, int, const int);
 static void	igmp_fasttimo_vnet(void);
 static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
 static int	igmp_handle_state_change(struct in_multi *,
 		    struct igmp_ifinfo *);
 static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
 static int	igmp_input_v1_query(struct ifnet *, const struct ip *,
 		    const struct igmp *);
 static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
 		    const struct igmp *);
 static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
 		    /*const*/ struct igmpv3 *);
 static int	igmp_input_v3_group_query(struct in_multi *,
 		    struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
 static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
 		    /*const*/ struct igmp *);
 static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
 		    /*const*/ struct igmp *);
 static void	igmp_intr(struct mbuf *);
 static int	igmp_isgroupreported(const struct in_addr);
 static struct mbuf *
 		igmp_ra_alloc(void);
 #ifdef KTR
 static char *	igmp_rec_type_to_str(const int);
 #endif
 static void	igmp_set_version(struct igmp_ifinfo *, const int);
 static void	igmp_slowtimo_vnet(void);
 static int	igmp_v1v2_queue_report(struct in_multi *, const int);
 static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
 static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
 static void	igmp_v2_update_group(struct in_multi *, const int);
 static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
 static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
 static struct mbuf *
 		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
 static int	igmp_v3_enqueue_group_record(struct ifqueue *,
 		    struct in_multi *, const int, const int, const int);
 static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
 		    struct in_multi *);
 static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
 		    struct ifqueue *, struct ifqueue *, struct in_multi *,
 		    const int);
 static int	igmp_v3_merge_state_changes(struct in_multi *,
 		    struct ifqueue *);
 static void	igmp_v3_suppress_group_record(struct in_multi *);
 static int	sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
 static int	sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
 static int	sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
 
 static const struct netisr_handler igmp_nh = {
 	.nh_name = "igmp",
 	.nh_handler = igmp_intr,
 	.nh_proto = NETISR_IGMP,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 /*
  * System-wide globals.
  *
  * Unlocked access to these is OK, except for the global IGMP output
  * queue. The IGMP subsystem lock ends up being system-wide for the moment,
  * because all VIMAGEs have to share a global output queue, as netisrs
  * themselves are not virtualized.
  *
  * Locking:
  *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  *    Any may be taken independently; if any are held at the same
  *    time, the above lock order must be followed.
  *  * All output is delegated to the netisr.
  *    Now that Giant has been eliminated, the netisr may be inlined.
  *  * IN_MULTI_LOCK covers in_multi.
  *  * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
  *    including the output queue.
  *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
  *    per-link state iterators.
  *  * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
  *    therefore it is not refcounted.
  *    We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
  *
  * Reference counting
  *  * IGMP acquires its own reference every time an in_multi is passed to
  *    it and the group is being joined for the first time.
  *  * IGMP releases its reference(s) on in_multi in a deferred way,
  *    because the operations which process the release run as part of
  *    a loop whose control variables are directly affected by the release
  *    (that, and not recursing on the IF_ADDR_LOCK).
  *
  * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
  * to a vnet in ifp->if_vnet.
  *
  * SMPng: XXX We may potentially race operations on ifma_protospec.
  * The problem is that we currently lack a clean way of taking the
  * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
  * as anything which modifies ifma needs to be covered by that lock.
  * So check for ifma_protospec being NULL before proceeding.
  */
 struct mtx		 igmp_mtx;
 
 struct mbuf		*m_raopt;		 /* Router Alert option */
 static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
 
 /*
  * VIMAGE-wide globals.
  *
  * The IGMPv3 timers themselves need to run per-image, however,
  * protosw timers run globally (see tcp).
  * An ifnet can only be in one vimage at a time, and the loopback
  * ifnet, loif, is itself virtualized.
  * It would otherwise be possible to seriously hose IGMP state,
  * and create inconsistencies in upstream multicast routing, if you have
  * multiple VIMAGEs running on the same link joining different multicast
  * groups, UNLESS the "primary IP address" is different. This is because
  * IGMP for IPv4 does not force link-local addresses to be used for each
  * node, unlike MLD for IPv6.
  * Obviously the IGMPv3 per-interface state has per-vimage granularity
  * also as a result.
  *
  * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
  * policy to control the address used by IGMP on the link.
  */
 static VNET_DEFINE(int, interface_timers_running);	/* IGMPv3 general
 							 * query response */
 static VNET_DEFINE(int, state_change_timers_running);	/* IGMPv3 state-change
 							 * retransmit */
 static VNET_DEFINE(int, current_state_timers_running);	/* IGMPv1/v2 host
 							 * report; IGMPv3 g/sg
 							 * query response */
 
 #define	V_interface_timers_running	VNET(interface_timers_running)
 #define	V_state_change_timers_running	VNET(state_change_timers_running)
 #define	V_current_state_timers_running	VNET(current_state_timers_running)
 
 static VNET_DEFINE(LIST_HEAD(, igmp_ifinfo), igi_head);
 static VNET_DEFINE(struct igmpstat, igmpstat) = {
 	.igps_version = IGPS_VERSION_3,
 	.igps_len = sizeof(struct igmpstat),
 };
 static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0};
 
 #define	V_igi_head			VNET(igi_head)
 #define	V_igmpstat			VNET(igmpstat)
 #define	V_igmp_gsrdelay			VNET(igmp_gsrdelay)
 
 static VNET_DEFINE(int, igmp_recvifkludge) = 1;
 static VNET_DEFINE(int, igmp_sendra) = 1;
 static VNET_DEFINE(int, igmp_sendlocal) = 1;
 static VNET_DEFINE(int, igmp_v1enable) = 1;
 static VNET_DEFINE(int, igmp_v2enable) = 1;
 static VNET_DEFINE(int, igmp_legacysupp);
 static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;
 
 #define	V_igmp_recvifkludge		VNET(igmp_recvifkludge)
 #define	V_igmp_sendra			VNET(igmp_sendra)
 #define	V_igmp_sendlocal		VNET(igmp_sendlocal)
 #define	V_igmp_v1enable			VNET(igmp_v1enable)
 #define	V_igmp_v2enable			VNET(igmp_v2enable)
 #define	V_igmp_legacysupp		VNET(igmp_legacysupp)
 #define	V_igmp_default_version		VNET(igmp_default_version)
 
 /*
  * Virtualized sysctls.
  */
 SYSCTL_VNET_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW,
     &VNET_NAME(igmpstat), igmpstat, "");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW,
     &VNET_NAME(igmp_recvifkludge), 0,
     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW,
     &VNET_NAME(igmp_sendra), 0,
     "Send IP Router Alert option in IGMPv2/v3 messages");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW,
     &VNET_NAME(igmp_sendlocal), 0,
     "Send IGMP membership reports for 224.0.0.0/24 groups");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW,
     &VNET_NAME(igmp_v1enable), 0,
     "Enable backwards compatibility with IGMPv1");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW,
     &VNET_NAME(igmp_v2enable), 0,
     "Enable backwards compatibility with IGMPv2");
 SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW,
     &VNET_NAME(igmp_legacysupp), 0,
     "Allow v1/v2 reports to suppress v3 group responses");
 SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, default_version,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
     "Default version of IGMP to run on each interface");
 SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
 
 /*
  * Non-virtualized sysctls.
  */
 static SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo,
     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo,
     "Per-interface IGMPv3 state");
 
 /*
  * Record the originating interface in an mbuf's packet header: the
  * interface index goes into flowid and, on VIMAGE kernels, the
  * interface's vnet goes into the header pointer.
  */
 static __inline void
 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
 {

 	m->m_pkthdr.flowid = ifp->if_index;
 #ifdef VIMAGE
 	m->m_pkthdr.header = ifp->if_vnet;
 #endif /* VIMAGE */
 }
 
 /*
  * Erase the context fields (flowid and header pointer) that
  * igmp_save_context() stores in an mbuf's packet header.
  */
 static __inline void
 igmp_scrub_context(struct mbuf *m)
 {

 	m->m_pkthdr.flowid = 0;
 	m->m_pkthdr.header = NULL;
 }
 
 #ifdef KTR
 /*
  * Format an IPv4 address given in host byte order: the address is
  * converted to network byte order and handed to inet_ntoa().
  */
 static __inline char *
 inet_ntoa_haddr(in_addr_t haddr)
 {
 	struct in_addr net_addr = { .s_addr = htonl(haddr) };

 	return (inet_ntoa(net_addr));
 }
 #endif
 
 /*
  * Recover the interface index that igmp_save_context() stored in a
  * queued IGMP output chain and return it.
  *
  * VIMAGE: the (currently disabled) assertion checks that the caller
  * did CURVNET_SET() with the vnet recorded in the mbuf chain.
  */
 static __inline uint32_t
 igmp_restore_context(struct mbuf *m)
 {
 	uint32_t ifindex;

 #ifdef notyet
 #if defined(VIMAGE) && defined(INVARIANTS)
 	KASSERT(curvnet == (m->m_pkthdr.header),
 	    ("%s: called when curvnet was not restored", __func__));
 #endif
 #endif
 	ifindex = m->m_pkthdr.flowid;
 	return (ifindex);
 }
 
 /*
  * Retrieve or set default IGMP version.
  *
  * VIMAGE: Assume curvnet set by caller.
  * SMPng: NOTE: Serialized by IGMP lock.
  */
 static int
 sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
 {
 	int	 error;
 	int	 new;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error)
 		return (error);
 
 	IGMP_LOCK();
 
 	new = V_igmp_default_version;
 
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error || !req->newptr)
 		goto out_locked;
 
 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
 	     V_igmp_default_version, new);
 
 	V_igmp_default_version = new;
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Retrieve or set threshold between group-source queries in seconds.
  *
  * VIMAGE: Assume curvnet set by caller.
  * SMPng: NOTE: Serialized by IGMP lock.
  */
 static int
 sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error)
 		return (error);
 
 	IGMP_LOCK();
 
 	i = V_igmp_gsrdelay.tv_sec;
 
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		goto out_locked;
 
 	if (i < -1 || i >= 60) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
 	     V_igmp_gsrdelay.tv_sec, i);
 	V_igmp_gsrdelay.tv_sec = i;
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Sysctl node handler: copy out the struct igmp_ifinfo of the
  * interface whose index is the single trailing name component.
  * For use by ifmcstat(8).
  *
  * Returns EPERM for a write request, EINVAL unless exactly one name
  * component is given, and ENOENT when the index is out of range, has
  * no interface, or the interface has no IGMP state.
  *
  * SMPng: NOTE: Does an unlocked ifindex space read.
  * VIMAGE: Assume curvnet set by caller. The node handler itself
  * is not directly virtualized.
  */
 static int
 sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
 {
 	int			*mib = (int *)arg1;
 	u_int			 miblen = arg2;
 	int			 error;
 	struct ifnet		*ifp;
 	struct igmp_ifinfo	*igi;

 	/* This node is read-only. */
 	if (req->newptr != NULL)
 		return (EPERM);

 	if (miblen != 1)
 		return (EINVAL);

 	error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
 	if (error)
 		return (error);

 	IN_MULTI_LOCK();
 	IGMP_LOCK();

 	/* Assume failure until a matching record has been copied out. */
 	error = ENOENT;
 	if (mib[0] > 0 && mib[0] <= V_if_index) {
 		ifp = ifnet_byindex(mib[0]);
 		if (ifp != NULL) {
 			LIST_FOREACH(igi, &V_igi_head, igi_link) {
 				if (igi->igi_ifp != ifp)
 					continue;
 				error = SYSCTL_OUT(req, igi,
 				    sizeof(struct igmp_ifinfo));
 				break;
 			}
 		}
 	}

 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Dispatch an entire queue of pending packet chains
  * using the netisr.
  *
  * ifq:   queue to drain.
  * limit: stop after this many chains have been dispatched. The count
  *        is decremented and tested for zero after each dispatch, so
  *        a limit of zero (or less) drains the whole queue.
  * loop:  if nonzero, each chain is flagged M_IGMP_LOOP before it is
  *        dispatched.
  *
  * VIMAGE: Assumes the vnet pointer has been set.
  */
 static void
 igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
 {
 	struct mbuf *m;

 	for (;;) {
 		_IF_DEQUEUE(ifq, m);
 		if (m == NULL)
 			break;
 		/* Trace "dispatch <mbuf> from <queue>", in that order. */
 		CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, m, ifq);
 		if (loop)
 			m->m_flags |= M_IGMP_LOOP;
 		netisr_dispatch(NETISR_IGMP, m);
 		if (--limit == 0)
 			break;
 	}
 }
 
 /*
  * Filter outgoing IGMP report state by group.
  *
  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
  * If the net.inet.igmp.sendlocal sysctl is 0, reports are also
  * suppressed for every group in the 224.0.0.0/24 link-local scope.
  * Note that this may break certain IGMP snooping switches which rely
  * on the old report behaviour.
  *
  * Returns zero if reports for the given group should be suppressed,
  * non-zero if they should be issued.
  */
 static __inline int
 igmp_isgroupreported(const struct in_addr addr)
 {

 	/* The all-hosts group is never reported. */
 	if (in_allhosts(addr))
 		return (0);

 	/* Link-local groups are reported only when sendlocal is on. */
 	if (!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))
 		return (0);

 	return (1);
 }
 
 /*
  * Construct a Router Alert option to use in outgoing packets.
  */
 static struct mbuf *
 igmp_ra_alloc(void)
 {
 	struct mbuf	*m;
 	struct ipoption	*p;
 
 	MGET(m, M_DONTWAIT, MT_DATA);
 	p = mtod(m, struct ipoption *);
 	p->ipopt_dst.s_addr = INADDR_ANY;
 	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
 	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
 	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
 	p->ipopt_list[3] = 0x00;	/* pad byte */
 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
 
 	return (m);
 }
 
 /*
  * Attach IGMP when PF_INET is attached to an interface.
  */
 struct igmp_ifinfo *
 igmp_domifattach(struct ifnet *ifp)
 {
 	struct igmp_ifinfo *igi;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
 
 	igi = igi_alloc_locked(ifp);
 	if (!(ifp->if_flags & IFF_MULTICAST))
 		igi->igi_flags |= IGIF_SILENT;
 
 	IGMP_UNLOCK();
 
 	return (igi);
 }
 
 /*
  * Allocate and initialize the IGMP state for an interface and link
  * it onto the per-vnet list. The IGMP lock must be held. Returns the
  * new state, or NULL if the non-sleeping allocation fails.
  *
  * VIMAGE: assume curvnet set by caller.
  */
 static struct igmp_ifinfo *
 igi_alloc_locked(/*const*/ struct ifnet *ifp)
 {
 	struct igmp_ifinfo *igi;

 	IGMP_LOCK_ASSERT();

 	igi = malloc(sizeof(*igi), M_IGMP, M_NOWAIT|M_ZERO);
 	if (igi == NULL)
 		return (NULL);

 	/* Identity and protocol version. */
 	igi->igi_ifp = ifp;
 	igi->igi_flags = 0;
 	igi->igi_version = V_igmp_default_version;

 	/* Initial protocol variables. */
 	igi->igi_uri = IGMP_URI_INIT;
 	igi->igi_qri = IGMP_QRI_INIT;
 	igi->igi_qi = IGMP_QI_INIT;
 	igi->igi_rv = IGMP_RV_INIT;

 	SLIST_INIT(&igi->igi_relinmhead);

 	/*
 	 * Responses to general queries are subject to bounds.
 	 */
 	IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);

 	LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);

 	CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
 	     ifp, ifp->if_xname);

 	return (igi);
 }
 
 /*
  * Hook for ifdetach.
  *
  * For an interface in IGMPv3 mode, walk its AF_INET memberships,
  * queue groups in LEAVING state for release, clear recorded sources,
  * and then drop the queued in_multi references.  Interfaces in other
  * modes need no work here.
  *
  * NOTE: Some finalization tasks need to run before the protocol domain
  * is detached, but also before the link layer does its cleanup.
  *
  * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
  * XXX This is also bitten by unlocked ifma_protospec access.
  */
 void
 igmp_ifdetach(struct ifnet *ifp)
 {
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*maddr;
 	struct in_multi		*inm, *next_inm;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
 	    ifp->if_xname);
 
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	if (igi->igi_version != IGMP_VERSION_3) {
 		IGMP_UNLOCK();
 		return;
 	}
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(maddr, &ifp->if_multiaddrs, ifma_link) {
 		/* Only AF_INET entries that carry protocol state matter. */
 		if (maddr->ifma_addr->sa_family == AF_INET &&
 		    maddr->ifma_protospec != NULL) {
 			inm = (struct in_multi *)maddr->ifma_protospec;
 			if (inm->inm_state == IGMP_LEAVING_MEMBER) {
 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
 				    inm, inm_nrele);
 			}
 			inm_clear_recorded(inm);
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/* Free the in_multi reference(s) for this IGMP lifecycle. */
 	SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, next_inm) {
 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
 		inm_release_locked(inm);
 	}
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Hook for domifdetach.
  *
  * Deletes the per-interface IGMP state for ifp while holding the
  * IGMP lock.
  */
 void
 igmp_domifdetach(struct ifnet *ifp)
 {
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
 
 	/*
 	 * igi_delete_locked() locates the state by ifp on its own, so
 	 * there is no need to look it up through if_afdata here.
 	 */
 	igi_delete_locked(ifp);
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Unlink and free the igmp_ifinfo whose igi_ifp is ifp, discarding any
  * General Query responses still queued on it.  The IGMP lock must be
  * held.  With INVARIANTS, failing to find a matching entry panics.
  */
 static void
 igi_delete_locked(const struct ifnet *ifp)
 {
 	struct igmp_ifinfo *cur, *next;
 
 	CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK_ASSERT();
 
 	LIST_FOREACH_SAFE(cur, &V_igi_head, igi_link, next) {
 		if (cur->igi_ifp != ifp)
 			continue;
 
 		/* Free deferred General Query responses. */
 		_IF_DRAIN(&cur->igi_gq);
 
 		LIST_REMOVE(cur, igi_link);
 
 		KASSERT(SLIST_EMPTY(&cur->igi_relinmhead),
 		    ("%s: there are dangling in_multi references",
 		    __func__));
 
 		free(cur, M_IGMP);
 		return;
 	}
 
 #ifdef INVARIANTS
 	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp);
 #endif
 }
 
 /*
  * Process a received IGMPv1 query.
  *
  * A well-formed query puts the interface into IGMPv1 host
  * compatibility mode and arms a report timer on each eligible group
  * whose timer is not already running.
  *
  * The contract is "return non-zero if the message should be dropped";
  * every path in this function returns 0.
  *
  * VIMAGE: The curvnet pointer is derived from the input ifp.
  */
 static int
 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
     const struct igmp *igmp)
 {
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*maddr;
 	struct in_multi		*inm;
 
 	/*
 	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
 	 * 224.0.0.1. They are always treated as General Queries.
 	 * igmp_group is always ignored. Do not drop it as a userland
 	 * daemon may wish to see it.
 	 * XXX SMPng: unlocked increments in igmpstat assumed atomic.
 	 */
 	if (!(in_allhosts(ip->ip_dst) && in_nullhost(igmp->igmp_group))) {
 		IGMPSTAT_INC(igps_rcv_badqueries);
 		return (0);
 	}
 	IGMPSTAT_INC(igps_rcv_gen_queries);
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/* Switch to IGMPv1 host compatibility mode. */
 	igmp_set_version(igi, IGMP_VERSION_1);
 
 	CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 	/*
 	 * Start the timers in all of our group records
 	 * for the interface on which the query arrived,
 	 * except those which are already running.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(maddr, &ifp->if_multiaddrs, ifma_link) {
 		if (maddr->ifma_addr->sa_family != AF_INET ||
 		    maddr->ifma_protospec == NULL)
 			continue;
 
 		inm = (struct in_multi *)maddr->ifma_protospec;
 		if (inm->inm_timer != 0)
 			continue;
 
 		switch (inm->inm_state) {
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_REPORTING_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			inm->inm_timer = IGMP_RANDOM_DELAY(
 			    IGMP_V1V2_MAX_RI * PR_FASTHZ);
 			V_current_state_timers_running = 1;
 			break;
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 		default:
 			/* No report is scheduled for these states. */
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a received IGMPv2 general or group-specific query.
  *
  * A query with a null group address is a General Query and is only
  * accepted when sent to the all-hosts group; any other group address
  * makes it a Group-Specific Query.  Every path returns 0.
  */
 static int
 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
     const struct igmp *igmp)
 {
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*maddr;
 	struct in_multi		*inm;
 	int			 is_general_query;
 	uint16_t		 timer;
 
 	/*
 	 * Validate address fields upfront.
 	 * XXX SMPng: unlocked increments in igmpstat assumed atomic.
 	 */
 	is_general_query = in_nullhost(igmp->igmp_group) ? 1 : 0;
 	if (is_general_query) {
 		/* If this was not sent to the all-hosts group, ignore it. */
 		if (!in_allhosts(ip->ip_dst))
 			return (0);
 		IGMPSTAT_INC(igps_rcv_gen_queries);
 	} else {
 		IGMPSTAT_INC(igps_rcv_group_queries);
 	}
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/* Ignore v2 query if in v1 Compatibility Mode. */
 	if (igi->igi_version == IGMP_VERSION_1)
 		goto out_locked;
 
 	igmp_set_version(igi, IGMP_VERSION_2);
 
 	/* Scale the response code to fast-timeout ticks; never use zero. */
 	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
 	if (!is_general_query) {
 		/*
 		 * Group-specific IGMPv2 query, we need only
 		 * look up the single group to process it.
 		 */
 		inm = inm_lookup(ifp, igmp->igmp_group);
 		if (inm != NULL) {
 			CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			igmp_v2_update_group(inm, timer);
 		}
 		goto out_locked;
 	}
 
 	/*
 	 * General query: for each reporting group joined on this
 	 * interface, kick the report timer.
 	 */
 	CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)",
 	    ifp, ifp->if_xname);
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(maddr, &ifp->if_multiaddrs, ifma_link) {
 		if (maddr->ifma_addr->sa_family == AF_INET &&
 		    maddr->ifma_protospec != NULL) {
 			inm = (struct in_multi *)maddr->ifma_protospec;
 			igmp_v2_update_group(inm, timer);
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Update the report timer on a group in response to an IGMPv2 query.
  *
  * If we are becoming the reporting member for this group, start the timer.
  * If we already are the reporting member for this group, and timer is
  * below the threshold, reset it.
  *
  * We may be updating the group for the first time since we switched
  * to IGMPv3. If we are, then we must clear any recorded source lists,
  * and transition to REPORTING state; the group timer is overloaded
  * for group and group-source query responses.
  *
  * Unlike IGMPv3, the delay per group should be jittered
  * to avoid bursts of IGMPv2 reports.
  */
 static void
 igmp_v2_update_group(struct in_multi *inm, const int timer)
 {
 
 	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
 	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);
 
 	IN_MULTI_LOCK_ASSERT();
 
 	/*
 	 * A REPORTING group whose timer is running and not later than
 	 * the requested delay is left untouched.
 	 */
 	if (inm->inm_state == IGMP_REPORTING_MEMBER &&
 	    inm->inm_timer != 0 && inm->inm_timer <= timer) {
 		CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
 		    "skipping.", __func__);
 		return;
 	}
 
 	switch (inm->inm_state) {
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		/* (Re)arm the group timer with a randomized delay. */
 		CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
 		inm->inm_state = IGMP_REPORTING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		break;
 	case IGMP_SLEEPING_MEMBER:
 		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
 		inm->inm_state = IGMP_AWAKENING_MEMBER;
 		break;
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 	default:
 		/* Nothing to schedule for these states. */
 		break;
 	}
 }
 
 /*
  * Process a received IGMPv3 general, group-specific or
  * group-and-source-specific query.
  * Assumes m has already been pulled up to the full IGMP message length.
  * Return 0 if successful, otherwise an appropriate error code is returned.
  *
  * ifp is the receiving interface, ip the IP header of the query and
  * igmpv3 the IGMPv3 query header.  Note that every return statement
  * in this function currently yields 0, including the paths that
  * discard or ignore the query.
  */
 static int
 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
     /*const*/ struct igmpv3 *igmpv3)
 {
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 	int			 is_general_query;
 	uint32_t		 maxresp, nsrc, qqi;
 	uint16_t		 timer;
 	uint8_t			 qrv;
 
 	is_general_query = 0;
 
 	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 	/*
 	 * Codes below 128 are used as-is; larger codes are expanded as
 	 * mantissa << (exponent + 3).
 	 */
 	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
 	if (maxresp >= 128) {
 		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
 			  (IGMP_EXP(igmpv3->igmp_code) + 3);
 	}
 
 	/*
 	 * Robustness must never be less than 2 for on-wire IGMPv3.
 	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
 	 * an exception for interfaces whose IGMPv3 state changes
 	 * are redirected to loopback (e.g. MANET).
 	 */
 	qrv = IGMP_QRV(igmpv3->igmp_misc);
 	if (qrv < 2) {
 		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
 		    qrv, IGMP_RV_INIT);
 		qrv = IGMP_RV_INIT;
 	}
 
 	/* The query interval code uses the same expansion as igmp_code. */
 	qqi = igmpv3->igmp_qqi;
 	if (qqi >= 128) {
 		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
 		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
 	}
 
 	/* Convert the response time to fast-timeout ticks; never use zero. */
 	timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
 	nsrc = ntohs(igmpv3->igmp_numsrc);
 
 	/*
 	 * Validate address fields and versions upfront before
 	 * accepting v3 query.
 	 * XXX SMPng: Unlocked access to igmpstat counters here.
 	 */
 	if (in_nullhost(igmpv3->igmp_group)) {
 		/*
 		 * IGMPv3 General Query.
 		 *
 		 * General Queries SHOULD be directed to 224.0.0.1.
 		 * A general query with a source list has undefined
 		 * behaviour; discard it.
 		 */
 		IGMPSTAT_INC(igps_rcv_gen_queries);
 		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
 			IGMPSTAT_INC(igps_rcv_badqueries);
 			return (0);
 		}
 		is_general_query = 1;
 	} else {
 		/* Group or group-source specific query. */
 		if (nsrc == 0)
 			IGMPSTAT_INC(igps_rcv_group_queries);
 		else
 			IGMPSTAT_INC(igps_rcv_gsr_queries);
 	}
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/*
 	 * Discard the v3 query if we're in Compatibility Mode.
 	 * The RFC is not obviously worded that hosts need to stay in
 	 * compatibility mode until the Old Version Querier Present
 	 * timer expires.
 	 */
 	if (igi->igi_version != IGMP_VERSION_3) {
 		CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)",
 		    igi->igi_version, ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	/* Adopt the robustness and interval values carried by the query. */
 	igmp_set_version(igi, IGMP_VERSION_3);
 	igi->igi_rv = qrv;
 	igi->igi_qi = qqi;
 	igi->igi_qri = maxresp;
 
 	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
 	    maxresp);
 
 	if (is_general_query) {
 		/*
 		 * Schedule a current-state report on this ifp for
 		 * all groups, possibly containing source lists.
 		 * If there is a pending General Query response
 		 * scheduled earlier than the selected delay, do
 		 * not schedule any other reports.
 		 * Otherwise, reset the interface timer.
 		 */
 		CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
 			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
 			V_interface_timers_running = 1;
 		}
 	} else {
 		/*
 		 * Group-source-specific queries are throttled on
 		 * a per-group basis to defeat denial-of-service attempts.
 		 * Queries for groups we are not a member of on this
 		 * link are simply ignored.
 		 */
 		inm = inm_lookup(ifp, igmpv3->igmp_group);
 		if (inm == NULL)
 			goto out_locked;
 		if (nsrc > 0) {
 			if (!ratecheck(&inm->inm_lastgsrtv,
 			    &V_igmp_gsrdelay)) {
 				CTR1(KTR_IGMPV3, "%s: GS query throttled.",
 				    __func__);
 				IGMPSTAT_INC(igps_drop_gsr_queries);
 				goto out_locked;
 			}
 		}
 		CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)",
 		     inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname);
 		/*
 		 * If there is a pending General Query response
 		 * scheduled sooner than the selected delay, no
 		 * further report need be scheduled.
 		 * Otherwise, prepare to respond to the
 		 * group-specific or group-and-source query.
 		 */
 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
 			igmp_input_v3_group_query(inm, igi, timer, igmpv3);
 	}
 
 out_locked:
 	/* Release in the reverse order of acquisition. */
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a recieved IGMPv3 group-specific or group-and-source-specific
  * query.
  * Return <0 if any error occured. Currently this is ignored.
  */
 static int
 igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
     int timer, /*const*/ struct igmpv3 *igmpv3)
 {
 	int			 retval;
 	uint16_t		 nsrc;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	retval = 0;
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		return (retval);
 		break;
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		break;
 	}
 
 	nsrc = ntohs(igmpv3->igmp_numsrc);
 
 	/*
 	 * Deal with group-specific queries upfront.
 	 * If any group query is already pending, purge any recorded
 	 * source-list state if it exists, and schedule a query response
 	 * for this group-specific query.
 	 */
 	if (nsrc == 0) {
 		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
 			inm_clear_recorded(inm);
 			timer = min(inm->inm_timer, timer);
 		}
 		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		return (retval);
 	}
 
 	/*
 	 * Deal with the case where a group-and-source-specific query has
 	 * been received but a group-specific query is already pending.
 	 */
 	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
 		timer = min(inm->inm_timer, timer);
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		return (retval);
 	}
 
 	/*
 	 * Finally, deal with the case where a group-and-source-specific
 	 * query has been received, where a response to a previous g-s-r
 	 * query exists, or none exists.
 	 * In this case, we need to parse the source-list which the Querier
 	 * has provided us with and check if we have any source list filter
 	 * entries at T1 for these sources. If we do not, there is no need
 	 * schedule a report and the query may be dropped.
 	 * If we do, we must record them and schedule a current-state
 	 * report for those sources.
 	 * FIXME: Handling source lists larger than 1 mbuf requires that
 	 * we pass the mbuf chain pointer down to this function, and use
 	 * m_getptr() to walk the chain.
 	 */
 	if (inm->inm_nsrc > 0) {
 		const struct in_addr	*ap;
 		int			 i, nrecorded;
 
 		ap = (const struct in_addr *)(igmpv3 + 1);
 		nrecorded = 0;
 		for (i = 0; i < nsrc; i++, ap++) {
 			retval = inm_record_source(inm, ap->s_addr);
 			if (retval < 0)
 				break;
 			nrecorded += retval;
 		}
 		if (nrecorded > 0) {
 			CTR1(KTR_IGMPV3,
 			    "%s: schedule response to SG query", __func__);
 			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
 			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 			V_current_state_timers_running = 1;
 		}
 	}
 
 	return (retval);
 }
 
 /*
  * Process a received IGMPv1 host membership report.
  *
  * NOTE: 0.0.0.0 workaround breaks const correctness.
  */
 static int
 igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
     /*const*/ struct igmp *igmp)
 {
 	struct in_ifaddr *ia;
 	struct in_multi *inm;
 
 	IGMPSTAT_INC(igps_rcv_reports);
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 
 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 		IGMPSTAT_INC(igps_rcv_badreports);
 		return (EINVAL);
 	}
 
 	/*
 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
 	 * Booting clients may use the source address 0.0.0.0. Some
 	 * IGMP daemons may not know how to use IP_RECVIF to determine
 	 * the interface upon which this message was received.
 	 * Replace 0.0.0.0 with the subnet address if told to do so.
 	 */
 	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 		IFP_TO_IA(ifp, ia);
 		if (ia != NULL) {
 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
 			ifa_free(&ia->ia_ifa);
 		}
 	}
 
 	CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)",
 	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 
 	/*
 	 * IGMPv1 report suppression.
 	 * If we are a member of this group, and our membership should be
 	 * reported, stop our group timer and transition to the 'lazy' state.
 	 */
 	IN_MULTI_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifinfo *igi;
 
 		igi = inm->inm_igi;
 		if (igi == NULL) {
 			KASSERT(igi != NULL,
 			    ("%s: no igi for ifp %p", __func__, ifp));
 			goto out_locked;
 		}
 
 		IGMPSTAT_INC(igps_rcv_ourreports);
 
 		/*
 		 * If we are in IGMPv3 host mode, do not allow the
 		 * other host's IGMPv1 report to suppress our reports
 		 * unless explicitly configured to do so.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			if (V_igmp_legacysupp)
 				igmp_v3_suppress_group_record(inm);
 			goto out_locked;
 		}
 
 		inm->inm_timer = 0;
 
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 			break;
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 		case IGMP_SLEEPING_MEMBER:
 			inm->inm_state = IGMP_SLEEPING_MEMBER;
 			break;
 		case IGMP_REPORTING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			if (igi->igi_version == IGMP_VERSION_1)
 				inm->inm_state = IGMP_LAZY_MEMBER;
 			else if (igi->igi_version == IGMP_VERSION_2)
 				inm->inm_state = IGMP_SLEEPING_MEMBER;
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 
 out_locked:
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a received IGMPv2 host membership report.
  *
  * Reports whose source matches the interface's own address are
  * ignored.  Returns EINVAL when the reported group is not multicast
  * or does not match the IP destination, and 0 otherwise.
  *
  * NOTE: 0.0.0.0 workaround breaks const correctness.
  */
 static int
 igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
     /*const*/ struct igmp *igmp)
 {
 	struct igmp_ifinfo *igi;
 	struct in_ifaddr *ifaddr;
 	struct in_multi *inm;
 
 	/*
 	 * Make sure we don't hear our own membership report.  Fast
 	 * leave requires knowing that we are the only member of a
 	 * group.
 	 */
 	IFP_TO_IA(ifp, ifaddr);
 	if (ifaddr != NULL &&
 	    in_hosteq(ip->ip_src, IA_SIN(ifaddr)->sin_addr)) {
 		ifa_free(&ifaddr->ia_ifa);
 		return (0);
 	}
 
 	IGMPSTAT_INC(igps_rcv_reports);
 
 	if (ifp->if_flags & IFF_LOOPBACK) {
 		if (ifaddr != NULL)
 			ifa_free(&ifaddr->ia_ifa);
 		return (0);
 	}
 
 	if (!(IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) &&
 	    in_hosteq(igmp->igmp_group, ip->ip_dst))) {
 		if (ifaddr != NULL)
 			ifa_free(&ifaddr->ia_ifa);
 		IGMPSTAT_INC(igps_rcv_badreports);
 		return (EINVAL);
 	}
 
 	/*
 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
 	 * Booting clients may use the source address 0.0.0.0. Some
 	 * IGMP daemons may not know how to use IP_RECVIF to determine
 	 * the interface upon which this message was received.
 	 * Replace 0.0.0.0 with the subnet address if told to do so.
 	 * The interface address reference is dropped here either way.
 	 */
 	if (ifaddr != NULL) {
 		if (V_igmp_recvifkludge && in_nullhost(ip->ip_src))
 			ip->ip_src.s_addr = htonl(ifaddr->ia_subnet);
 		ifa_free(&ifaddr->ia_ifa);
 	}
 
 	CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)",
 	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 
 	/*
 	 * IGMPv2 report suppression.
 	 * If we are a member of this group, and our membership should be
 	 * reported, and our group timer is pending or about to be reset,
 	 * stop our group timer by transitioning to the 'lazy' state.
 	 */
 	IN_MULTI_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm == NULL)
 		goto out_locked;
 
 	igi = inm->inm_igi;
 	KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
 
 	IGMPSTAT_INC(igps_rcv_ourreports);
 
 	/*
 	 * If we are in IGMPv3 host mode, do not allow another
 	 * host's report to suppress our reports unless explicitly
 	 * configured to do so.
 	 */
 	if (igi->igi_version == IGMP_VERSION_3) {
 		if (V_igmp_legacysupp)
 			igmp_v3_suppress_group_record(inm);
 		goto out_locked;
 	}
 
 	inm->inm_timer = 0;
 
 	switch (inm->inm_state) {
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		CTR3(KTR_IGMPV3,
 		    "report suppressed for %s on ifp %p(%s)",
 		    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 		/* FALLTHROUGH */
 	case IGMP_LAZY_MEMBER:
 		inm->inm_state = IGMP_LAZY_MEMBER;
 		break;
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 	default:
 		/* State is left as it is. */
 		break;
 	}
 
 out_locked:
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Input routine for received IGMP packets.
  *
  * m is the received packet and off is used as the IP header length.
  * The packet is checked for minimum length, checksum and TTL, then
  * dispatched on igmp_type to the per-version query/report handlers.
  * Packets that survive are handed to rip_input(); packets that fail a
  * check are freed and the matching igmpstat counter is bumped.
  */
 void
 igmp_input(struct mbuf *m, int off)
 {
 	int iphlen;
 	struct ifnet *ifp;
 	struct igmp *igmp;
 	struct ip *ip;
 	int igmplen;
 	int minlen;
 	int queryver;
 
 	CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off);
 
 	ifp = m->m_pkthdr.rcvif;
 
 	IGMPSTAT_INC(igps_rcv_total);
 
 	ip = mtod(m, struct ip *);
 	iphlen = off;
 	/* NOTE(review): presumably ip_len excludes the IP header here - confirm. */
 	igmplen = ip->ip_len;
 
 	/*
 	 * Validate lengths.
 	 */
 	if (igmplen < IGMP_MINLEN) {
 		IGMPSTAT_INC(igps_rcv_tooshort);
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Always pullup to the minimum size for v1/v2 or v3
 	 * to amortize calls to m_pullup().
 	 * No m_freem() on failure: per mbuf(9), m_pullup() frees the
 	 * chain itself when it fails.
 	 */
 	minlen = iphlen;
 	if (igmplen >= IGMP_V3_QUERY_MINLEN)
 		minlen += IGMP_V3_QUERY_MINLEN;
 	else
 		minlen += IGMP_MINLEN;
 	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
 	    (m = m_pullup(m, minlen)) == 0) {
 		IGMPSTAT_INC(igps_rcv_tooshort);
 		return;
 	}
 	/* m may have changed; re-derive the IP header pointer. */
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Validate checksum.
 	 * The mbuf is temporarily advanced past the IP header so that
 	 * in_cksum() covers only the IGMP message, then restored.
 	 */
 	m->m_data += iphlen;
 	m->m_len -= iphlen;
 	igmp = mtod(m, struct igmp *);
 	if (in_cksum(m, igmplen)) {
 		IGMPSTAT_INC(igps_rcv_badsum);
 		m_freem(m);
 		return;
 	}
 	m->m_data -= iphlen;
 	m->m_len += iphlen;
 
 	/*
 	 * IGMP control traffic is link-scope, and must have a TTL of 1.
 	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
 	 * probe packets may come from beyond the LAN.
 	 */
 	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
 		IGMPSTAT_INC(igps_rcv_badttl);
 		m_freem(m);
 		return;
 	}
 
 	switch (igmp->igmp_type) {
 	case IGMP_HOST_MEMBERSHIP_QUERY:
 		/*
 		 * The query version is inferred from the message length
 		 * and, for minimum-length queries, from igmp_code.
 		 */
 		if (igmplen == IGMP_MINLEN) {
 			if (igmp->igmp_code == 0)
 				queryver = IGMP_VERSION_1;
 			else
 				queryver = IGMP_VERSION_2;
 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
 			queryver = IGMP_VERSION_3;
 		} else {
 			IGMPSTAT_INC(igps_rcv_tooshort);
 			m_freem(m);
 			return;
 		}
 
 		switch (queryver) {
 		case IGMP_VERSION_1:
 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
 			if (!V_igmp_v1enable)
 				break;
 			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
 				m_freem(m);
 				return;
 			}
 			break;
 
 		case IGMP_VERSION_2:
 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
 			if (!V_igmp_v2enable)
 				break;
 			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
 				m_freem(m);
 				return;
 			}
 			break;
 
 		case IGMP_VERSION_3: {
 				struct igmpv3 *igmpv3;
 				uint16_t igmpv3len;
 				uint16_t nsrc;
-				int srclen;
 
 				IGMPSTAT_INC(igps_rcv_v3_queries);
 				igmpv3 = (struct igmpv3 *)igmp;
 				/*
 				 * Validate length based on source count.
 				 */
 				nsrc = ntohs(igmpv3->igmp_numsrc);
-				srclen = sizeof(struct in_addr) * nsrc;
-				if (nsrc * sizeof(in_addr_t) > srclen) {
+				if (nsrc * sizeof(in_addr_t) >
+				    UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
 					/*
 					 * NOTE(review): unlike the other
 					 * early exits in this function, this
 					 * return does not m_freem(m); looks
 					 * like the mbuf is leaked - confirm.
 					 */
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
 				/*
 				 * m_pullup() may modify m, so pullup in
 				 * this scope.
 				 */
 				igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
-				    srclen;
+				    sizeof(struct in_addr) * nsrc;
 				if ((m->m_flags & M_EXT ||
 				     m->m_len < igmpv3len) &&
 				    (m = m_pullup(m, igmpv3len)) == NULL) {
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
 				/*
 				 * NOTE(review): igmpv3 is re-derived from m
 				 * below but ip is not; if m_pullup() replaced
 				 * the head mbuf, ip may be stale - confirm.
 				 */
 				igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
 				    + iphlen);
 				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
 					m_freem(m);
 					return;
 				}
 			}
 			break;
 		}
 		break;
 
 	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
 		if (!V_igmp_v1enable)
 			break;
 		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 
 	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
 		if (!V_igmp_v2enable)
 			break;
 		/* A missing Router Alert option is only counted, not rejected. */
 		if (!ip_checkrouteralert(m))
 			IGMPSTAT_INC(igps_rcv_nora);
 		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 
 	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
 		/*
 		 * Hosts do not need to process IGMPv3 membership reports,
 		 * as report suppression is no longer required.
 		 */
 		if (!ip_checkrouteralert(m))
 			IGMPSTAT_INC(igps_rcv_nora);
 		break;
 
 	default:
 		break;
 	}
 
 	/*
 	 * Pass all valid IGMP packets up to any process(es) listening on a
 	 * raw IGMP socket.
 	 */
 	rip_input(m, off);
 }
 
 
 /*
  * Fast timeout handler (global).
  * VIMAGE: Timeout handlers are expected to service all vimages.
  *
  * Walks every vnet under the vnet list read lock and runs the
  * per-vnet handler with curvnet set to that vnet.
  */
 void
 igmp_fasttimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		/* igmp_fasttimo_vnet() expects curvnet to be set up. */
 		CURVNET_SET(vnet_iter);
 		igmp_fasttimo_vnet();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Fast timeout handler (per-vnet).
  * Sends are shuffled off to a netisr to deal with Giant.
  *
  * First services the per-interface IGMPv3 General Query response
  * timers, then the per-group report and state-change timers of every
  * interface on V_igi_head.  The three V_*_timers_running flags are
  * cleared before each pass and set again by whatever still has a
  * timer counting down.
  *
  * VIMAGE: Assume caller has set up our curvnet.
  */
 static void
 igmp_fasttimo_vnet(void)
 {
 	struct ifqueue		 scq;	/* State-change packets */
 	struct ifqueue		 qrq;	/* Query response packets */
 	struct ifnet		*ifp;
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*ifma;
 	struct in_multi		*inm;
 	int			 loop, uri_fasthz;
 
 	loop = 0;
 	uri_fasthz = 0;
 
 	/*
 	 * Quick check to see if any work needs to be done, in order to
 	 * minimize the overhead of fasttimo processing.
 	 * SMPng: XXX Unlocked reads.
 	 */
 	if (!V_current_state_timers_running &&
 	    !V_interface_timers_running &&
 	    !V_state_change_timers_running)
 		return;
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	/*
 	 * IGMPv3 General Query response timer processing.
 	 */
 	if (V_interface_timers_running) {
 		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);
 
 		/* Re-set below if any interface timer is still counting. */
 		V_interface_timers_running = 0;
 		LIST_FOREACH(igi, &V_igi_head, igi_link) {
 			if (igi->igi_v3_timer == 0) {
 				/* Do nothing. */
 			} else if (--igi->igi_v3_timer == 0) {
 				igmp_v3_dispatch_general_query(igi);
 			} else {
 				V_interface_timers_running = 1;
 			}
 		}
 	}
 
 	if (!V_current_state_timers_running &&
 	    !V_state_change_timers_running)
 		goto out_locked;
 
 	/* The per-group timer routines set these again as needed. */
 	V_current_state_timers_running = 0;
 	V_state_change_timers_running = 0;
 
 	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);
 
 	/*
 	 * IGMPv1/v2/v3 host report and state-change timer processing.
 	 * Note: Processing a v3 group timer may remove a node.
 	 */
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		ifp = igi->igi_ifp;
 
 		/*
 		 * The local output queues are set up only for IGMPv3
 		 * interfaces, and are only used on the IGMPv3 paths below.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
 			    PR_FASTHZ);
 
 			memset(&qrq, 0, sizeof(struct ifqueue));
 			IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);
 
 			memset(&scq, 0, sizeof(struct ifqueue));
 			IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
 		}
 
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			/* Skip non-IPv4 entries and those without IGMP state. */
 			if (ifma->ifma_addr->sa_family != AF_INET ||
 			    ifma->ifma_protospec == NULL)
 				continue;
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			switch (igi->igi_version) {
 			case IGMP_VERSION_1:
 			case IGMP_VERSION_2:
 				igmp_v1v2_process_group_timer(inm,
 				    igi->igi_version);
 				break;
 			case IGMP_VERSION_3:
 				igmp_v3_process_group_timers(igi, &qrq,
 				    &scq, inm, uri_fasthz);
 				break;
 			}
 		}
 		IF_ADDR_RUNLOCK(ifp);
 
 		if (igi->igi_version == IGMP_VERSION_3) {
 			struct in_multi		*tinm;
 
 			/* Hand off whatever the group timers queued. */
 			igmp_dispatch_queue(&qrq, 0, loop);
 			igmp_dispatch_queue(&scq, 0, loop);
 
 			/*
 			 * Free the in_multi reference(s) for this
 			 * IGMP lifecycle.
 			 */
 			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
 			    inm_nrele, tinm) {
 				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
 				    inm_nrele);
 				inm_release_locked(inm);
 			}
 		}
 	}
 
 out_locked:
 	/* Release in the reverse order of acquisition. */
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 }
 
 /*
  * Update host report group timer for IGMPv1/v2.
  * Will update the global pending timer flags.
  *
  * When the timer of a group in REPORTING state expires on this tick,
  * the group becomes IDLE and a v1 or v2 membership report, selected
  * by 'version', is queued.
  */
 static void
 igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
 {
 	int report_due, report_type;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	report_due = 0;
 	if (inm->inm_timer != 0) {
 		if (--inm->inm_timer != 0) {
 			/* Still counting down; ask for another pass. */
 			V_current_state_timers_running = 1;
 			return;
 		}
 		report_due = 1;
 	}
 
 	/* Only a REPORTING group whose timer just expired has work to do. */
 	if (inm->inm_state != IGMP_REPORTING_MEMBER || !report_due)
 		return;
 
 	report_type = (version == IGMP_VERSION_2) ?
 	    IGMP_v2_HOST_MEMBERSHIP_REPORT : IGMP_v1_HOST_MEMBERSHIP_REPORT;
 
 	inm->inm_state = IGMP_IDLE_MEMBER;
 	(void)igmp_v1v2_queue_report(inm, report_type);
 }
 
 /*
  * Update a group's timers for IGMPv3.
  * Will update the global pending timer flags.
  * Note: Unlocked read from igi.
  */
 static void
 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
     struct ifqueue *qrq, struct ifqueue *scq,
     struct in_multi *inm, const int uri_fasthz)
 {
 	int query_response_timer_expired;
 	int state_change_retransmit_timer_expired;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	query_response_timer_expired = 0;
 	state_change_retransmit_timer_expired = 0;
 
 	/*
 	 * During a transition from v1/v2 compatibility mode back to v3,
 	 * a group record in REPORTING state may still have its group
 	 * timer active. This is a no-op in this function; it is easier
 	 * to deal with it here than to complicate the slow-timeout path.
 	 */
 	if (inm->inm_timer == 0) {
 		query_response_timer_expired = 0;
 	} else if (--inm->inm_timer == 0) {
 		query_response_timer_expired = 1;
 	} else {
 		V_current_state_timers_running = 1;
 	}
 
 	if (inm->inm_sctimer == 0) {
 		state_change_retransmit_timer_expired = 0;
 	} else if (--inm->inm_sctimer == 0) {
 		state_change_retransmit_timer_expired = 1;
 	} else {
 		V_state_change_timers_running = 1;
 	}
 
 	/* We are in fasttimo, so be quick about it. */
 	if (!state_change_retransmit_timer_expired &&
 	    !query_response_timer_expired)
 		return;
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 		break;
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		/*
 		 * Respond to a previously pending Group-Specific
 		 * or Group-and-Source-Specific query by enqueueing
 		 * the appropriate Current-State report for
 		 * immediate transmission.
 		 */
 		if (query_response_timer_expired) {
 			int retval;
 
 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, retval);
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			/* XXX Clear recorded sources for next time. */
 			inm_clear_recorded(inm);
 		}
 		/* FALLTHROUGH */
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		if (state_change_retransmit_timer_expired) {
 			/*
 			 * State-change retransmission timer fired.
 			 * If there are any further pending retransmissions,
 			 * set the global pending state-change flag, and
 			 * reset the timer.
 			 */
 			if (--inm->inm_scrv > 0) {
 				inm->inm_sctimer = uri_fasthz;
 				V_state_change_timers_running = 1;
 			}
 			/*
 			 * Retransmit the previously computed state-change
 			 * report. If there are no further pending
 			 * retransmissions, the mbuf queue will be consumed.
 			 * Update T0 state to T1 as we have now sent
 			 * a state-change.
 			 */
 			(void)igmp_v3_merge_state_changes(inm, scq);
 
 			inm_commit(inm);
 			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 
 			/*
 			 * If we are leaving the group for good, make sure
 			 * we release IGMP's reference to it.
 			 * This release must be deferred using a SLIST,
 			 * as we are called from a loop which traverses
 			 * the in_ifmultiaddr TAILQ.
 			 */
 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
 			    inm->inm_scrv == 0) {
 				inm->inm_state = IGMP_NOT_MEMBER;
 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
 				    inm, inm_nrele);
 			}
 		}
 		break;
 	}
 }
 
 
 /*
  * Suppress a group's pending response to a group or source/group query.
  *
  * Only groups in IGMP_G_QUERY_PENDING_MEMBER or
  * IGMP_SG_QUERY_PENDING_MEMBER state are affected: the pending group
  * timer is cleared and the group returns to IGMP_REPORTING_MEMBER.  For
  * a source/group query the recorded sources are cleared as well.
  *
  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
  * Do NOT update ST1/ST0 as this operation merely suppresses
  * the currently pending group record.
  * Do NOT suppress the response to a general query. It is possible but
  * it would require adding another state or flag.
  */
 static void
 igmp_v3_suppress_group_record(struct in_multi *inm)
 {
 
 	IN_MULTI_LOCK_ASSERT();
 
 	KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
 		("%s: not IGMPv3 mode on link", __func__));
 
 	/*
 	 * Nothing to suppress unless a query response is pending.
 	 * Both inequalities must hold for the state to be "neither";
 	 * joining them with || is always true and would turn this
 	 * whole function into a no-op.
 	 */
 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
 		return;
 
 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 		inm_clear_recorded(inm);
 
 	inm->inm_timer = 0;
 	inm->inm_state = IGMP_REPORTING_MEMBER;
 }
 
 /*
  * Switch to a different IGMP version on the given interface,
  * as per Section 7.2.1.
  *
  * For a request to run v1 or v2, the matching "Older Version Querier
  * Present" timer is armed and the other one is cleared.  The link's
  * version is then derived from whichever timer is running; when the
  * version actually changes, all pending IGMPv3 timers on the link are
  * cancelled.  A request for any other version leaves the timers alone.
  */
 static void
 igmp_set_version(struct igmp_ifinfo *igi, const int version)
 {
 	int old_version_timer;
 
 	IGMP_LOCK_ASSERT();
 
 	CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
 	    version, igi->igi_ifp, igi->igi_ifp->if_xname);
 
 	if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) {
 		/*
 		 * "Older Version Querier Present" timeout, Section 8.12,
 		 * scaled to slow-timeout ticks.
 		 */
 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
 		old_version_timer *= PR_SLOWHZ;
 
 		/* Arm the timer for the requested version only. */
 		igi->igi_v1_timer =
 		    (version == IGMP_VERSION_1) ? old_version_timer : 0;
 		igi->igi_v2_timer =
 		    (version == IGMP_VERSION_1) ? 0 : old_version_timer;
 	}
 
 	/* A running v1 timer takes precedence over a running v2 timer. */
 	if (igi->igi_v1_timer > 0) {
 		if (igi->igi_version != IGMP_VERSION_1) {
 			igi->igi_version = IGMP_VERSION_1;
 			igmp_v3_cancel_link_timers(igi);
 		}
 	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
 		if (igi->igi_version != IGMP_VERSION_2) {
 			igi->igi_version = IGMP_VERSION_2;
 			igmp_v3_cancel_link_timers(igi);
 		}
 	}
 }
 
 /*
  * Cancel every pending IGMPv3 timer on a link and on all IPv4 groups
  * joined on it: the general-query timer, the group-query timers and
  * the state-change timers.
  *
  * Only ever called on a transition from v3 to Compatibility mode.  The
  * timers are stopped outright (which may be expensive for a large
  * number of groups); Compatibility Mode query processing will restart
  * them if it decides they are needed.
  *
  * Groups that were in the middle of leaving are queued on the link's
  * deferred-release list while the interface address list is walked and
  * are released once the walk is complete.
  */
 static void
 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
 {
 	struct ifmultiaddr	*ifma;
 	struct ifnet		*ifp;
 	struct in_multi		*inm, *tinm;
 
 	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
 	    igi->igi_ifp, igi->igi_ifp->if_xname);
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
 	 * Kill the v3 General Query Response timer for this link.
 	 * Should fasttimo run because V_interface_timers_running is
 	 * set, it clears that flag when no link timers are pending.
 	 */
 	igi->igi_v3_timer = 0;
 
 	/*
 	 * Walk all memberships scoped to this link and clear their
 	 * current-state and state-change report timers.
 	 */
 	ifp = igi->igi_ifp;
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		switch (inm->inm_state) {
 		case IGMP_LEAVING_MEMBER:
 			/*
 			 * Leaving while switching to compatibility mode:
 			 * drop the final reference that was held for
 			 * issuing the INCLUDE {}, and go to REPORTING so
 			 * the host leave message still reaches the old
 			 * querier -- going to NOT would lose the leave
 			 * and race.
 			 */
 			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
 			inm_clear_recorded(inm);
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 			inm_clear_recorded(inm);
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			break;
 		case IGMP_REPORTING_MEMBER:
 			/* Already in the target state. */
 			break;
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			/*
 			 * Either unreported or not relevant in v3 mode;
 			 * the state is left as it is.
 			 */
 			break;
 		}
 		/*
 		 * Regardless of state: stop the state-change and group
 		 * report timers and discard queued state-change records.
 		 */
 		inm->inm_sctimer = 0;
 		inm->inm_timer = 0;
 		_IF_DRAIN(&inm->inm_scq);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/* Perform the releases deferred during the walk above. */
 	SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) {
 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
 		inm_release_locked(inm);
 	}
 }
 
 /*
  * Update the Older Version Querier Present timers for a link.
  * See Section 7.2.1 of RFC 3376.
  *
  * Each call ticks whichever timer is running and adjusts the link's
  * IGMP version accordingly:
  *  - v1 timer running: stay in v1 (or revert to v3 if v1 has been
  *    disabled), and stop the v2 timer if it is running;
  *  - only the v2 timer running: use v2 (or revert to v3 if v2 has been
  *    disabled);
  *  - neither timer running: revert to v3.
  */
 static void
 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
 {
 
 	IGMP_LOCK_ASSERT();
 
 	if (igi->igi_v1_timer > 0) {
 		/*
 		 * IGMPv1 Querier Present timer running.
 		 *
 		 * If IGMPv1 was disabled since the last timeout, revert
 		 * to IGMPv3; otherwise just tick the timer.
 		 */
 		if (V_igmp_v1enable) {
 			--igi->igi_v1_timer;
 		} else {
 			CTR5(KTR_IGMPV3,
 			    "%s: transition from v%d -> v%d on %p(%s)",
 			    __func__, igi->igi_version, IGMP_VERSION_3,
 			    igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v1_timer = 0;
 			igi->igi_version = IGMP_VERSION_3;
 		}
 		/* In either case a running IGMPv2 timer is stopped. */
 		if (igi->igi_v2_timer > 0) {
 			CTR3(KTR_IGMPV3,
 			    "%s: cancel v2 timer on %p(%s)",
 			    __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v2_timer = 0;
 		}
 	} else if (igi->igi_v1_timer == 0) {
 		if (igi->igi_v2_timer > 0) {
 			/*
 			 * IGMPv1 Querier Present timer expired,
 			 * IGMPv2 Querier Present timer running.
 			 *
 			 * If IGMPv2 was disabled since the last timeout,
 			 * revert to IGMPv3; otherwise tick the timer and
 			 * make sure the link speaks IGMPv2.
 			 */
 			if (V_igmp_v2enable) {
 				--igi->igi_v2_timer;
 				if (igi->igi_version != IGMP_VERSION_2) {
 					CTR5(KTR_IGMPV3,
 					    "%s: transition from v%d -> v%d on %p(%s)",
 					    __func__, igi->igi_version,
 					    IGMP_VERSION_2, igi->igi_ifp,
 					    igi->igi_ifp->if_xname);
 					igi->igi_version = IGMP_VERSION_2;
 				}
 			} else {
 				CTR5(KTR_IGMPV3,
 				    "%s: transition from v%d -> v%d on %p(%s)",
 				    __func__, igi->igi_version, IGMP_VERSION_3,
 				    igi->igi_ifp, igi->igi_ifp->if_xname);
 				igi->igi_v2_timer = 0;
 				igi->igi_version = IGMP_VERSION_3;
 			}
 		} else if (igi->igi_v2_timer == 0) {
 			/*
 			 * Both the IGMPv1 and IGMPv2 Querier Present
 			 * timers have expired: revert to IGMPv3.
 			 */
 			if (igi->igi_version != IGMP_VERSION_3) {
 				CTR5(KTR_IGMPV3,
 				    "%s: transition from v%d -> v%d on %p(%s)",
 				    __func__, igi->igi_version, IGMP_VERSION_3,
 				    igi->igi_ifp, igi->igi_ifp->if_xname);
 				igi->igi_version = IGMP_VERSION_3;
 			}
 		}
 	}
 }
 
 /*
  * Global slowtimo handler.
  * VIMAGE: Timeout handlers are expected to service all vimages.
  *
  * Iterates over every vnet and runs the per-vnet slow timeout handler
  * with that vnet set as the current one.
  */
 void
 igmp_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	/* The vnet list stays read-locked for the whole iteration. */
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		/* Switch context to this vnet around the per-vnet work. */
 		CURVNET_SET(vnet_iter);
 		igmp_slowtimo_vnet();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Per-vnet slowtimo handler.
  *
  * Takes the IGMP lock and updates the Older Version Querier Present
  * timers of every igmp_ifinfo on this vnet's V_igi_head list.
  */
 static void
 igmp_slowtimo_vnet(void)
 {
 	struct igmp_ifinfo *igi;
 
 	IGMP_LOCK();
 
 	/* One querier-timer update per IGMP-enabled link. */
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		igmp_v1v2_process_querier_timers(igi);
 	}
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Dispatch an IGMPv1/v2 host report or leave message.
  * These are always small enough to fit inside a single mbuf.
  *
  * inm:  the group the message is about; supplies the group address,
  *       the interface and the link flags.
  * type: value stored in the IGMP type field.  A type of
  *       IGMP_HOST_LEAVE_MESSAGE is addressed to the all-routers group;
  *       anything else is addressed to the group itself.
  *
  * Returns ENOMEM if no mbuf could be allocated, otherwise 0 once the
  * packet has been handed to netisr_dispatch().
  */
 static int
 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
 {
 	struct ifnet		*ifp;
 	struct igmp		*igmp;
 	struct ip		*ip;
 	struct mbuf		*m;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	ifp = inm->inm_ifp;
 
 	MGETHDR(m, M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOMEM);
 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
 
 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
 
 	/*
 	 * Temporarily step past the IP header so that the mbuf data
 	 * covers only the IGMP message while it is filled in and
 	 * checksummed.
 	 */
 	m->m_data += sizeof(struct ip);
 	m->m_len = sizeof(struct igmp);
 
 	igmp = mtod(m, struct igmp *);
 	igmp->igmp_type = type;
 	igmp->igmp_code = 0;
 	igmp->igmp_group = inm->inm_addr;
 	/* The checksum field must be zero while the checksum is computed. */
 	igmp->igmp_cksum = 0;
 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
 
 	/* Step back so the mbuf data starts at the IP header again. */
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 
 	/*
 	 * Fill in a partial IP header; only the fields below are set
 	 * here, and ip_len is stored without byte-order conversion.
 	 */
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = 0;
 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
 	ip->ip_off = 0;
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_src.s_addr = INADDR_ANY;
 
 	if (type == IGMP_HOST_LEAVE_MESSAGE)
 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
 	else
 		ip->ip_dst = inm->inm_addr;
 
 	/* Record the interface context in the mbuf. */
 	igmp_save_context(m, ifp);
 
 	/* Tag the packet as v1/v2, and as looped-back if the link asks. */
 	m->m_flags |= M_IGMPV2;
 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
 		m->m_flags |= M_IGMP_LOOP;
 
 	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
 	netisr_dispatch(NETISR_IGMP, m);
 
 	return (0);
 }
 
 /*
  * Process a state change from the upper layer for the given IPv4 group.
  *
  * Each socket holds a reference on the in_multi in its own ip_moptions.
  * The socket layer will have made the necessary updates to the group
  * state, it is now up to IGMP to issue a state change report if there
  * has been any change between T0 (when the last state-change was issued)
  * and T1 (now).
  *
  * We use the IGMPv3 state machine at group level. The IGMP module
  * however makes the decision as to which IGMP protocol version to speak.
  * A state change *from* INCLUDE {} always means an initial join.
  * A state change *to* INCLUDE {} always means a final leave.
  *
  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
  * save ourselves a bunch of work; any exclusive mode groups need not
  * compute source filter lists.
  *
  * VIMAGE: curvnet should have been set by caller, as this routine
  * is called from the socket option handlers.
  *
  * Returns 0 or the error reported by the join / state-change handler;
  * a final leave always yields 0.  The IGMP lock is taken and dropped
  * internally; the IN_MULTI lock is asserted.
  */
 int
 igmp_change_state(struct in_multi *inm)
 {
 	struct igmp_ifinfo *igi;
 	struct ifnet *ifp;
 	int error;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	error = 0;
 
 	/*
 	 * Guard against the upper layer asking for a state change on
 	 * an interface that has since gone away.
 	 */
 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
 	ifp = inm->inm_ifma->ifma_ifp;
 	/* netinet and net must agree on which ifp this is. */
 	KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
 
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (inm->inm_st[1].iss_fmode == inm->inm_st[0].iss_fmode) {
 		/* Filter mode unchanged: only the source set moved. */
 		CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
 		error = igmp_handle_state_change(inm, igi);
 	} else {
 		/*
 		 * A transition to or from MCAST_UNDEFINED starts or
 		 * finishes an IGMP life cycle for this group; any other
 		 * mode change is an ordinary state change.
 		 */
 		CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
 			CTR1(KTR_IGMPV3, "%s: initial join", __func__);
 			error = igmp_initial_join(inm, igi);
 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
 			CTR1(KTR_IGMPV3, "%s: final leave", __func__);
 			igmp_final_leave(inm, igi);
 		} else {
 			error = igmp_handle_state_change(inm, igi);
 		}
 	}
 
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Perform the initial join for an IGMP group.
  *
  * When joining a group:
  *  If the group should have its IGMP traffic suppressed, do nothing.
  *  IGMPv1 starts sending IGMPv1 host membership reports.
  *  IGMPv2 starts sending IGMPv2 host membership reports.
  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
  *  initial state of the membership.
  *
  * inm: the group being joined.
  * igi: IGMP state of the link the group lives on; must refer to the
  *      same ifnet as inm.
  *
  * Returns 0 on success, or a positive errno value taken from the
  * v1/v2 report queueing or (negated) from the v3 record enqueue.
  * T0 is synchronized to T1 here except in the IGMPv3 case, where
  * that is deferred.
  */
 static int
 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	struct ifnet		*ifp;
 	struct ifqueue		*ifq;
 	int			 error, retval, syncstates;
 
 	CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	error = 0;
 	syncstates = 1;
 
 	ifp = inm->inm_ifp;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
 
 	/*
 	 * Groups joined on loopback or marked as 'not reported',
 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
 	 * are never reported in any IGMP protocol exchanges.
 	 * All other groups enter the appropriate IGMP state machine
 	 * for the version in use on this link.
 	 * A link marked as IGIF_SILENT causes IGMP to be completely
 	 * disabled for the link.
 	 */
 	if ((ifp->if_flags & IFF_LOOPBACK) ||
 	    (igi->igi_flags & IGIF_SILENT) ||
 	    !igmp_isgroupreported(inm->inm_addr)) {
 		CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		inm->inm_state = IGMP_SILENT_MEMBER;
 		inm->inm_timer = 0;
 	} else {
 		/*
 		 * Deal with overlapping in_multi lifecycle.
 		 * If this group was LEAVING, then make sure
 		 * we drop the reference we picked up to keep the
 		 * group around for the final INCLUDE {} enqueue.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3 &&
 		    inm->inm_state == IGMP_LEAVING_MEMBER)
 			inm_release_locked(inm);
 
 		/* Default state; the v1/v2 case below overrides it. */
 		inm->inm_state = IGMP_REPORTING_MEMBER;
 
 		switch (igi->igi_version) {
 		case IGMP_VERSION_1:
 		case IGMP_VERSION_2:
 			inm->inm_state = IGMP_IDLE_MEMBER;
 			error = igmp_v1v2_queue_report(inm,
 			    (igi->igi_version == IGMP_VERSION_2) ?
 			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
 			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
 			/*
 			 * Only when the report was queued: arm the group
 			 * timer with a random delay and flag that
 			 * current-state timers are running.
 			 */
 			if (error == 0) {
 				inm->inm_timer = IGMP_RANDOM_DELAY(
 				    IGMP_V1V2_MAX_RI * PR_FASTHZ);
 				V_current_state_timers_running = 1;
 			}
 			break;
 
 		case IGMP_VERSION_3:
 			/*
 			 * Defer update of T0 to T1, until the first copy
 			 * of the state change has been transmitted.
 			 */
 			syncstates = 0;
 
 			/*
 			 * Immediately enqueue a State-Change Report for
 			 * this interface, freeing any previous reports.
 			 * Don't kick the timers if there is nothing to do,
 			 * or if an error occurred.
 			 */
 			ifq = &inm->inm_scq;
 			_IF_DRAIN(ifq);
 			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
 			    0, 0);
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, retval);
 			/*
 			 * retval is negative on error and zero when
 			 * nothing was enqueued; negate it to obtain the
 			 * value returned to the caller.
 			 */
 			if (retval <= 0) {
 				error = retval * -1;
 				break;
 			}
 
 			/*
 			 * Schedule transmission of pending state-change
 			 * report up to RV times for this link. The timer
 			 * will fire at the next igmp_fasttimo (~200ms),
 			 * giving us an opportunity to merge the reports.
 			 */
 			if (igi->igi_flags & IGIF_LOOPBACK) {
 				inm->inm_scrv = 1;
 			} else {
 				KASSERT(igi->igi_rv > 1,
 				   ("%s: invalid robustness %d", __func__,
 				    igi->igi_rv));
 				inm->inm_scrv = igi->igi_rv;
 			}
 			inm->inm_sctimer = 1;
 			V_state_change_timers_running = 1;
 
 			error = 0;
 			break;
 		}
 	}
 
 	/*
 	 * Only update the T0 state if state change is atomic,
 	 * i.e. we don't need to wait for a timer to fire before we
 	 * can consider the state change to have been communicated.
 	 */
 	if (syncstates) {
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 	}
 
 	return (error);
 }
 
 /*
  * Issue an intermediate state change during the IGMP life-cycle.
  *
  * On loopback interfaces, silent links, unreported groups, or links
  * not running IGMPv3 there is nothing to send: T0 is simply brought up
  * to T1 and 0 is returned.  Otherwise any queued state-change records
  * are discarded, a fresh state-change record is enqueued and, if data
  * was queued, the group's state-change timer is started.
  *
  * Returns 0 on success or when nothing needed to be sent, otherwise the
  * negated (positive) error from the record enqueue.
  */
 static int
 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	struct ifnet		*ifp;
 	int			 retval;
 
 	CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	ifp = inm->inm_ifp;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
 
 	if ((ifp->if_flags & IFF_LOOPBACK) ||
 	    (igi->igi_flags & IGIF_SILENT) ||
 	    !igmp_isgroupreported(inm->inm_addr) ||
 	    (igi->igi_version != IGMP_VERSION_3)) {
 		/* Nothing goes on the wire; just record the new state. */
 		if (!igmp_isgroupreported(inm->inm_addr)) {
 			CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		}
 		CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 		return (0);
 	}
 
 	/* Replace whatever state-change records were still queued. */
 	_IF_DRAIN(&inm->inm_scq);
 	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
 	CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
 	if (retval <= 0)
 		return (-retval);
 
 	/*
 	 * Record(s) were enqueued: start the state-change report timer
 	 * for this group.  A looped-back link sends a single copy,
 	 * otherwise the link's robustness value is used.
 	 */
 	if (igi->igi_flags & IGIF_LOOPBACK)
 		inm->inm_scrv = 1;
 	else
 		inm->inm_scrv = igi->igi_rv;
 	inm->inm_sctimer = 1;
 	V_state_change_timers_running = 1;
 
 	return (0);
 }
 
 /*
  * Perform the final leave for an IGMP group.
  *
  * When leaving a group:
  *  IGMPv1 does nothing.
  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
  *  IGMPv3 enqueues a state-change report containing a transition
  *  to INCLUDE {} for immediate transmission.
  *
  * inm: the group being left.
  * igi: IGMP state of the link the group lives on.
  *
  * Unless an IGMPv3 leave with pending retransmissions was scheduled,
  * T0 is synchronized to T1 and T1's filter mode is then set to
  * MCAST_UNDEFINED before returning.
  */
 static void
 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	int syncstates;
 
 	syncstates = 1;
 
 	CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		/* Already leaving or left; do nothing. */
 		CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		break;
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		if (igi->igi_version == IGMP_VERSION_2) {
 #ifdef INVARIANTS
 			/* Query-pending states only exist in IGMPv3 mode. */
 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 			panic("%s: IGMPv3 state reached, not IGMPv3 mode",
 			     __func__);
 #endif
 			/* The result of queueing the leave is not checked. */
 			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
 			inm->inm_state = IGMP_NOT_MEMBER;
 		} else if (igi->igi_version == IGMP_VERSION_3) {
 			/*
 			 * Stop group timer and all pending reports.
 			 * Immediately enqueue a state-change report
 			 * TO_IN {} to be sent on the next fast timeout,
 			 * giving us an opportunity to merge reports.
 			 */
 			_IF_DRAIN(&inm->inm_scq);
 			inm->inm_timer = 0;
 			if (igi->igi_flags & IGIF_LOOPBACK) {
 				inm->inm_scrv = 1;
 			} else {
 				inm->inm_scrv = igi->igi_rv;
 			}
 			CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d "
 			    "pending retransmissions.", __func__,
 			    inet_ntoa(inm->inm_addr),
 			    inm->inm_ifp->if_xname, inm->inm_scrv);
 			if (inm->inm_scrv == 0) {
 				/* No transmissions to make; leave at once. */
 				inm->inm_state = IGMP_NOT_MEMBER;
 				inm->inm_sctimer = 0;
 			} else {
 				int retval;
 
 				/*
 				 * Take a reference on the group while the
 				 * leave is pending.
 				 */
 				inm_acquire_locked(inm);
 
 				retval = igmp_v3_enqueue_group_record(
 				    &inm->inm_scq, inm, 1, 0, 0);
 				KASSERT(retval != 0,
 				    ("%s: enqueue record = %d", __func__,
 				     retval));
 
 				inm->inm_state = IGMP_LEAVING_MEMBER;
 				inm->inm_sctimer = 1;
 				V_state_change_timers_running = 1;
 				/* T0/T1 sync is deferred for this path. */
 				syncstates = 0;
 			}
 			break;
 		}
 		/* Any other version (e.g. IGMPv1): nothing is sent. */
 		break;
 	case IGMP_LAZY_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		/* Our reports are suppressed; do nothing. */
 		break;
 	}
 
 	if (syncstates) {
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 		CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s",
 		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 	}
 }
 
 /*
  * Enqueue an IGMPv3 group record to the given output queue.
  *
  * XXX This function could do with having the allocation code
  * split out, and the multiple-tree-walks coalesced into a single
  * routine as has been done in igmp_v3_enqueue_filter_change().
  *
  * If is_state_change is zero, a current-state record is appended.
  * If is_state_change is non-zero, a state-change report is appended.
  * A state change in which the filter mode did not change but sources
  * are present is delegated to igmp_v3_enqueue_filter_change().
  *
  * If is_group_query is non-zero, an mbuf packet chain is allocated.
  * If is_group_query is zero, and if there is a packet with free space
  * at the tail of the queue, it will be appended to providing there
  * is enough free space.
  * Otherwise a new mbuf packet chain is allocated.
  *
  * If is_source_query is non-zero, each source is checked to see if
  * it was recorded for a Group-Source query, and will be omitted if
  * it is not both in-mode and recorded.
  *
  * The function will attempt to allocate leading space in the packet
  * for the IP/IGMP header to be prepended without fragmenting the chain.
  *
  * If successful the size of all data appended to the queue is returned
  * (each group record header and each source address counted once),
  * otherwise an error code less than zero is returned, or zero if
  * no record(s) were appended.
  */
 static int
 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
     const int is_state_change, const int is_group_query,
     const int is_source_query)
 {
 	struct igmp_grouprec	 ig;
 	struct igmp_grouprec	*pig;
 	struct ifnet		*ifp;
 	struct ip_msource	*ims, *nims;
 	struct mbuf		*m0, *m, *md;
 	int			 error, is_filter_list_change;
 	int			 minrec0len, m0srcs, msrcs, nbytes, off;
 	int			 record_has_sources;
 	int			 now;
 	int			 type;
 	in_addr_t		 naddr;
 	uint8_t			 mode;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	error = 0;
 	ifp = inm->inm_ifp;
 	is_filter_list_change = 0;
 	m = NULL;
 	m0 = NULL;
 	m0srcs = 0;
 	msrcs = 0;
 	nbytes = 0;
 	nims = NULL;
 	record_has_sources = 1;
 	pig = NULL;
 	type = IGMP_DO_NOTHING;
 	mode = inm->inm_st[1].iss_fmode;
 
 	/*
 	 * If we did not transition out of ASM mode during t0->t1,
 	 * and there are no source nodes to process, we can skip
 	 * the generation of source records.
 	 */
 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
 	    inm->inm_nsrc == 0)
 		record_has_sources = 0;
 
 	if (is_state_change) {
 		/*
 		 * Queue a state change record.
 		 * If the mode did not change, and there are non-ASM
 		 * listeners or source filters present,
 		 * we potentially need to issue two records for the group.
 		 * If we are transitioning to MCAST_UNDEFINED, we need
 		 * not send any sources.
 		 * If there are ASM listeners, and there was no filter
 		 * mode transition of any kind, do nothing.
 		 */
 		if (mode != inm->inm_st[0].iss_fmode) {
 			if (mode == MCAST_EXCLUDE) {
 				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
 				    __func__);
 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
 			} else {
 				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
 				    __func__);
 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
 				if (mode == MCAST_UNDEFINED)
 					record_has_sources = 0;
 			}
 		} else {
 			if (record_has_sources) {
 				is_filter_list_change = 1;
 			} else {
 				type = IGMP_DO_NOTHING;
 			}
 		}
 	} else {
 		/*
 		 * Queue a current state record.
 		 */
 		if (mode == MCAST_EXCLUDE) {
 			type = IGMP_MODE_IS_EXCLUDE;
 		} else if (mode == MCAST_INCLUDE) {
 			type = IGMP_MODE_IS_INCLUDE;
 			KASSERT(inm->inm_st[1].iss_asm == 0,
 			    ("%s: inm %p is INCLUDE but ASM count is %d",
 			     __func__, inm, inm->inm_st[1].iss_asm));
 		}
 	}
 
 	/*
 	 * Generate the filter list changes using a separate function.
 	 */
 	if (is_filter_list_change)
 		return (igmp_v3_enqueue_filter_change(ifq, inm));
 
 	if (type == IGMP_DO_NOTHING) {
 		CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
 		    __func__, inet_ntoa(inm->inm_addr),
 		    inm->inm_ifp->if_xname);
 		return (0);
 	}
 
 	/*
 	 * If any sources are present, we must be able to fit at least
 	 * one in the trailing space of the tail packet's mbuf,
 	 * ideally more.
 	 */
 	minrec0len = sizeof(struct igmp_grouprec);
 	if (record_has_sources)
 		minrec0len += sizeof(in_addr_t);
 
 	CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
 	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
 	    inm->inm_ifp->if_xname);
 
 	/*
 	 * Check if we have a packet in the tail of the queue for this
 	 * group into which the first group record for this group will fit.
 	 * Otherwise allocate a new packet.
 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
 	 * Note: Group records for G/GSR query responses MUST be sent
 	 * in their own packet.
 	 */
 	m0 = ifq->ifq_tail;
 	if (!is_group_query &&
 	    m0 != NULL &&
 	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
 	    (m0->m_pkthdr.len + minrec0len) <
 	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		m = m0;
 		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
 	} else {
 		if (_IF_QFULL(ifq)) {
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
 		m = NULL;
 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		/* A cluster is only tried for general current-state records. */
 		if (!is_state_change && !is_group_query) {
 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 			if (m)
 				m->m_data += IGMP_LEADINGSPACE;
 		}
 		if (m == NULL) {
 			m = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
 		if (m == NULL)
 			return (-ENOMEM);
 
 		igmp_save_context(m, ifp);
 
 		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
 	}
 
 	/*
 	 * Append group record.
 	 * If we have sources, we don't know how many yet.
 	 */
 	ig.ig_type = type;
 	ig.ig_datalen = 0;
 	ig.ig_numsrc = 0;
 	ig.ig_group = inm->inm_addr;
 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 		/* Only free packets allocated here, never the queue tail. */
 		if (m != m0)
 			m_freem(m);
 		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 		return (-ENOMEM);
 	}
 	nbytes += sizeof(struct igmp_grouprec);
 
 	/*
 	 * Append as many sources as will fit in the first packet.
 	 * If we are appending to a new packet, the chain allocation
 	 * may potentially use clusters; use m_getptr() in this case.
 	 * If we are appending to an existing packet, we need to obtain
 	 * a pointer to the group record after m_append(), in case a new
 	 * mbuf was allocated.
 	 * Only append sources which are in-mode at t1. If we are
 	 * transitioning to MCAST_UNDEFINED state on the group, do not
 	 * include source entries.
 	 * Only report recorded sources in our filter set when responding
 	 * to a group-source query.
 	 */
 	if (record_has_sources) {
 		if (m == m0) {
 			/*
 			 * nbytes is exactly the record header just
 			 * appended, so this points at that header.
 			 */
 			md = m_last(m);
 			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 			    md->m_len - nbytes);
 		} else {
 			md = m_getptr(m, 0, &off);
 			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 			    off);
 		}
 		msrcs = 0;
 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
 			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 			    inet_ntoa_haddr(ims->ims_haddr));
 			now = ims_get_mode(inm, ims, 1);
 			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
 			if ((now != mode) ||
 			    (now == mode && mode == MCAST_UNDEFINED)) {
 				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 				continue;
 			}
 			if (is_source_query && ims->ims_stp == 0) {
 				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 				    __func__);
 				continue;
 			}
 			CTR1(KTR_IGMPV3, "%s: append node", __func__);
 			naddr = htonl(ims->ims_haddr);
 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 				    __func__);
 				return (-ENOMEM);
 			}
 			/*
 			 * The bytes for this source are accounted for
 			 * once, after the loop, via msrcs; adding them
 			 * here as well would double-count them in the
 			 * returned size.
 			 */
 			++msrcs;
 			if (msrcs == m0srcs)
 				break;
 		}
 		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
 		    msrcs);
 		pig->ig_numsrc = htons(msrcs);
 		nbytes += (msrcs * sizeof(in_addr_t));
 	}
 
 	if (is_source_query && msrcs == 0) {
 		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
 		if (m != m0)
 			m_freem(m);
 		return (0);
 	}
 
 	/*
 	 * We are good to go with first packet.
 	 */
 	if (m != m0) {
 		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
 		m->m_pkthdr.PH_vt.vt_nrecs = 1;
 		_IF_ENQUEUE(ifq, m);
 	} else
 		m->m_pkthdr.PH_vt.vt_nrecs++;
 
 	/*
 	 * No further work needed if no source list in packet(s).
 	 */
 	if (!record_has_sources)
 		return (nbytes);
 
 	/*
 	 * Whilst sources remain to be announced, we need to allocate
 	 * a new packet and fill out as many sources as will fit.
 	 * Always try for a cluster first.
 	 * nims is the node following the last one visited above, or
 	 * NULL if the whole tree has been walked.
 	 */
 	while (nims != NULL) {
 		if (_IF_QFULL(ifq)) {
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 		if (m)
 			m->m_data += IGMP_LEADINGSPACE;
 		if (m == NULL) {
 			m = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
 		if (m == NULL)
 			return (-ENOMEM);
 		igmp_save_context(m, ifp);
 		/* The record header will sit at the start of the new packet. */
 		md = m_getptr(m, 0, &off);
 		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
 		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);
 
 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 			if (m != m0)
 				m_freem(m);
 			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 			return (-ENOMEM);
 		}
 		m->m_pkthdr.PH_vt.vt_nrecs = 1;
 		nbytes += sizeof(struct igmp_grouprec);
 
 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 
 		msrcs = 0;
 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 			    inet_ntoa_haddr(ims->ims_haddr));
 			now = ims_get_mode(inm, ims, 1);
 			if ((now != mode) ||
 			    (now == mode && mode == MCAST_UNDEFINED)) {
 				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 				continue;
 			}
 			if (is_source_query && ims->ims_stp == 0) {
 				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 				    __func__);
 				continue;
 			}
 			CTR1(KTR_IGMPV3, "%s: append node", __func__);
 			naddr = htonl(ims->ims_haddr);
 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 				    __func__);
 				return (-ENOMEM);
 			}
 			++msrcs;
 			if (msrcs == m0srcs)
 				break;
 		}
 		pig->ig_numsrc = htons(msrcs);
 		nbytes += (msrcs * sizeof(in_addr_t));
 
 		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
 		_IF_ENQUEUE(ifq, m);
 	}
 
 	return (nbytes);
 }
 
 /*
  * Type used to mark record pass completion.
  *
  * The values form a small bit mask: REC_ALLOW and REC_BLOCK are the two
  * kinds of record a filter change may produce, and REC_FULL (both bits
  * set) means both kinds have been dealt with.
  * We exploit the fact we can cast to this easily from the
  * current filter modes on each ip_msource node; the filter mode each
  * value is cast from is noted beside it.
  */
 typedef enum {
 	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
 	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
 	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
 	REC_FULL = REC_ALLOW | REC_BLOCK
 } rectype_t;
 
 /*
  * Enqueue an IGMPv3 filter list change to the given output queue.
  *
  * Source list filter state is held in an RB-tree. When the filter list
  * for a group is changed without changing its mode, we need to compute
  * the deltas between T0 and T1 for each source in the filter set,
  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
  *
  * As we may potentially queue two record types, and the entire R-B tree
  * needs to be walked at once, we break this out into its own function
  * so we can generate a tightly packed queue of packets.
  *
  * XXX This could be written to only use one tree walk, although that makes
  * serializing into the mbuf chains a bit harder. For now we do two walks
  * which makes things easier on us, and it may or may not be harder on
  * the L2 cache.
  *
  * Parameters:
  *   ifq - queue the group records are appended to; its tail packet is
  *         reused when it still has room.
  *   inm - group whose source filter deltas are to be reported.
  *
  * If successful the size of all data appended to the queue is returned,
  * otherwise an error code less than zero is returned (-ENOMEM when an
  * mbuf cannot be allocated or appended to), or zero if
  * no record(s) were appended.
  */
 static int
 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
 {
 	static const int MINRECLEN =
 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
 	struct ifnet		*ifp;
 	struct igmp_grouprec	 ig;
 	struct igmp_grouprec	*pig;
 	struct ip_msource	*ims, *nims;
 	struct mbuf		*m, *m0, *md;
 	in_addr_t		 naddr;
 	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
 	int			 nallow, nblock;
 	uint8_t			 mode, now, then;
 	rectype_t		 crt, drt, nrt;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	/*
 	 * Nothing to do if the group has no sources, or if iss_asm is
 	 * non-zero at both t0 and t1.
 	 */
 	if (inm->inm_nsrc == 0 ||
 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
 		return (0);
 
 	ifp = inm->inm_ifp;			/* interface */
 	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
 	crt = REC_NONE;	/* current group record type */
 	drt = REC_NONE;	/* mask of completed group record types */
 	nrt = REC_NONE;	/* record type for current node */
 	m0srcs = 0;	/* # source which will fit in current mbuf chain */
 	nbytes = 0;	/* # of bytes appended to group's state-change queue */
 	npbytes = 0;	/* # of bytes appended this packet */
 	rsrcs = 0;	/* # sources encoded in current record */
 	schanged = 0;	/* # nodes encoded in overall filter change */
 	nallow = 0;	/* # of source entries in ALLOW_NEW */
 	nblock = 0;	/* # of source entries in BLOCK_OLD */
 	nims = NULL;	/* next tree node pointer */
 
 	/*
 	 * For each possible filter record mode.
 	 * The first kind of source we encounter tells us which
 	 * is the first kind of record we start appending.
 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
 	 * as the inverse of the group's filter mode.
 	 */
 	while (drt != REC_FULL) {
 		do {
 			/*
 			 * Reuse the packet at the tail of the queue if it
 			 * can take one more record carrying at least one
 			 * source; otherwise start a new packet.
 			 */
 			m0 = ifq->ifq_tail;
 			if (m0 != NULL &&
 			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
 			     IGMP_V3_REPORT_MAXRECS) &&
 			    (m0->m_pkthdr.len + MINRECLEN) <
 			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 				m = m0;
 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 					    sizeof(struct igmp_grouprec)) /
 				    sizeof(in_addr_t);
 				CTR1(KTR_IGMPV3,
 				    "%s: use previous packet", __func__);
 			} else {
 				/* Try for a cluster first, then a plain header mbuf. */
 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 				if (m)
 					m->m_data += IGMP_LEADINGSPACE;
 				if (m == NULL) {
 					m = m_gethdr(M_DONTWAIT, MT_DATA);
 					if (m)
 						MH_ALIGN(m, IGMP_LEADINGSPACE);
 				}
 				if (m == NULL) {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_get*() failed", __func__);
 					return (-ENOMEM);
 				}
 				m->m_pkthdr.PH_vt.vt_nrecs = 0;
 				igmp_save_context(m, ifp);
 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 				    sizeof(struct igmp_grouprec)) /
 				    sizeof(in_addr_t);
 				npbytes = 0;
 				CTR1(KTR_IGMPV3,
 				    "%s: allocated new packet", __func__);
 			}
 			/*
 			 * Append the IGMP group record header to the
 			 * current packet's data area.
 			 * Recalculate pointer to free space for next
 			 * group record, in case m_append() allocated
 			 * a new mbuf or cluster.
 			 */
 			memset(&ig, 0, sizeof(ig));
 			ig.ig_group = inm->inm_addr;
 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3,
 				    "%s: m_append() failed", __func__);
 				return (-ENOMEM);
 			}
 			npbytes += sizeof(struct igmp_grouprec);
 			if (m != m0) {
 				/* new packet; offset in chain */
 				md = m_getptr(m, npbytes -
 				    sizeof(struct igmp_grouprec), &off);
 				pig = (struct igmp_grouprec *)(mtod(md,
 				    uint8_t *) + off);
 			} else {
 				/* current packet; offset from last append */
 				md = m_last(m);
 				pig = (struct igmp_grouprec *)(mtod(md,
 				    uint8_t *) + md->m_len -
 				    sizeof(struct igmp_grouprec));
 			}
 			/*
 			 * Begin walking the tree for this record type
 			 * pass, or continue from where we left off
 			 * previously if we had to allocate a new packet.
 			 * Only report deltas in-mode at t1.
 			 * We need not report included sources as allowed
 			 * if we are in inclusive mode on the group,
 			 * however the converse is not true.
 			 */
 			rsrcs = 0;
 			if (nims == NULL)
 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 				CTR2(KTR_IGMPV3, "%s: visit node %s",
 				    __func__, inet_ntoa_haddr(ims->ims_haddr));
 				now = ims_get_mode(inm, ims, 1);
 				then = ims_get_mode(inm, ims, 0);
 				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
 				    __func__, then, now);
 				if (now == then) {
 					CTR1(KTR_IGMPV3,
 					    "%s: skip unchanged", __func__);
 					continue;
 				}
 				if (mode == MCAST_EXCLUDE &&
 				    now == MCAST_INCLUDE) {
 					CTR1(KTR_IGMPV3,
 					    "%s: skip IN src on EX group",
 					    __func__);
 					continue;
 				}
 				nrt = (rectype_t)now;
 				if (nrt == REC_NONE)
 					nrt = (rectype_t)(~mode & REC_FULL);
 				/*
 				 * The first changed node seen fixes the
 				 * record type of the first pass; nodes of
 				 * the other type are skipped until the
 				 * pass for their type.
 				 */
 				if (schanged++ == 0) {
 					crt = nrt;
 				} else if (crt != nrt)
 					continue;
 				naddr = htonl(ims->ims_haddr);
 				if (!m_append(m, sizeof(in_addr_t),
 				    (void *)&naddr)) {
 					if (m != m0)
 						m_freem(m);
 					CTR1(KTR_IGMPV3,
 					    "%s: m_append() failed", __func__);
 					return (-ENOMEM);
 				}
 				nallow += !!(crt == REC_ALLOW);
 				nblock += !!(crt == REC_BLOCK);
 				/* Stop once this packet has no room for more sources. */
 				if (++rsrcs == m0srcs)
 					break;
 			}
 			/*
 			 * If we did not append any tree nodes on this
 			 * pass, back out of allocations.
 			 */
 			if (rsrcs == 0) {
 				npbytes -= sizeof(struct igmp_grouprec);
 				if (m != m0) {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_free(m)", __func__);
 					m_freem(m);
 				} else {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_adj(m, -ig)", __func__);
 					m_adj(m, -((int)sizeof(
 					    struct igmp_grouprec)));
 				}
 				continue;
 			}
 			/* Fill in the record header now the source count is known. */
 			npbytes += (rsrcs * sizeof(in_addr_t));
 			if (crt == REC_ALLOW)
 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
 			else if (crt == REC_BLOCK)
 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
 			pig->ig_numsrc = htons(rsrcs);
 			/*
 			 * Count the new group record, and enqueue this
 			 * packet if it wasn't already queued.
 			 */
 			m->m_pkthdr.PH_vt.vt_nrecs++;
 			if (m != m0)
 				_IF_ENQUEUE(ifq, m);
 			nbytes += npbytes;
 		} while (nims != NULL);
 		/* This record type is done; switch to the other one. */
 		drt |= crt;
 		crt = (~crt & REC_FULL);
 	}
 
 	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
 	    nallow, nblock);
 
 	return (nbytes);
 }
 
 /*
  * Merge the state-change messages pending on a group's own queue
  * (inm->inm_scq) into the state-change queue passed in as ifscq.
  *
  * Each pending packet is either chained onto the tail packet of ifscq,
  * when the combined record count and length still fit, or enqueued on
  * ifscq as a packet in its own right.  When inm->inm_scrv is greater
  * than zero the pending packets are duplicated with m_dup() and the
  * originals stay on the group queue; otherwise they are dequeued and
  * moved.
  *
  * Returns 0 on success, or ENOMEM (a positive value) if m_dup() fails.
  */
 static int
 igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
 {
 	struct ifqueue	*gq;
 	struct mbuf	*m;		/* pending state-change */
 	struct mbuf	*m0;		/* copy of pending state-change */
 	struct mbuf	*mt;		/* last state-change in packet */
 	int		 docopy, domerge;
 	u_int		 recslen;
 
 	docopy = 0;
 	domerge = 0;
 	recslen = 0;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
 	 * If there are further pending retransmissions, make a writable
 	 * copy of each queued state-change message before merging.
 	 */
 	if (inm->inm_scrv > 0)
 		docopy = 1;
 
 	gq = &inm->inm_scq;
 #ifdef KTR
 	if (gq->ifq_head == NULL) {
 		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
 		    __func__, inm);
 	}
 #endif
 
 	m = gq->ifq_head;
 	while (m != NULL) {
 		/*
 		 * Only merge the report into the current packet if
 		 * there is sufficient space to do so; an IGMPv3 report
 		 * packet may only contain 65,535 group records.
 		 * Always use a simple mbuf chain concatenation to do this,
 		 * as large state changes for single groups may have
 		 * allocated clusters.
 		 */
 		domerge = 0;
 		mt = ifscq->ifq_tail;
 		if (mt != NULL) {
 			recslen = m_length(m, NULL);
 
 			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
 			    m->m_pkthdr.PH_vt.vt_nrecs <=
 			    IGMP_V3_REPORT_MAXRECS) &&
 			    (mt->m_pkthdr.len + recslen <=
 			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
 				domerge = 1;
 		}
 
 		/*
 		 * NOTE(review): this tests whether gq (the group's own
 		 * queue) is full, not ifscq (the queue being appended to)
 		 * -- confirm that is intended.
 		 * NOTE(review): when !docopy, m is freed below without
 		 * being dequeued from gq -- confirm gq is not left
 		 * referencing the freed packet.
 		 */
 		if (!domerge && _IF_QFULL(gq)) {
 			CTR2(KTR_IGMPV3,
 			    "%s: outbound queue full, skipping whole packet %p",
 			    __func__, m);
 			mt = m->m_nextpkt;
 			if (!docopy)
 				m_freem(m);
 			m = mt;
 			continue;
 		}
 
 		/*
 		 * Obtain the packet to hand over (m0): either take it off
 		 * the group queue or duplicate it, and advance m.
 		 */
 		if (!docopy) {
 			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
 			_IF_DEQUEUE(gq, m0);
 			m = m0->m_nextpkt;
 		} else {
 			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
 			m0 = m_dup(m, M_NOWAIT);
 			if (m0 == NULL)
 				return (ENOMEM);
 			m0->m_nextpkt = NULL;
 			m = m->m_nextpkt;
 		}
 
 		if (!domerge) {
 			CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
 			    __func__, m0, ifscq);
 			_IF_ENQUEUE(ifscq, m0);
 		} else {
 			struct mbuf *mtl;	/* last mbuf of packet mt */
 
 			CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
 			    __func__, m0, mt);
 
 			/*
 			 * Chain m0 onto the end of mt: m0 stops being a
 			 * packet header and mt's length and record count
 			 * absorb m0's.
 			 */
 			mtl = m_last(mt);
 			m0->m_flags &= ~M_PKTHDR;
 			mt->m_pkthdr.len += recslen;
 			mt->m_pkthdr.PH_vt.vt_nrecs +=
 			    m0->m_pkthdr.PH_vt.vt_nrecs;
 
 			mtl->m_next = m0;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Answer an outstanding IGMPv3 General Query on the interface described
  * by igi: queue a current-state record for every eligible IPv4 group on
  * the interface, send one burst from the general query queue, and
  * re-arm the interface timer if anything is left to send.
  */
 static void
 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
 {
 	struct ifnet		*oifp;
 	struct ifmultiaddr	*maddr;
 	struct in_multi		*grp;
 	int			 queued;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi->igi_version == IGMP_VERSION_3,
 	    ("%s: called when version %d", __func__, igi->igi_version));
 
 	oifp = igi->igi_ifp;
 
 	IF_ADDR_RLOCK(oifp);
 	TAILQ_FOREACH(maddr, &oifp->if_multiaddrs, ifma_link) {
 		/* Only IPv4 memberships with protocol state attached. */
 		if (maddr->ifma_addr->sa_family != AF_INET)
 			continue;
 		if (maddr->ifma_protospec == NULL)
 			continue;
 
 		grp = (struct in_multi *)maddr->ifma_protospec;
 		KASSERT(oifp == grp->inm_ifp,
 		    ("%s: inconsistent ifp", __func__));
 
 		switch (grp->inm_state) {
 		case IGMP_REPORTING_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			grp->inm_state = IGMP_REPORTING_MEMBER;
 			queued = igmp_v3_enqueue_group_record(&igi->igi_gq,
 			    grp, 0, 0, 0);
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, queued);
 			break;
 		default:
 			/*
 			 * Non-members, silent members, leaving members and
 			 * groups with a group or group-source query pending
 			 * are not reported here.
 			 */
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(oifp);
 
 	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
 	    (igi->igi_flags & IGIF_LOOPBACK) != 0);
 
 	/* Queue drained in a single burst: nothing further to schedule. */
 	if (igi->igi_gq.ifq_head == NULL)
 		return;
 
 	/*
 	 * Slew transmission of bursts over 500ms intervals.
 	 */
 	igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
 	    IGMP_RESPONSE_BURST_INTERVAL);
 	V_interface_timers_running = 1;
 }
 
 /*
  * Transmit the next pending IGMP message in the output queue.
  *
  * We get called from netisr_processqueue(). A mutex private to igmpoq
  * will be acquired and released around this routine.
  *
  * The mbuf chain m carries the vnet pointer in m_pkthdr.header and an
  * interface index recovered by igmp_restore_context(). Chains flagged
  * M_IGMPV2 are passed to ip_output() as they are; all others are first
  * wrapped by igmp_v3_encap_report().
  *
  * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
  * MRT: Nothing needs to be done, as IGMP traffic is always local to
  * a link and uses a link-scope multicast address.
  */
 static void
 igmp_intr(struct mbuf *m)
 {
 	struct ip_moptions	 imo;
 	struct ifnet		*ifp;
 	struct mbuf		*ipopts, *m0;
 	int			 error;
 	uint32_t		 ifindex;
 
 	CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);
 
 	/*
 	 * Set VNET image pointer from enqueued mbuf chain
 	 * before doing anything else. Whilst we use interface
 	 * indexes to guard against interface detach, they are
 	 * unique to each VIMAGE and must be retrieved.
 	 */
 	CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
 	ifindex = igmp_restore_context(m);
 
 	/*
 	 * Check if the ifnet still exists. This limits the scope of
 	 * any race in the absence of a global ifp lock for low cost
 	 * (an array lookup).
 	 */
 	ifp = ifnet_byindex(ifindex);
 	if (ifp == NULL) {
 		CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
 		    __func__, m, ifindex);
 		m_freem(m);
 		IPSTAT_INC(ips_noroute);
 		goto out;
 	}
 
 	/* Attach the pre-built option mbuf only when V_igmp_sendra is set. */
 	ipopts = V_igmp_sendra ? m_raopt : NULL;
 
 	imo.imo_multicast_ttl  = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_multicast_loop = (V_ip_mrouter != NULL);
 
 	/*
 	 * If the user requested that IGMP traffic be explicitly
 	 * redirected to the loopback interface (e.g. they are running a
 	 * MANET interface and the routing protocol needs to see the
 	 * updates), handle this now.
 	 */
 	if (m->m_flags & M_IGMP_LOOP)
 		imo.imo_multicast_ifp = V_loif;
 	else
 		imo.imo_multicast_ifp = ifp;
 
 	if (m->m_flags & M_IGMPV2) {
 		m0 = m;
 	} else {
 		m0 = igmp_v3_encap_report(ifp, m);
 		if (m0 == NULL) {
 			/*
 			 * NOTE(review): igmp_v3_encap_report() returns
 			 * NULL only after M_PREPEND() has left its chain
 			 * pointer NULL. If M_PREPEND() frees the chain on
 			 * failure, the m_freem(m) below would free it a
 			 * second time -- confirm against mbuf(9).
 			 */
 			CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
 			m_freem(m);
 			IPSTAT_INC(ips_odropped);
 			goto out;
 		}
 	}
 
 	igmp_scrub_context(m0);
 	/*
 	 * NOTE(review): the protocol flags are cleared on m, not m0;
 	 * the two differ if encapsulation returned a new chain head --
 	 * confirm this is intended.
 	 */
 	m->m_flags &= ~(M_PROTOFLAGS);
 	m0->m_pkthdr.rcvif = V_loif;
 #ifdef MAC
 	mac_netinet_igmp_send(ifp, m0);
 #endif
 	error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
 	if (error) {
 		CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
 		goto out;
 	}
 
 	IGMPSTAT_INC(igps_snd_reports);
 
 out:
 	/*
 	 * We must restore the existing vnet pointer before
 	 * continuing as we are run from netisr context.
 	 */
 	CURVNET_RESTORE();
 }
 
 /*
  * Encapsulate an IGMPv3 report.
  *
  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
  * chain has already had its IP/IGMPv3 header prepended. In this case
  * the function will not attempt to prepend; the lengths and checksums
  * will however be re-computed.
  *
  * Parameters:
  *   ifp - interface consulted for a source address when the chain is
  *         flagged M_IGMP_LOOP.
  *   m   - packet-header mbuf chain holding the group records; the
  *         report's group count is taken from m_pkthdr.PH_vt.vt_nrecs,
  *         which is reset to zero afterwards.
  *
  * Returns a pointer to the new mbuf chain head, or NULL if the
  * allocation failed.
  */
 static struct mbuf *
 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
 {
 	struct igmp_report	*igmp;
 	struct ip		*ip;
 	int			 hdrlen, igmpreclen;
 
 	KASSERT((m->m_flags & M_PKTHDR),
 	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
 
 	igmpreclen = m_length(m, NULL);
 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
 
 	if (m->m_flags & M_IGMPV3_HDR) {
 		/* Headers already present: exclude them from the record length. */
 		igmpreclen -= hdrlen;
 	} else {
 		M_PREPEND(m, hdrlen, M_DONTWAIT);
 		if (m == NULL)
 			return (NULL);
 		m->m_flags |= M_IGMPV3_HDR;
 	}
 
 	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);
 
 	/*
 	 * Temporarily step past the IP header so that mtod() and
 	 * in_cksum() see the chain starting at the IGMP report header.
 	 */
 	m->m_data += sizeof(struct ip);
 	m->m_len -= sizeof(struct ip);
 
 	igmp = mtod(m, struct igmp_report *);
 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
 	igmp->ir_rsv1 = 0;
 	igmp->ir_rsv2 = 0;
 	igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
 	/* The checksum field must be zero while the checksum is computed. */
 	igmp->ir_cksum = 0;
 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
 	m->m_pkthdr.PH_vt.vt_nrecs = 0;
 
 	/* Undo the adjustment so the chain starts at the IP header again. */
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
 	ip->ip_len = hdrlen + igmpreclen;
 	ip->ip_off = IP_DF;
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_sum = 0;
 
 	ip->ip_src.s_addr = INADDR_ANY;
 
 	/*
 	 * For chains flagged M_IGMP_LOOP, fill in the source address
 	 * from the interface when it has one; otherwise it stays
 	 * INADDR_ANY.
 	 */
 	if (m->m_flags & M_IGMP_LOOP) {
 		struct in_ifaddr *ia;
 
 		IFP_TO_IA(ifp, ia);
 		if (ia != NULL) {
 			ip->ip_src = ia->ia_addr.sin_addr;
 			ifa_free(&ia->ia_ifa);
 		}
 	}
 
 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
 
 	return (m);
 }
 
 #ifdef KTR
 /*
  * Translate an IGMPv3 group record type into a short label for trace
  * output. Any type that is not recognised yields "unknown".
  */
 static char *
 igmp_rec_type_to_str(const int type)
 {
 
 	if (type == IGMP_CHANGE_TO_EXCLUDE_MODE)
 		return "TO_EX";
 	if (type == IGMP_CHANGE_TO_INCLUDE_MODE)
 		return "TO_IN";
 	if (type == IGMP_MODE_IS_EXCLUDE)
 		return "MODE_EX";
 	if (type == IGMP_MODE_IS_INCLUDE)
 		return "MODE_IN";
 	if (type == IGMP_ALLOW_NEW_SOURCES)
 		return "ALLOW_NEW";
 	if (type == IGMP_BLOCK_OLD_SOURCES)
 		return "BLOCK_OLD";
 
 	return "unknown";
 }
 #endif
 
 /*
  * One-time IGMP set-up, run through the SYSINIT below: initialise the
  * IGMP lock, store the result of igmp_ra_alloc() in m_raopt, and
  * register the igmp_nh handler with netisr.
  */
 static void
 igmp_init(void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 
 	IGMP_LOCK_INIT();
 
 	m_raopt = igmp_ra_alloc();
 
 	netisr_register(&igmp_nh);
 }
 SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL);
 
 /*
  * Tear-down counterpart of igmp_init(), run through the SYSUNINIT
  * below. It undoes the set-up steps in reverse order: unregister the
  * netisr handler, free and clear m_raopt, then destroy the IGMP lock.
  */
 static void
 igmp_uninit(void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 
 	netisr_unregister(&igmp_nh);
 
 	m_free(m_raopt);
 	m_raopt = NULL;
 
 	IGMP_LOCK_DESTROY();
 }
 SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL);
 
 /*
  * Per-vnet set-up, run through the VNET_SYSINIT below: start with an
  * empty V_igi_head list.
  */
 static void
 vnet_igmp_init(const void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 
 	LIST_INIT(&V_igi_head);
 }
 VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init,
     NULL);
 
 /*
  * Per-vnet tear-down, run through the VNET_SYSUNINIT below. Nothing is
  * released here; it only asserts that V_igi_head is already empty.
  */
 static void
 vnet_igmp_uninit(const void *unused __unused)
 {
 
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 
 	KASSERT(LIST_EMPTY(&V_igi_head),
 	    ("%s: igi list not empty; ifnets not detached?", __func__));
 }
 VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_igmp_uninit, NULL);
 
 /*
  * Module event handler for the "igmp" module. Load and unload events
  * are accepted without doing any work; every other event type is
  * refused with EOPNOTSUPP.
  */
 static int
 igmp_modevent(module_t mod, int type, void *unused __unused)
 {
 
 	if (type == MOD_LOAD || type == MOD_UNLOAD)
 		return (0);
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module description handed to DECLARE_MODULE below: the module name,
  * its event handler, and a third field left as 0.
  */
 static moduledata_t igmp_mod = {
     "igmp",
     igmp_modevent,
     0
 };
 DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: stable/9/sys/netinet6/nd6_rtr.c
===================================================================
--- stable/9/sys/netinet6/nd6_rtr.c	(revision 281230)
+++ stable/9/sys/netinet6/nd6_rtr.c	(revision 281231)
@@ -1,2196 +1,2204 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/errno.h>
 #include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/radix.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <net/if_llatbl.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 
 /* Forward declarations of helpers private to this file. */
 static int rtpref(struct nd_defrouter *);
 static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
 static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
     struct mbuf *, int);
 static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
 static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
 	struct nd_defrouter *);
 static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
 static void pfxrtr_del(struct nd_pfxrouter *);
 static struct nd_pfxrouter *find_pfxlist_reachable_router
 (struct nd_prefix *);
 static void defrouter_delreq(struct nd_defrouter *);
 static void nd6_rtmsg(int, struct rtentry *);
 
 static int in6_init_prefix_ltimes(struct nd_prefix *);
 static void in6_init_address_ltimes(struct nd_prefix *,
 	struct in6_addrlifetime *);
 
 static int nd6_prefix_onlink(struct nd_prefix *);
 static int nd6_prefix_offlink(struct nd_prefix *);
 
 static int rt6_deleteroute(struct radix_node *, void *);
 
 /* Declared here only; nd6_ra_input() reads it to reset ndi->recalctm. */
 VNET_DECLARE(int, nd6_recalc_reachtm_interval);
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
 
 /* Per-vnet variables defined by this file. */
 static VNET_DEFINE(struct ifnet *, nd6_defifp);
 VNET_DEFINE(int, nd6_defifindex);
 #define	V_nd6_defifp			VNET(nd6_defifp)
 
 VNET_DEFINE(int, ip6_use_tempaddr) = 0;
 
 VNET_DEFINE(int, ip6_desync_factor);
 VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
 VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
 
 VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
 
 /*
  * Router preference values.
  * RTPREF_MEDIUM has to be 0!
  */
 #define RTPREF_HIGH	1
 #define RTPREF_MEDIUM	0
 #define RTPREF_LOW	(-1)
 #define RTPREF_RESERVED	(-2)
 #define RTPREF_INVALID	(-3)	/* internal */
 
 /*
  * Receive Router Solicitation Message - just for routers.
  * Router solicitation/advertisement is mostly managed by userland program
  * (rtadvd) so here we have no function like nd6_ra_output().
  *
  * Parameters:
  *   m        - received packet; the receiving interface is taken from
  *              m->m_pkthdr.rcvif and the IPv6 header from the start of
  *              the mbuf data.
  *   off      - offset of the RS header from the start of the IPv6 header.
  *   icmp6len - length handed to the header check; less the RS header it
  *              is the length of the options that follow.
  *
  * The mbuf is freed on both the freeit and bad exits; bad additionally
  * counts the packet as a bad RS.
  *
  * Based on RFC 2461
  */
 void
 nd6_rs_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_solicit *nd_rs;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	union nd_opts ndopts;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/*
 	 * Accept RS only when V_ip6_forwarding=1 and the interface has
 	 * no ND6_IFF_ACCEPT_RTADV.
 	 */
 	if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
 		goto freeit;
 
 	/* Sanity checks */
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	/*
 	 * Don't update the neighbor cache, if src = ::.
 	 * This indicates that the src has no IP address assigned yet.
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
 		goto freeit;
 
 	/*
 	 * NOTE(review): both variants below can leave the function without
 	 * reaching freeit/bad; presumably the macros dispose of the mbuf
 	 * themselves on failure -- confirm.
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
 	if (nd_rs == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 
 	/* Parse the options that follow the fixed RS header. */
 	icmp6len -= sizeof(*nd_rs);
 	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	/* nd_opt_len counts units of 8 octets, hence the shift. */
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * The option length must equal the interface's address length
 	 * plus 2, rounded up to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: lladdrlen mismatch for %s "
 		    "(if %d, RS packet %d)\n",
 		    ip6_sprintf(ip6bufs, &saddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	ICMP6STAT_INC(icp6s_badrs);
 	m_freem(m);
 }
 
 /*
  * Receive Router Advertisement Message.
  *
  * After validating the packet this updates, in order: the per-interface
  * ND parameters and the default router list, the prefix list (one
  * prelist_update() call per acceptable prefix information option), the
  * link MTU, and the neighbor cache entry for the sender.
  *
  * Parameters:
  *   m        - received packet; the receiving interface is taken from
  *              m->m_pkthdr.rcvif and the IPv6 header from the start of
  *              the mbuf data.
  *   off      - offset of the RA header from the start of the IPv6 header.
  *   icmp6len - length handed to the header check; less the RA header it
  *              is the length of the options that follow.
  *
  * The mbuf is freed on both the freeit and bad exits; bad additionally
  * counts the packet as a bad RA.
  *
  * Based on RFC 2461
  * TODO: on-link bit on prefix information
  * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
  */
 void
 nd6_ra_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_advert *nd_ra;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	int mcast = 0;
 	union nd_opts ndopts;
 	struct nd_defrouter *dr;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/*
 	 * We accept RAs only when the per-interface flag
 	 * ND6_IFF_ACCEPT_RTADV is on the receiving interface.
 	 */
 	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
 		goto freeit;
 
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: src %s is not link-local\n",
 		    ip6_sprintf(ip6bufs, &saddr6)));
 		goto bad;
 	}
 
 	/*
 	 * NOTE(review): both variants below can leave the function without
 	 * reaching freeit/bad; presumably the macros dispose of the mbuf
 	 * themselves on failure -- confirm.
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
 	if (nd_ra == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 
 	/* Parse the options that follow the fixed RA header. */
 	icmp6len -= sizeof(*nd_ra);
 	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
     {
 	struct nd_defrouter dr0;
 	u_int32_t advreachable = nd_ra->nd_ra_reachable;
 
 	/* remember if this is a multicasted advertisement */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
 		mcast = 1;
 
 	bzero(&dr0, sizeof(dr0));
 	dr0.rtaddr = saddr6;
 	dr0.flags  = nd_ra->nd_ra_flags_reserved;
 	/*
 	 * Effectively-disable routes from RA messages when
 	 * ND6_IFF_NO_RADR enabled on the receiving interface or
 	 * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1).
 	 */
 	if (ndi->flags & ND6_IFF_NO_RADR)
 		dr0.rtlifetime = 0;
 	else if (V_ip6_forwarding && !V_ip6_rfc6204w3)
 		dr0.rtlifetime = 0;
 	else
 		dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
 	dr0.expire = time_second + dr0.rtlifetime;
 	dr0.ifp = ifp;
 	/* unspecified or not? (RFC 2461 6.3.4) */
 	if (advreachable) {
 		advreachable = ntohl(advreachable);
 		if (advreachable <= MAX_REACHABLE_TIME &&
 		    ndi->basereachable != advreachable) {
 			ndi->basereachable = advreachable;
 			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
 		}
 	}
 	if (nd_ra->nd_ra_retransmit)
 		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
 	/* A zero Cur Hop Limit is skipped; only non-zero values are considered. */
-	if (nd_ra->nd_ra_curhoplimit)
-		ndi->chlim = nd_ra->nd_ra_curhoplimit;
+	if (nd_ra->nd_ra_curhoplimit) {
+		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
+			ndi->chlim = nd_ra->nd_ra_curhoplimit;
+		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
+			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
+			    "%s on %s (current = %d, received = %d). "
+			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
+			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
+		}
+	}
 	dr = defrtrlist_update(&dr0);
     }
 
 	/*
 	 * prefix
 	 */
 	if (ndopts.nd_opts_pi) {
 		struct nd_opt_hdr *pt;
 		struct nd_opt_prefix_info *pi = NULL;
 		struct nd_prefixctl pr;
 
 		/*
 		 * Step through the options from the first to the last
 		 * prefix information option, advancing by each option's
 		 * own length (nd_opt_len counts units of 8 octets).
 		 */
 		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
 		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
 		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
 						(pt->nd_opt_len << 3))) {
 			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
 				continue;
 			pi = (struct nd_opt_prefix_info *)pt;
 
 			if (pi->nd_opt_pi_len != 4) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid option "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_len));
 				continue;
 			}
 
 			if (128 < pi->nd_opt_pi_prefix_len) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_prefix_len));
 				continue;
 			}
 
 			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
 			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "%s, ignored\n",
 				    ip6_sprintf(ip6bufs,
 					&pi->nd_opt_pi_prefix)));
 				continue;
 			}
 
 			/* Build the prefix control block from the option. */
 			bzero(&pr, sizeof(pr));
 			pr.ndpr_prefix.sin6_family = AF_INET6;
 			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
 			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
 			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
 
 			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
 			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
 			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
 			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
 			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
 			(void)prelist_update(&pr, dr, m, mcast);
 		}
 	}
 
 	/*
 	 * MTU
 	 */
 	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
 		u_long mtu;
 		u_long maxmtu;
 
 		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
 
 		/* lower bound */
 		if (mtu < IPV6_MMTU) {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
 			    "mtu=%lu sent from %s, ignoring\n",
 			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
 			goto skip;
 		}
 
 		/* upper bound */
 		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
 		    ? ndi->maxmtu : ifp->if_mtu;
 		if (mtu <= maxmtu) {
 			int change = (ndi->linkmtu != mtu);
 
 			ndi->linkmtu = mtu;
 			if (change) /* in6_maxmtu may change */
 				in6_setmaxmtu();
 		} else {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
 			    "mtu=%lu sent from %s; "
 			    "exceeds maxmtu %lu, ignoring\n",
 			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
 		}
 	}
 
  skip:
 
 	/*
 	 * Source link layer address
 	 */
     {
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * The option length must equal the interface's address length
 	 * plus 2, rounded up to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: lladdrlen mismatch for %s "
 		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr,
 	    lladdrlen, ND_ROUTER_ADVERT, 0);
 
 	/*
 	 * Installing a link-layer address might change the state of the
 	 * router's neighbor cache, which might also affect our on-link
 	 * detection of advertised prefixes.
 	 */
 	pfxlist_onlink_check();
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	ICMP6STAT_INC(icp6s_badra);
 	m_freem(m);
 }
 
 /*
  * default router list processing subroutines
  */
 
 /* tell the change to user processes watching the routing socket. */
 static void
 nd6_rtmsg(int cmd, struct rtentry *rt)
 {
 	struct rt_addrinfo info;
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	ifp = rt->rt_ifp;
 	if (ifp != NULL) {
 		IF_ADDR_RLOCK(ifp);
 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		ifa_ref(ifa);
 		IF_ADDR_RUNLOCK(ifp);
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 	} else
 		ifa = NULL;
 
 	rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum);
 	if (ifa != NULL)
 		ifa_free(ifa);
 }
 
 static void
 defrouter_addreq(struct nd_defrouter *new)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *newrt = NULL;
 	int s;
 	int error;
 
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 	bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len =
 	    sizeof(struct sockaddr_in6);
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = new->rtaddr;
 
 	s = splnet();
 	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
 	    RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
 	if (newrt) {
 		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
 		RTFREE(newrt);
 	}
 	if (error == 0)
 		new->installed = 1;
 	splx(s);
 	return;
 }
 
 /*
  * Search the default router list for the entry whose interface is 'ifp'
  * and whose router address equals 'addr'.  Returns the entry, or NULL
  * when no entry matches.
  */
 struct nd_defrouter *
 defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
 {
 	struct nd_defrouter *cand;
 
 	TAILQ_FOREACH(cand, &V_nd_defrouter, dr_entry) {
 		if (cand->ifp != ifp)
 			continue;
 		if (!IN6_ARE_ADDR_EQUAL(addr, &cand->rtaddr))
 			continue;
 		return (cand);
 	}
 
 	/* search failed */
 	return (NULL);
 }
 
 /*
  * Remove the default route for a given router.
  * This is just a subroutine function for defrouter_select(), and should
  * not be called from anywhere else.
  */
 static void
 defrouter_delreq(struct nd_defrouter *dr)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *oldrt = NULL;
 
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 	bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len =
 	    sizeof(struct sockaddr_in6);
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = dr->rtaddr;
 
 	in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate,
 	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB);
 	if (oldrt) {
 		nd6_rtmsg(RTM_DELETE, oldrt);
 		RTFREE(oldrt);
 	}
 
 	dr->installed = 0;
 }
 
 /*
  * remove all default routes from default router list
  */
 void
 defrouter_reset(void)
 {
 	struct nd_defrouter *dr;
 
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
 		defrouter_delreq(dr);
 
 	/*
 	 * XXX should we also nuke any default routers in the kernel, by
 	 * going through them by rtalloc1()?
 	 */
 }
 
 /*
  * Take router 'dr' off the default router list and free it.  Along the
  * way the routes through it are flushed (only when the interface accepts
  * router advertisements), its default route is withdrawn if it was
  * installed, and every prefix's reference to it is dropped.
  */
 void
 defrtrlist_del(struct nd_defrouter *dr)
 {
 	struct nd_prefix *pfx;
 	struct nd_pfxrouter *pfxrtr;
 	int was_installed = 0;
 
 	/*
 	 * Flush all the routing table entries that use the router
 	 * as a next hop.
 	 */
 	if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
 		rt6_flush(&dr->rtaddr, dr->ifp);
 
 	/* remember the state: defrouter_delreq() clears dr->installed */
 	if (dr->installed) {
 		was_installed = 1;
 		defrouter_delreq(dr);
 	}
 	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix list.
 	 */
 	LIST_FOREACH(pfx, &V_nd_prefix, ndpr_entry) {
 		pfxrtr = pfxrtr_lookup(pfx, dr);
 		if (pfxrtr != NULL)
 			pfxrtr_del(pfxrtr);
 	}
 	pfxlist_onlink_check();
 
 	/*
 	 * If the router was the installed one, choose a new one.
 	 */
 	if (was_installed)
 		defrouter_select();
 
 	free(dr, M_IP6NDP);
 }
 
 /*
  * Default Router Selection according to Section 6.3.6 of RFC 2461 and
  * draft-ietf-ipngwg-router-selection:
  * 1) Routers that are reachable or probably reachable should be preferred.
  *    If we have more than one (probably) reachable router, prefer ones
  *    with the highest router preference.
  * 2) When no routers on the list are known to be reachable or
  *    probably reachable, routers SHOULD be selected in a round-robin
  *    fashion, regardless of router preference values.
  * 3) If the Default Router List is empty, assume that all
  *    destinations are on-link.
  *
  * We assume nd_defrouter is sorted by router preference value.
  * Since the code below covers both with and without router preference cases,
  * we do not need to classify the cases by ifdef.
  *
  * At this moment, we do not try to install more than one default router,
  * even when the multipath routing is available, because we're not sure about
  * the benefits for stub hosts comparing to the risk of making the code
  * complicated and the possibility of introducing bugs.
  *
  * The whole function runs between splnet() and splx().
  */
 void
 defrouter_select(void)
 {
 	int s = splnet();
 	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
 	struct llentry *ln = NULL;
 
 	/*
 	 * Let's handle easy case (3) first:
 	 * If default router list is empty, there's nothing to be done.
 	 */
 	if (TAILQ_EMPTY(&V_nd_defrouter)) {
 		splx(s);
 		return;
 	}
 
 	/*
 	 * Search for a (probably) reachable router from the list.
 	 * We just pick up the first reachable one (if any), assuming that
 	 * the ordering rule of the list described in defrtrlist_update().
 	 */
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		/*
 		 * Once a router has been selected the lookup below is
 		 * short-circuited, but the walk continues so that the
 		 * installed router is still found.
 		 */
 		IF_AFDATA_RLOCK(dr->ifp);
 		if (selected_dr == NULL &&
 		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
 		    ND6_IS_LLINFO_PROBREACH(ln)) {
 			selected_dr = dr;
 		}
 		IF_AFDATA_RUNLOCK(dr->ifp);
 		/* release the entry returned by nd6_lookup(), if any */
 		if (ln != NULL) {
 			LLE_RUNLOCK(ln);
 			ln = NULL;
 		}
 
 		if (dr->installed && installed_dr == NULL)
 			installed_dr = dr;
 		else if (dr->installed && installed_dr) {
 			/* this should not happen.  warn for diagnosis. */
 			log(LOG_ERR, "defrouter_select: more than one router"
 			    " is installed\n");
 		}
 	}
 	/*
 	 * If none of the default routers was found to be reachable,
 	 * round-robin the list regardless of preference.
 	 * Otherwise, if we have an installed router, check if the selected
 	 * (reachable) router should really be preferred to the installed one.
 	 * We only prefer the new router when the old one is not reachable
 	 * or when the new one has a really higher preference value.
 	 */
 	if (selected_dr == NULL) {
 		/*
 		 * Take the entry after the installed one; start over from
 		 * the head when nothing is installed or the installed
 		 * router is the last entry.
 		 */
 		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
 			selected_dr = TAILQ_FIRST(&V_nd_defrouter);
 		else
 			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
 	} else if (installed_dr) {
 		/*
 		 * Keep the installed router when it is (probably) reachable
 		 * and the selected one does not have a higher preference.
 		 */
 		IF_AFDATA_RLOCK(installed_dr->ifp);
 		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
 		    ND6_IS_LLINFO_PROBREACH(ln) &&
 		    rtpref(selected_dr) <= rtpref(installed_dr)) {
 			selected_dr = installed_dr;
 		}
 		IF_AFDATA_RUNLOCK(installed_dr->ifp);
 		if (ln != NULL)
 			LLE_RUNLOCK(ln);
 	}
 
 	/*
 	 * If the selected router is different than the installed one,
 	 * remove the installed router and install the selected one.
 	 * Note that the selected router is never NULL here.
 	 */
 	if (installed_dr != selected_dr) {
 		if (installed_dr)
 			defrouter_delreq(installed_dr);
 		defrouter_addreq(selected_dr);
 	}
 
 	splx(s);
 	return;
 }
 
 /*
  * Map the router-preference bits of dr->flags (a 2-bit signed integer
  * field) to one of the RTPREF_* values used for default router
  * selection.  The reserved encoding is treated as medium preference.
  */
 static int
 rtpref(struct nd_defrouter *dr)
 {
 	int pref;
 
 	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
 	case ND_RA_FLAG_RTPREF_HIGH:
 		pref = RTPREF_HIGH;
 		break;
 	case ND_RA_FLAG_RTPREF_MEDIUM:
 	case ND_RA_FLAG_RTPREF_RSV:
 		pref = RTPREF_MEDIUM;
 		break;
 	case ND_RA_FLAG_RTPREF_LOW:
 		pref = RTPREF_LOW;
 		break;
 	default:
 		/*
 		 * This case should never happen.  If it did, it would mean a
 		 * serious bug of kernel internal.  We thus always bark here.
 		 * Or, can we even panic?
 		 */
 		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
 		pref = RTPREF_INVALID;
 		break;
 	}
 
 	return (pref);
 }
 
 /*
  * Merge the router described by 'new' into the default router list.
  *
  * - If an entry with the same address and interface exists and
  *   new->rtlifetime is 0, the entry is deleted and NULL is returned.
  * - If such an entry exists otherwise, its flags, lifetime and expiry are
  *   overwritten from 'new'.  The entry is returned as is when it is
  *   installed and its preference did not change; otherwise it is taken
  *   off the list and re-inserted at the position matching its preference.
  * - If no entry exists and new->rtlifetime is 0, nothing is done and NULL
  *   is returned.
  * - Otherwise a copy of 'new' is allocated and inserted; NULL is returned
  *   when the allocation fails.
  *
  * After any insertion defrouter_select() is called.  Returns the list
  * entry for the router, or NULL as described above.
  */
 static struct nd_defrouter *
 defrtrlist_update(struct nd_defrouter *new)
 {
 	struct nd_defrouter *dr, *n;
 	int s = splnet();
 
 	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
 		/* entry exists */
 		if (new->rtlifetime == 0) {
 			defrtrlist_del(dr);
 			dr = NULL;
 		} else {
 			/* sample the preference before the flags change */
 			int oldpref = rtpref(dr);
 
 			/* override */
 			dr->flags = new->flags; /* xxx flag check */
 			dr->rtlifetime = new->rtlifetime;
 			dr->expire = new->expire;
 
 			/*
 			 * If the preference does not change, there's no need
 			 * to sort the entries. Also make sure the selected
 			 * router is still installed in the kernel.
 			 */
 			if (dr->installed && rtpref(new) == oldpref) {
 				splx(s);
 				return (dr);
 			}
 
 			/*
 			 * preferred router may be changed, so relocate
 			 * this router.
 			 * XXX: calling TAILQ_REMOVE directly is a bad manner.
 			 * However, since defrtrlist_del() has many side
 			 * effects, we intentionally do so here.
 			 * defrouter_select() below will handle routing
 			 * changes later.
 			 */
 			TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
 			n = dr;
 			goto insert;
 		}
 		/*
 		 * Only the deletion branch above falls through to here, so
 		 * dr is NULL at this point.
 		 */
 		splx(s);
 		return (dr);
 	}
 
 	/* entry does not exist */
 	if (new->rtlifetime == 0) {
 		splx(s);
 		return (NULL);
 	}
 
 	n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
 	if (n == NULL) {
 		splx(s);
 		return (NULL);
 	}
 	bzero(n, sizeof(*n));
 	*n = *new;
 
 insert:
 	/*
 	 * Insert the new router in the Default Router List;
 	 * The Default Router List should be in the descending order
 	 * of router-preference.  Routers with the same preference are
 	 * sorted in the arriving time order.
 	 */
 
 	/*
 	 * insert at the end of the group: stop at the first entry whose
 	 * preference is strictly lower than that of n
 	 */
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		if (rtpref(n) > rtpref(dr))
 			break;
 	}
 	if (dr)
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
 
 	defrouter_select();
 
 	splx(s);
 
 	return (n);
 }
 
 /*
  * Find the entry on prefix pr's advertising-router list that refers to
  * router 'dr'.  Returns the entry, or NULL when 'dr' is not on the list.
  */
 static struct nd_pfxrouter *
 pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *cand;
 
 	LIST_FOREACH(cand, &pr->ndpr_advrtrs, pfr_entry) {
 		if (cand->router == dr)
 			return (cand);
 	}
 
 	return (NULL);
 }
 
 /*
  * Record router 'dr' at the head of prefix pr's advertising-router list
  * and re-run the on-link check.  Nothing happens when the (non-sleeping)
  * allocation fails.
  */
 static void
 pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *pfxrtr;
 
 	pfxrtr = (struct nd_pfxrouter *)malloc(sizeof(*pfxrtr), M_IP6NDP,
 	    M_NOWAIT);
 	if (pfxrtr == NULL) {
 		return;
 	}
 
 	bzero(pfxrtr, sizeof(*pfxrtr));
 	pfxrtr->router = dr;
 	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, pfxrtr, pfr_entry);
 
 	pfxlist_onlink_check();
 }
 
 /*
  * Unlink the advertising-router entry 'pfr' from the list it is on and
  * free it.
  */
 static void
 pfxrtr_del(struct nd_pfxrouter *pfr)
 {
 	LIST_REMOVE(pfr, pfr_entry);
 	free(pfr, M_IP6NDP);
 }
 
 /*
  * Find the prefix list entry that has the same interface, the same
  * prefix length and the same prefix bits (compared over that length) as
  * 'key'.  Returns the entry, or NULL when there is none.
  */
 struct nd_prefix *
 nd6_prefix_lookup(struct nd_prefixctl *key)
 {
 	struct nd_prefix *cand;
 
 	LIST_FOREACH(cand, &V_nd_prefix, ndpr_entry) {
 		if (key->ndpr_ifp != cand->ndpr_ifp)
 			continue;
 		if (key->ndpr_plen != cand->ndpr_plen)
 			continue;
 		if (in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
 		    &cand->ndpr_prefix.sin6_addr, key->ndpr_plen))
 			return (cand);
 	}
 
 	return (NULL);
 }
 
 int
 nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
     struct nd_prefix **newp)
 {
 	struct nd_prefix *new = NULL;
 	int error = 0;
 	int i, s;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
 	if (new == NULL)
 		return(ENOMEM);
 	bzero(new, sizeof(*new));
 	new->ndpr_ifp = pr->ndpr_ifp;
 	new->ndpr_prefix = pr->ndpr_prefix;
 	new->ndpr_plen = pr->ndpr_plen;
 	new->ndpr_vltime = pr->ndpr_vltime;
 	new->ndpr_pltime = pr->ndpr_pltime;
 	new->ndpr_flags = pr->ndpr_flags;
 	if ((error = in6_init_prefix_ltimes(new)) != 0) {
 		free(new, M_IP6NDP);
 		return(error);
 	}
 	new->ndpr_lastupdate = time_second;
 	if (newp != NULL)
 		*newp = new;
 
 	/* initialization */
 	LIST_INIT(&new->ndpr_advrtrs);
 	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
 	/* make prefix in the canonical form */
 	for (i = 0; i < 4; i++)
 		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
 		    new->ndpr_mask.s6_addr32[i];
 
 	s = splnet();
 	/* link ndpr_entry to nd_prefix list */
 	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
 	splx(s);
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
 		int e;
 
 		if ((e = nd6_prefix_onlink(new)) != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
 			    "the prefix %s/%d on-link on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 			/* proceed anyway. XXX: is it correct? */
 		}
 	}
 
 	if (dr)
 		pfxrtr_add(new, dr);
 
 	return 0;
 }
 
 void
 prelist_remove(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
 	int e, s;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* make sure to invalidate the prefix until it is really freed. */
 	pr->ndpr_vltime = 0;
 	pr->ndpr_pltime = 0;
 
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
 	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
 	 * when executing "ndp -p".
 	 */
 
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
 	    (e = nd6_prefix_offlink(pr)) != 0) {
 		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
 		    "on %s, errno=%d\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 		/* what should we do? */
 	}
 
 	if (pr->ndpr_refcnt > 0)
 		return;		/* notice here? */
 
 	s = splnet();
 
 	/* unlink ndpr_entry from nd_prefix list */
 	LIST_REMOVE(pr, ndpr_entry);
 
 	/* free list of routers that adversed the prefix */
 	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
 		free(pfr, M_IP6NDP);
 	}
 	splx(s);
 
 	free(pr, M_IP6NDP);
 
 	pfxlist_onlink_check();
 }
 
 /*
  * Process an advertised prefix described by 'new': update or create the
  * matching prefix list entry, then refresh the lifetimes of the
  * autoconfigured addresses that belong to it, or create a new address
  * when none exists yet.
  *
  * new   - advertised prefix, flags and lifetimes
  * dr    - advertising router; may be NULL
  * m     - packet that carried the advertisement; may be NULL.  Only its
  *         flags are examined (for authentication).
  * mcast - passed through to in6_ifadd() when an address is created
  *
  * Returns 0, or an errno value (EINVAL, EADDRNOTAVAIL, or the error from
  * nd6_prelist_add()).  The whole function runs between splnet() and
  * splx().
  */
 
 static int
 prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
     struct mbuf *m, int mcast)
 {
 	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp = new->ndpr_ifp;
 	struct nd_prefix *pr;
 	int s = splnet();
 	int error = 0;
 	int newprefix = 0;
 	int auth;
 	struct in6_addrlifetime lt6_tmp;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/*
 	 * auth stays 0 unless both M_AUTHIPHDR and M_AUTHIPDGM are defined
 	 * and set on the packet.
 	 */
 	auth = 0;
 	if (m) {
 		/*
 		 * Authenticity for NA consists authentication for
 		 * both IP header and IP datagrams, doesn't it ?
 		 */
 #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
 		auth = ((m->m_flags & M_AUTHIPHDR) &&
 		    (m->m_flags & M_AUTHIPDGM));
 #endif
 	}
 
 	if ((pr = nd6_prefix_lookup(new)) != NULL) {
 		/*
 		 * nd6_prefix_lookup() ensures that pr and new have the same
 		 * prefix on a same interface.
 		 */
 
 		/*
 		 * Update prefix information.  Note that the on-link (L) bit
 		 * and the autonomous (A) bit should NOT be changed from 1
 		 * to 0.
 		 */
 		if (new->ndpr_raf_onlink == 1)
 			pr->ndpr_raf_onlink = 1;
 		if (new->ndpr_raf_auto == 1)
 			pr->ndpr_raf_auto = 1;
 		/* prefix lifetimes are refreshed only when the L bit is set */
 		if (new->ndpr_raf_onlink) {
 			pr->ndpr_vltime = new->ndpr_vltime;
 			pr->ndpr_pltime = new->ndpr_pltime;
 			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
 			pr->ndpr_lastupdate = time_second;
 		}
 
 		if (new->ndpr_raf_onlink &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			int e;
 
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 				/* proceed anyway. XXX: is it correct? */
 			}
 		}
 
 		/* remember the advertising router if it is not listed yet */
 		if (dr && pfxrtr_lookup(pr, dr) == NULL)
 			pfxrtr_add(pr, dr);
 	} else {
 		struct nd_prefix *newpr = NULL;
 
 		newprefix = 1;
 
 		/*
 		 * Do not create an entry for a prefix that is already
 		 * invalid or that has neither the L nor the A bit set.
 		 */
 		if (new->ndpr_vltime == 0)
 			goto end;
 		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
 			goto end;
 
 		error = nd6_prelist_add(new, dr, &newpr);
 		if (error != 0 || newpr == NULL) {
 			nd6log((LOG_NOTICE, "prelist_update: "
 			    "nd6_prelist_add failed for %s/%d on %s "
 			    "errno=%d, returnpr=%p\n",
 			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
 			    new->ndpr_plen, if_name(new->ndpr_ifp),
 			    error, newpr));
 			goto end; /* we should just give up in this case. */
 		}
 
 		/*
 		 * XXX: from the ND point of view, we can ignore a prefix
 		 * with the on-link bit being zero.  However, we need a
 		 * prefix structure for references from autoconfigured
 		 * addresses.  Thus, we explicitly make sure that the prefix
 		 * itself expires now.
 		 */
 		if (newpr->ndpr_raf_onlink == 0) {
 			newpr->ndpr_vltime = 0;
 			newpr->ndpr_pltime = 0;
 			in6_init_prefix_ltimes(newpr);
 		}
 
 		pr = newpr;
 	}
 
 	/*
 	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
 	 * Note that pr must be non NULL at this point.
 	 */
 
 	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
 	if (!new->ndpr_raf_auto)
 		goto end;
 
 	/*
 	 * 5.5.3 (b). the link-local prefix should have been ignored in
 	 * nd6_ra_input.
 	 */
 
 	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
 	if (new->ndpr_pltime > new->ndpr_vltime) {
 		error = EINVAL;	/* XXX: won't be used */
 		goto end;
 	}
 
 	/*
 	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
 	 * an address configured by stateless autoconfiguration already in the
 	 * list of addresses associated with the interface, and the Valid
 	 * Lifetime is not 0, form an address.  We first check if we have
 	 * a matching prefix.
 	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
 	 * consider autoconfigured addresses while RFC2462 simply said
 	 * "address".
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in6_ifaddr *ifa6;
 		u_int32_t remaininglifetime;
 
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		ifa6 = (struct in6_ifaddr *)ifa;
 
 		/*
 		 * We only consider autoconfigured addresses as per rfc2462bis.
 		 */
 		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
 			continue;
 
 		/*
 		 * Spec is not clear here, but I believe we should concentrate
 		 * on unicast (i.e. not anycast) addresses.
 		 * XXX: other ia6_flags? detached or duplicated?
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
 			continue;
 
 		/*
 		 * Ignore the address if it is not associated with a prefix
 		 * or is associated with a prefix that is different from this
 		 * one.  (pr is never NULL here)
 		 */
 		if (ifa6->ia6_ndpr != pr)
 			continue;
 
 		if (ia6_match == NULL) /* remember the first one */
 			ia6_match = ifa6;
 
 		/*
 		 * An already autoconfigured address matched.  Now that we
 		 * are sure there is at least one matched address, we can
 		 * proceed to 5.5.3. (e): update the lifetimes according to the
 		 * "two hours" rule and the privacy extension.
 		 * We apply some clarifications in rfc2462bis:
 		 * - use remaininglifetime instead of storedlifetime as a
 		 *   variable name
 		 * - remove the dead code in the "two-hour" rule
 		 */
 #define TWOHOUR		(120*60)
 		lt6_tmp = ifa6->ia6_lifetime;
 
 		/*
 		 * Compute how much of the stored valid lifetime is left,
 		 * measured from the address's last update time.
 		 */
 		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
 			remaininglifetime = ND6_INFINITE_LIFETIME;
 		else if (time_second - ifa6->ia6_updatetime >
 			 lt6_tmp.ia6t_vltime) {
 			/*
 			 * The case of "invalid" address.  We should usually
 			 * not see this case.
 			 */
 			remaininglifetime = 0;
 		} else
 			remaininglifetime = lt6_tmp.ia6t_vltime -
 			    (time_second - ifa6->ia6_updatetime);
 
 		/* when not updating, keep the current stored lifetime. */
 		lt6_tmp.ia6t_vltime = remaininglifetime;
 
 		/*
 		 * "Two hours" rule: accept the advertised valid lifetime
 		 * when it exceeds two hours or the remaining lifetime; when
 		 * at most two hours remain, accept it only if the packet
 		 * was authenticated; otherwise clamp to two hours.
 		 */
 		if (TWOHOUR < new->ndpr_vltime ||
 		    remaininglifetime < new->ndpr_vltime) {
 			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 		} else if (remaininglifetime <= TWOHOUR) {
 			if (auth) {
 				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 			}
 		} else {
 			/*
 			 * new->ndpr_vltime <= TWOHOUR &&
 			 * TWOHOUR < remaininglifetime
 			 */
 			lt6_tmp.ia6t_vltime = TWOHOUR;
 		}
 
 		/* The 2 hour rule is not imposed for preferred lifetime. */
 		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
 
 		in6_init_address_ltimes(pr, &lt6_tmp);
 
 		/*
 		 * We need to treat lifetimes for temporary addresses
 		 * differently, according to
 		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
 		 * we only update the lifetimes when they are in the maximum
 		 * intervals.
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 			u_int32_t maxvltime, maxpltime;
 
 			/*
 			 * Each maximum is the configured temporary lifetime
 			 * minus the address's age and the desync factor,
 			 * floored at 0.
 			 */
 			if (V_ip6_temp_valid_lifetime >
 			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
 			    V_ip6_desync_factor)) {
 				maxvltime = V_ip6_temp_valid_lifetime -
 				    (time_second - ifa6->ia6_createtime) -
 				    V_ip6_desync_factor;
 			} else
 				maxvltime = 0;
 			if (V_ip6_temp_preferred_lifetime >
 			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
 			    V_ip6_desync_factor)) {
 				maxpltime = V_ip6_temp_preferred_lifetime -
 				    (time_second - ifa6->ia6_createtime) -
 				    V_ip6_desync_factor;
 			} else
 				maxpltime = 0;
 
 			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
 			    lt6_tmp.ia6t_vltime > maxvltime) {
 				lt6_tmp.ia6t_vltime = maxvltime;
 			}
 			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
 			    lt6_tmp.ia6t_pltime > maxpltime) {
 				lt6_tmp.ia6t_pltime = maxpltime;
 			}
 		}
 		ifa6->ia6_lifetime = lt6_tmp;
 		ifa6->ia6_updatetime = time_second;
 	}
 	IF_ADDR_RUNLOCK(ifp);
 	if (ia6_match == NULL && new->ndpr_vltime) {
 		int ifidlen;
 
 		/*
 		 * 5.5.3 (d) (continued)
 		 * No address matched and the valid lifetime is non-zero.
 		 * Create a new address.
 		 */
 
 		/*
 		 * Prefix Length check:
 		 * If the sum of the prefix length and interface identifier
 		 * length does not equal 128 bits, the Prefix Information
 		 * option MUST be ignored.  The length of the interface
 		 * identifier is defined in a separate link-type specific
 		 * document.
 		 */
 		ifidlen = in6_if2idlen(ifp);
 		if (ifidlen < 0) {
 			/* this should not happen, so we always log it. */
 			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
 			    if_name(ifp));
 			goto end;
 		}
 		if (ifidlen + pr->ndpr_plen != 128) {
 			nd6log((LOG_INFO,
 			    "prelist_update: invalid prefixlen "
 			    "%d for %s, ignored\n",
 			    pr->ndpr_plen, if_name(ifp)));
 			goto end;
 		}
 
 		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
 			/*
 			 * note that we should use pr (not new) for reference.
 			 */
 			pr->ndpr_refcnt++;
 			ia6->ia6_ndpr = pr;
 
 			/*
 			 * RFC 3041 3.3 (2).
 			 * When a new public address is created as described
 			 * in RFC2462, also create a new temporary address.
 			 *
 			 * RFC 3041 3.5.
 			 * When an interface connects to a new link, a new
 			 * randomized interface identifier should be generated
 			 * immediately together with a new set of temporary
 			 * addresses.  Thus, we specify 1 as the 2nd arg of
 			 * in6_tmpifadd().
 			 */
 			if (V_ip6_use_tempaddr) {
 				int e;
 				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
 					nd6log((LOG_NOTICE, "prelist_update: "
 					    "failed to create a temporary "
 					    "address, errno=%d\n",
 					    e));
 				}
 			}
 			/*
 			 * NOTE(review): presumably drops a reference returned
 			 * by in6_ifadd() -- confirm against that function.
 			 */
 			ifa_free(&ia6->ia_ifa);
 
 			/*
 			 * A newly added address might affect the status
 			 * of other addresses, so we check and update it.
 			 * XXX: what if address duplication happens?
 			 */
 			pfxlist_onlink_check();
 		} else {
 			/* just set an error. do not bark here. */
 			error = EADDRNOTAVAIL; /* XXX: might be unused. */
 		}
 	}
 
  end:
 	splx(s);
 	return error;
 }
 
 /*
  * Helper for the on-link detection below: return the first router on
  * prefix pr's advertising-router list whose neighbor cache entry is
  * (probably) reachable, or NULL when no such router exists.
  */
 static struct nd_pfxrouter *
 find_pfxlist_reachable_router(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *cand;
 	struct nd_defrouter *rtr;
 	struct llentry *lle;
 	int reachable;
 
 	LIST_FOREACH(cand, &pr->ndpr_advrtrs, pfr_entry) {
 		rtr = cand->router;
 
 		IF_AFDATA_RLOCK(rtr->ifp);
 		lle = nd6_lookup(&rtr->rtaddr, 0, rtr->ifp);
 		IF_AFDATA_RUNLOCK(rtr->ifp);
 		if (lle == NULL)
 			continue;
 
 		/* sample the state, then release the entry */
 		reachable = ND6_IS_LLINFO_PROBREACH(lle);
 		LLE_RUNLOCK(lle);
 		if (reachable)
 			return (cand);
 	}
 
 	return (NULL);
 }
 
 /*
  * Check if each prefix in the prefix list has at least one available router
  * that advertised the prefix (a router is "available" if its neighbor cache
  * entry is reachable or probably reachable).
  * If the check fails, the prefix may be off-link, because, for example,
  * we have moved from the network but the lifetime of the prefix has not
  * expired yet.  So we should not use the prefix if there is another prefix
  * that has an available router.
  * But, if there is no prefix that has an available router, we still regard
  * all the prefixes as on-link.  This is because we can't tell if all the
  * routers are simply dead or if we really moved from the network and there
  * is no router around us.
  *
  * The function works in three passes: it updates NDPRF_DETACHED on the
  * prefixes, then removes or reinstalls the interface routes accordingly,
  * and finally updates IN6_IFF_DETACHED on the autoconfigured addresses
  * (restarting DAD on addresses that become attached again).
  */
 void
 pfxlist_onlink_check(void)
 {
 	struct nd_prefix *pr;
 	struct in6_ifaddr *ifa;
 	struct nd_defrouter *dr;
 	struct nd_pfxrouter *pfxrtr = NULL;
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
 	 * router.
 	 */
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
 			break;
 	}
 
 	/*
 	 * If we have no such prefix, check whether we still have a router
 	 * that does not advertise any prefixes.
 	 * The walk stops at the first router that advertises some prefix,
 	 * leaving pfxrtr non-NULL in that case.
 	 */
 	if (pr == NULL) {
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			struct nd_prefix *pr0;
 
 			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
 				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
 					break;
 			}
 			if (pfxrtr != NULL)
 				break;
 		}
 	}
 	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
 		/*
 		 * There is at least one prefix that has a reachable router,
 		 * or at least a router which probably does not advertise
 		 * any prefixes.  The latter would be the case when we move
 		 * to a new link where we have a router that does not provide
 		 * prefixes and we configure an address by hand.
 		 * Detach prefixes which have no reachable advertising
 		 * router, and attach other prefixes.
 		 */
 		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 			/* XXX: a link-local prefix should never be detached */
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			/*
 			 * we aren't interested in prefixes without the L bit
 			 * set.
 			 */
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if (pr->ndpr_raf_auto == 0)
 				continue;
 
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 			    find_pfxlist_reachable_router(pr) == NULL)
 				pr->ndpr_stateflags |= NDPRF_DETACHED;
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 			    find_pfxlist_reachable_router(pr) != NULL)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	} else {
 		/* there is no prefix that has a reachable router */
 		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if (pr->ndpr_raf_auto == 0)
 				continue;
 
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	}
 
 	/*
 	 * Remove each interface route associated with a (just) detached
 	 * prefix, and reinstall the interface route for a (just) attached
 	 * prefix.  Note that not every reinstallation attempt necessarily
 	 * succeeds when the same prefix is shared among multiple
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		int e;
 		char ip6buf[INET6_ADDRSTRLEN];
 
 		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 			continue;
 
 		if (pr->ndpr_raf_onlink == 0)
 			continue;
 
 		if (pr->ndpr_raf_auto == 0)
 			continue;
 
 		/* detached but still on-link: take the route away */
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 			if ((e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
 		}
 		/* attached but not on-link: (re)install the route */
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
 		    pr->ndpr_raf_onlink) {
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d onlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
 		}
 	}
 
 	/*
 	 * Changes on the prefix status might affect address status as well.
 	 * Make sure that all addresses derived from an attached prefix are
 	 * attached, and that all addresses derived from a detached prefix are
 	 * detached.  Note, however, that a manually configured address should
 	 * always be attached.
 	 * The precise detection logic is same as the one for prefixes.
 	 *
 	 * XXXRW: in6_ifaddrhead locking.
 	 */
 	/*
 	 * First look for any autoconfigured address whose prefix has a
 	 * reachable router; ifa is non-NULL after the loop iff one exists.
 	 */
 	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
 		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
 			continue;
 
 		if (ifa->ia6_ndpr == NULL) {
 			/*
 			 * This can happen when we first configure the address
 			 * (i.e. the address exists, but the prefix does not).
 			 * XXX: complicated relationships...
 			 */
 			continue;
 		}
 
 		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
 			break;
 	}
 	if (ifa) {
 		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
 				continue;
 
 			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
 				/* re-attach: mark tentative and redo DAD */
 				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
 					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
 					nd6_dad_start((struct ifaddr *)ifa, 0);
 				}
 			} else {
 				ifa->ia6_flags |= IN6_IFF_DETACHED;
 			}
 		}
 	}
 	else {
 		/* no reachable router at all: attach every address */
 		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
 				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
 				/* Do we need a delay in this case? */
 				nd6_dad_start((struct ifaddr *)ifa, 0);
 			}
 		}
 	}
 }
 
 /*
  * Install the interface route for prefix `pr' in every FIB, using the
  * address of `ifa' as the gateway argument of the RTM_ADD request.
  *
  * For each FIB where in6_rtrequest() succeeds, the gateway of the new route
  * is replaced by an AF_LINK sockaddr that carries the type and index of the
  * route's interface, the addition is reported through nd6_rtmsg(), and
  * NDPRF_ONLINK is set on the prefix.  Failures are logged via nd6log().
  *
  * Returns 0 when no FIB failed, otherwise the error of the last FIB that
  * failed.  Note that NDPRF_ONLINK may have been set even when an error is
  * returned, because the flag is set as soon as any single FIB succeeds.
  */
 static int
 nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
 {
 	/* Template link-level gateway; type/index are filled in per route. */
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 	struct radix_node_head *rnh;
 	struct rtentry *rt;
 	struct sockaddr_in6 mask6;
 	u_long rtflags;
 	int error, a_failure, fibnum;
 
 	/*
 	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
 	 * ifa->ifa_rtrequest = nd6_rtrequest;
 	 */
 	/* Build the netmask; sin6_family is left zero by the bzero(). */
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_len = sizeof(mask6);
 	mask6.sin6_addr = pr->ndpr_mask;
 	/* Route flags: the ifaddr's flags without IFA_RTSELF, plus RTF_UP. */
 	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
 
 	a_failure = 0;
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 
 		rt = NULL;
 		error = in6_rtrequest(RTM_ADD,
 		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
 		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
 		if (error == 0) {
 			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
 			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
 			    error, pr, ifa));
 
 			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
 			/* XXX what if rhn == NULL? */
 			/*
 			 * The radix head lock is taken before the route lock
 			 * and released before the routing message is sent;
 			 * the route itself stays locked across nd6_rtmsg().
 			 */
 			RADIX_NODE_HEAD_LOCK(rnh);
 			RT_LOCK(rt);
 			if (rt_setgate(rt, rt_key(rt),
 			    (struct sockaddr *)&null_sdl) == 0) {
 				struct sockaddr_dl *dl;
 
 				/* Point the gateway at the outgoing interface. */
 				dl = (struct sockaddr_dl *)rt->rt_gateway;
 				dl->sdl_type = rt->rt_ifp->if_type;
 				dl->sdl_index = rt->rt_ifp->if_index;
 			}
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 			nd6_rtmsg(RTM_ADD, rt);
 			RT_UNLOCK(rt);
 			pr->ndpr_stateflags |= NDPRF_ONLINK;
 		} else {
 			char ip6buf[INET6_ADDRSTRLEN];
 			char ip6bufg[INET6_ADDRSTRLEN];
 			char ip6bufm[INET6_ADDRSTRLEN];
 			struct sockaddr_in6 *sin6;
 
 			/* Separate buffers: all three strings are live at once. */
 			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
 			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
 			    "flags=%lx errno = %d\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
 			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
 			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
 			    rtflags, error));
 
 			/* Save last error to return, see rtinit(). */
 			a_failure = error;
 		}
 
 		/*
 		 * Drop one reference on the returned route -- presumably the
 		 * one in6_rtrequest() took when handing it back through &rt
 		 * (TODO confirm against in6_rtrequest()).
 		 */
 		if (rt != NULL) {
 			RT_LOCK(rt);
 			RT_REMREF(rt);
 			RT_UNLOCK(rt);
 		}
 	}
 
 	/* Return the last error we got. */
 	return (a_failure);
 }
 
 /*
  * Bring prefix `pr' on-link by installing its interface route.
  *
  * Returns EEXIST when the prefix is already flagged on-link, 0 when an
  * identical prefix on another entry already holds the route or when no
  * IPv6 address is available on the interface to attach the route to, and
  * otherwise whatever nd6_prefix_onlink_rtrequest() returns.
  */
 static int
 nd6_prefix_onlink(struct nd_prefix *pr)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *other;
 	struct ifaddr *ifa;
 	int error;
 
 	/* Refuse to install the route a second time. */
 	if (pr->ndpr_stateflags & NDPRF_ONLINK) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_onlink: %s/%d is already on-link\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen));
 		return (EEXIST);
 	}
 
 	/*
 	 * The same prefix may legitimately be advertised on another
 	 * interface.  If such an entry is already on-link it owns the
 	 * interface route, so there is nothing for us to install.
 	 */
 	LIST_FOREACH(other, &V_nd_prefix, ndpr_entry) {
 		if (other == pr ||
 		    !(other->ndpr_stateflags & NDPRF_ONLINK) ||
 		    other->ndpr_plen != pr->ndpr_plen)
 			continue;
 
 		if (in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 		    &other->ndpr_prefix.sin6_addr, pr->ndpr_plen))
 			return (0);
 	}
 
 	/*
 	 * Pick the interface address the route is tied to: a usable
 	 * link-local address if there is one, else the first AF_INET6
 	 * address on the interface (its ia6_flags are not examined).
 	 * Either way we end up holding a reference on the result.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
 	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
 	if (ifa == NULL) {
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family == AF_INET6) {
 				ifa_ref(ifa);
 				break;
 			}
 		}
 		IF_ADDR_RUNLOCK(ifp);
 	}
 
 	if (ifa == NULL) {
 		/*
 		 * Possible, although rare: e.g. an RA with the L bit set and
 		 * the A bit clear arrives after every IPv6 address has been
 		 * removed from the receiving interface.
 		 */
 		nd6log((LOG_NOTICE,
 		    "nd6_prefix_onlink: failed to find any ifaddr"
 		    " to add route for a prefix(%s/%d) on %s\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp)));
 		return (0);
 	}
 
 	error = nd6_prefix_onlink_rtrequest(pr, ifa);
 	ifa_free(ifa);
 
 	return (error);
 }
 
 /*
  * Take prefix `pr' off-link by deleting its interface route from every FIB.
  *
  * When no deletion fails, NDPRF_ONLINK is cleared and every other listed
  * prefix that is equal to `pr', not on-link and not detached is handed to
  * nd6_prefix_onlink() so that it can take over the route.
  *
  * Returns EEXIST if the prefix is already off-link, 0 on success, or the
  * error of the last FIB whose deletion failed.  In the failure case
  * NDPRF_ONLINK is left set and the failure is logged.
  */
 static int
 nd6_prefix_offlink(struct nd_prefix *pr)
 {
 	int error = 0;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *opr;
 	struct sockaddr_in6 sa6, mask6;
 	struct rtentry *rt;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int fibnum, a_failure;
 
 	/* sanity check */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: %s/%d is already off-link\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen));
 		return (EEXIST);
 	}
 
 	/* Destination and netmask sockaddrs for the route to delete. */
 	bzero(&sa6, sizeof(sa6));
 	sa6.sin6_family = AF_INET6;
 	sa6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
 	    sizeof(struct in6_addr));
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_family = AF_INET6;
 	/* sa6 and mask6 have the same type, so sizeof(sa6) is the right size. */
 	mask6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
 
 	a_failure = 0;
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		rt = NULL;
 		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
 		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
 		if (error == 0) {
 			/* report the route deletion to the routing socket. */
 			if (rt != NULL)
 				nd6_rtmsg(RTM_DELETE, rt);
 		} else {
 			/* Save last error to return, see rtinit(). */
 			a_failure = error;
 		}
 		/* Release the route handed back through &rt, if any. */
 		if (rt != NULL) {
 			RTFREE(rt);
 		}
 	}
 	/* From here on `error' is the aggregate result over all FIBs. */
 	error = a_failure;
 	if (error == 0) {
 		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
 
 		/*
 		 * There might be the same prefix on another interface,
 		 * the prefix which could not be on-link just because we have
 		 * the interface route (see comments in nd6_prefix_onlink).
 		 * If there's one, try to make the prefix on-link on the
 		 * interface.
 		 */
 		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
 			if (opr == pr)
 				continue;
 
 			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
 				continue;
 
 			/*
 			 * KAME specific: detached prefixes should not be
 			 * on-link.
 			 */
 			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				continue;
 
 			if (opr->ndpr_plen == pr->ndpr_plen &&
 			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
 				int e;
 
 				/*
 				 * A recovery failure is only logged; it does
 				 * not change this function's return value.
 				 */
 				if ((e = nd6_prefix_onlink(opr)) != 0) {
 					nd6log((LOG_ERR,
 					    "nd6_prefix_offlink: failed to "
 					    "recover a prefix %s/%d from %s "
 					    "to %s (errno = %d)\n",
 					    ip6_sprintf(ip6buf,
 						&opr->ndpr_prefix.sin6_addr),
 					    opr->ndpr_plen, if_name(ifp),
 					    if_name(opr->ndpr_ifp), e));
 				}
 			}
 		}
 	} else {
 		/* XXX: can we still set the NDPRF_ONLINK flag? */
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: failed to delete route: "
 		    "%s/%d on %s (errno = %d)\n",
 		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
 		    if_name(ifp), error));
 	}
 
 	return (error);
 }
 
 static struct in6_ifaddr *
 in6_ifadd(struct nd_prefixctl *pr, int mcast)
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct ifaddr *ifa;
 	struct in6_aliasreq ifra;
 	struct in6_ifaddr *ia, *ib;
 	int error, plen0;
 	struct in6_addr mask;
 	int prefixlen = pr->ndpr_plen;
 	int updateflags;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	in6_prefixlen2mask(&mask, prefixlen);
 
 	/*
 	 * find a link-local address (will be interface ID).
 	 * Is it really mandatory? Theoretically, a global or a site-local
 	 * address can be configured without a link-local address, if we
 	 * have a unique interface identifier...
 	 *
 	 * it is not mandatory to have a link-local address, we can generate
 	 * interface identifier on the fly.  we do this because:
 	 * (1) it should be the easiest way to find interface identifier.
 	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
 	 * for multiple addresses on a single interface, and possible shortcut
 	 * of DAD.  we omitted DAD for this reason in the past.
 	 * (3) a user can prevent autoconfiguration of global address
 	 * by removing link-local address by hand (this is partly because we
 	 * don't have other way to control the use of IPv6 on an interface.
 	 * this has been our design choice - cf. NRL's "ifconfig auto").
 	 * (4) it is easier to manage when an interface has addresses
 	 * with the same interface identifier, than to have multiple addresses
 	 * with different interface identifiers.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
 	if (ifa)
 		ib = (struct in6_ifaddr *)ifa;
 	else
 		return NULL;
 
 	/* prefixlen + ifidlen must be equal to 128 */
 	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
 	if (prefixlen != plen0) {
 		ifa_free(ifa);
 		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
 		    "(prefix=%d ifid=%d)\n",
 		    if_name(ifp), prefixlen, 128 - plen0));
 		return NULL;
 	}
 
 	/* make ifaddr */
 
 	bzero(&ifra, sizeof(ifra));
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	/* prefix */
 	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
 
 	/* interface ID */
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
 	ifa_free(ifa);
 
 	/* new prefix mask. */
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
 	    sizeof(ifra.ifra_prefixmask.sin6_addr));
 
 	/* lifetimes. */
 	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
 	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
 
 	/*
 	 * Make sure that we do not have this address already.  This should
 	 * usually not happen, but we can still see this case, e.g., if we
 	 * have manually configured the exact address to be configured.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
 	    &ifra.ifra_addr.sin6_addr);
 	if (ifa != NULL) {
 		ifa_free(ifa);
 		/* this should be rare enough to make an explicit log */
 		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
 		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
 		return (NULL);
 	}
 
 	/*
 	 * Allocate ifaddr structure, link into chain, etc.
 	 * If we are going to create a new address upon receiving a multicasted
 	 * RA, we need to impose a random delay before starting DAD.
 	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
 	 */
 	updateflags = 0;
 	if (mcast)
 		updateflags |= IN6_IFAUPDATE_DADDELAY;
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
 		nd6log((LOG_ERR,
 		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
 		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
 		    if_name(ifp), error));
 		return (NULL);	/* ifaddr must not have been allocated. */
 	}
 
 	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	/*
 	 * XXXRW: Assumption of non-NULLness here might not be true with
 	 * fine-grained locking -- should we validate it?  Or just return
 	 * earlier ifa rather than looking it up again?
 	 */
 	return (ia);		/* this is always non-NULL  and referenced. */
 }
 
 /*
  * ia0 - corresponding public address
  */
 int
 in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
 {
 	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
 	struct in6_ifaddr *newia, *ia;
 	struct in6_aliasreq ifra;
 	int i, error;
 	int trylimit = 3;	/* XXX: adhoc value */
 	int updateflags;
 	u_int32_t randid[2];
 	time_t vltime0, pltime0;
 
 	bzero(&ifra, sizeof(ifra));
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr = ia0->ia_addr;
 	/* copy prefix mask */
 	ifra.ifra_prefixmask = ia0->ia_prefixmask;
 	/* clear the old IFID */
 	for (i = 0; i < 4; i++) {
 		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
 		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
 	}
 
   again:
 	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
 	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
 		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
 		    "random IFID\n"));
 		return (EINVAL);
 	}
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
 	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
 	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
 
 	/*
 	 * in6_get_tmpifid() quite likely provided a unique interface ID.
 	 * However, we may still have a chance to see collision, because
 	 * there may be a time lag between generation of the ID and generation
 	 * of the address.  So, we'll do one more sanity check.
 	 */
 	IN6_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
 		    &ifra.ifra_addr.sin6_addr)) {
 			if (trylimit-- == 0) {
 				IN6_IFADDR_RUNLOCK();
 				/*
 				 * Give up.  Something strange should have
 				 * happened.
 				 */
 				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
 				    "find a unique random IFID\n"));
 				return (EEXIST);
 			}
 			IN6_IFADDR_RUNLOCK();
 			forcegen = 1;
 			goto again;
 		}
 	}
 	IN6_IFADDR_RUNLOCK();
 
 	/*
 	 * The Valid Lifetime is the lower of the Valid Lifetime of the
          * public address or TEMP_VALID_LIFETIME.
 	 * The Preferred Lifetime is the lower of the Preferred Lifetime
          * of the public address or TEMP_PREFERRED_LIFETIME -
          * DESYNC_FACTOR.
 	 */
 	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_vltime -
 		    (time_second - ia0->ia6_updatetime));
 		if (vltime0 > V_ip6_temp_valid_lifetime)
 			vltime0 = V_ip6_temp_valid_lifetime;
 	} else
 		vltime0 = V_ip6_temp_valid_lifetime;
 	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_pltime -
 		    (time_second - ia0->ia6_updatetime));
 		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
 			pltime0 = V_ip6_temp_preferred_lifetime -
 			    V_ip6_desync_factor;
 		}
 	} else
 		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
 	ifra.ifra_lifetime.ia6t_vltime = vltime0;
 	ifra.ifra_lifetime.ia6t_pltime = pltime0;
 
 	/*
 	 * A temporary address is created only if this calculated Preferred
 	 * Lifetime is greater than REGEN_ADVANCE time units.
 	 */
 	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
 		return (0);
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
 
 	/* allocate ifaddr structure, link into chain, etc. */
 	updateflags = 0;
 	if (delay)
 		updateflags |= IN6_IFAUPDATE_DADDELAY;
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
 		return (error);
 
 	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	if (newia == NULL) {	/* XXX: can it happen? */
 		nd6log((LOG_ERR,
 		    "in6_tmpifadd: ifa update succeeded, but we got "
 		    "no ifaddr\n"));
 		return (EINVAL); /* XXX */
 	}
 	newia->ia6_ndpr = ia0->ia6_ndpr;
 	newia->ia6_ndpr->ndpr_refcnt++;
 	ifa_free(&newia->ia_ifa);
 
 	/*
 	 * A newly added address might affect the status of other addresses.
 	 * XXX: when the temporary address is generated with a new public
 	 * address, the onlink check is redundant.  However, it would be safe
 	 * to do the check explicitly everywhere a new address is generated,
 	 * and, in fact, we surely need the check when we create a new
 	 * temporary address due to deprecation of an old temporary address.
 	 */
 	pfxlist_onlink_check();
 
 	return (0);
 }
 
 /*
  * Derive the absolute preferred/expire times of a prefix from its relative
  * preferred and valid lifetimes.  An infinite lifetime is recorded as 0.
  * Always returns 0.
  */
 static int
 in6_init_prefix_ltimes(struct nd_prefix *ndpr)
 {
 	ndpr->ndpr_preferred =
 	    (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ?
 	    0 : time_second + ndpr->ndpr_pltime;
 
 	ndpr->ndpr_expire =
 	    (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ?
 	    0 : time_second + ndpr->ndpr_vltime;
 
 	return 0;
 }
 
 /*
  * Fill in the absolute expire/preferred times of an address lifetime from
  * its relative valid and preferred lifetimes.  An infinite lifetime is
  * recorded as 0.  The `new' argument is not used.
  */
 static void
 in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
 {
 	/* ia6t_expire follows the valid lifetime. */
 	if (lt6->ia6t_vltime != ND6_INFINITE_LIFETIME)
 		lt6->ia6t_expire = time_second + lt6->ia6t_vltime;
 	else
 		lt6->ia6t_expire = 0;
 
 	/* ia6t_preferred follows the preferred lifetime. */
 	if (lt6->ia6t_pltime != ND6_INFINITE_LIFETIME)
 		lt6->ia6t_preferred = time_second + lt6->ia6t_pltime;
 	else
 		lt6->ia6t_preferred = 0;
 }
 
 /*
  * Remove the routing table entries that go through the given gateway.
  * Only link-local gateways are handled; for anything else this is a no-op.
  * XXX: every entry of every IPv6 routing table is visited, so this should
  * not be called when acting as a router.  The `ifp' argument is not used.
  */
 void
 rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
 {
 	struct radix_node_head *head;
 	u_int fib;
 	int s;
 
 	s = splnet();
 
 	/* We'll care only link-local addresses */
 	if (IN6_IS_ADDR_LINKLOCAL(gateway)) {
 		/* XXX Do we really need to walk any but the default FIB? */
 		for (fib = 0; fib < rt_numfibs; fib++) {
 			head = rt_tables_get_rnh(fib, AF_INET6);
 			if (head != NULL) {
 				RADIX_NODE_HEAD_LOCK(head);
 				head->rnh_walktree(head, rt6_deleteroute,
 				    (void *)gateway);
 				RADIX_NODE_HEAD_UNLOCK(head);
 			}
 		}
 	}
 
 	splx(s);
 }
 
 /*
  * Tree-walk callback used by rt6_flush(): delete the route in `rn' when it
  * is a non-static host route whose IPv6 gateway equals the address passed
  * in `arg'.  Returns 0 for routes that are left alone, otherwise the result
  * of the RTM_DELETE request.
  */
 static int
 rt6_deleteroute(struct radix_node *rn, void *arg)
 {
 #define SIN6(s)	((struct sockaddr_in6 *)s)
 	struct rtentry *route = (struct rtentry *)rn;
 	struct in6_addr *wanted = (struct in6_addr *)arg;
 
 	/* Only routes with an IPv6 gateway matching `wanted' qualify. */
 	if (route->rt_gateway == NULL ||
 	    route->rt_gateway->sa_family != AF_INET6 ||
 	    !IN6_ARE_ADDR_EQUAL(wanted, &SIN6(route->rt_gateway)->sin6_addr))
 		return (0);
 
 	/*
 	 * Keep static routes (XXX: a bit ad-hoc; should the 'cloned' bit be
 	 * considered instead?) and keep anything that is not a host route,
 	 * which in particular spares the default route.
 	 */
 	if ((route->rt_flags & (RTF_STATIC | RTF_HOST)) != RTF_HOST)
 		return (0);
 
 	return (in6_rtrequest(RTM_DELETE, rt_key(route), route->rt_gateway,
 	    rt_mask(route), route->rt_flags, NULL, route->rt_fibnum));
 #undef SIN6
 }
 
 /*
  * Select the default interface by index; 0 means "no default interface".
  * Returns EINVAL when the index is out of range or, for a non-zero index,
  * when no interface has that index; returns 0 otherwise.
  */
 int
 nd6_setdefaultiface(int ifindex)
 {
 	if (ifindex < 0 || ifindex > V_if_index)
 		return (EINVAL);
 	if (ifindex != 0 && ifnet_byindex(ifindex) == NULL)
 		return (EINVAL);
 
 	/* Nothing to do when the default interface does not change. */
 	if (V_nd6_defifindex == ifindex)
 		return (0);
 
 	V_nd6_defifindex = ifindex;
 	V_nd6_defifp = (V_nd6_defifindex > 0) ?
 	    ifnet_byindex(V_nd6_defifindex) : NULL;
 
 	/*
 	 * The current implementation assumes a one-to-one mapping between
 	 * interfaces and links, so the default interface is also used as
 	 * the default link.
 	 */
 	scope6_setdefault(V_nd6_defifp);
 
 	return (0);
 }