Index: stable/8/contrib/ntp/ntpd/ntp_crypto.c =================================================================== --- stable/8/contrib/ntp/ntpd/ntp_crypto.c (revision 281230) +++ stable/8/contrib/ntp/ntpd/ntp_crypto.c (revision 281231) @@ -1,4201 +1,4235 @@ /* * ntp_crypto.c - NTP version 4 public key routines */ #ifdef HAVE_CONFIG_H #include #endif #ifdef OPENSSL #include #include #include #include #include #include "ntpd.h" #include "ntp_stdlib.h" #include "ntp_unixtime.h" #include "ntp_string.h" #include #include "openssl/asn1_mac.h" #include "openssl/bn.h" #include "openssl/err.h" #include "openssl/evp.h" #include "openssl/pem.h" #include "openssl/rand.h" #include "openssl/x509v3.h" #ifdef KERNEL_PLL #include "ntp_syscall.h" #endif /* KERNEL_PLL */ /* * Extension field message format * * These are always signed and saved before sending in network byte * order. They must be converted to and from host byte order for * processing. * * +-------+-------+ * | op | len | <- extension pointer * +-------+-------+ * | assocID | * +---------------+ * | timestamp | <- value pointer * +---------------+ * | filestamp | * +---------------+ * | value len | * +---------------+ * | | * = value = * | | * +---------------+ * | signature len | * +---------------+ * | | * = signature = * | | * +---------------+ * * The CRYPTO_RESP bit is set to 0 for requests, 1 for responses. * Requests carry the association ID of the receiver; responses carry * the association ID of the sender. Some messages include only the * operation/length and association ID words and so have length 8 * octets. Others include the value structure and associated value and * signature fields. These messages include the timestamp, filestamp, * value and signature words and so have length at least 24 octets. The * signature and/or value fields can be empty, in which case the * respective length words are zero. An empty value with nonempty * signature is syntactically valid, but semantically questionable. 
* * The filestamp represents the time when a cryptographic data file such * as a public/private key pair is created. It follows every reference * depending on that file and serves as a means to obsolete earlier data * of the same type. The timestamp represents the time when the * cryptographic data of the message were last signed. Creation of a * cryptographic data file or signing a message can occur only when the * creator or signor is synchronized to an authoritative source and * proventicated to a trusted authority. * * Note there are four conditions required for server trust. First, the * public key on the certificate must be verified, which involves a * number of format, content and consistency checks. Next, the server * identity must be confirmed by one of four schemes: private * certificate, IFF scheme, GQ scheme or certificate trail hike to a * self signed trusted certificate. Finally, the server signature must * be verified. */ /* * Cryptodefines */ #define TAI_1972 10 /* initial TAI offset (s) */ #define MAX_LEAP 100 /* max UTC leapseconds (s) */ #define VALUE_LEN (6 * 4) /* min response field length */ +#define MAX_VALLEN (65535 - VALUE_LEN) #define YEAR (60 * 60 * 24 * 365) /* seconds in year */ /* * Global cryptodata in host byte order */ u_int32 crypto_flags = 0x0; /* status word */ /* * Global cryptodata in network byte order */ struct cert_info *cinfo = NULL; /* certificate info/value */ struct value hostval; /* host value */ struct value pubkey; /* public key */ struct value tai_leap; /* leapseconds table */ EVP_PKEY *iffpar_pkey = NULL; /* IFF parameters */ EVP_PKEY *gqpar_pkey = NULL; /* GQ parameters */ EVP_PKEY *mvpar_pkey = NULL; /* MV parameters */ char *iffpar_file = NULL; /* IFF parameters file */ char *gqpar_file = NULL; /* GQ parameters file */ char *mvpar_file = NULL; /* MV parameters file */ /* * Private cryptodata in host byte order */ static char *passwd = NULL; /* private key password */ static EVP_PKEY *host_pkey = NULL; /* host key 
*/ static EVP_PKEY *sign_pkey = NULL; /* sign key */ static const EVP_MD *sign_digest = NULL; /* sign digest */ static u_int sign_siglen; /* sign key length */ static char *rand_file = NULL; /* random seed file */ static char *host_file = NULL; /* host key file */ static char *sign_file = NULL; /* sign key file */ static char *cert_file = NULL; /* certificate file */ static char *leap_file = NULL; /* leapseconds file */ static tstamp_t if_fstamp = 0; /* IFF filestamp */ static tstamp_t gq_fstamp = 0; /* GQ file stamp */ static tstamp_t mv_fstamp = 0; /* MV filestamp */ static u_int ident_scheme = 0; /* server identity scheme */ /* * Cryptotypes */ static int crypto_verify P((struct exten *, struct value *, struct peer *)); -static int crypto_encrypt P((struct exten *, struct value *, - keyid_t *)); +static int crypto_encrypt P((const u_char *, u_int, keyid_t *, + struct value *)); static int crypto_alice P((struct peer *, struct value *)); static int crypto_alice2 P((struct peer *, struct value *)); static int crypto_alice3 P((struct peer *, struct value *)); static int crypto_bob P((struct exten *, struct value *)); static int crypto_bob2 P((struct exten *, struct value *)); static int crypto_bob3 P((struct exten *, struct value *)); static int crypto_iff P((struct exten *, struct peer *)); static int crypto_gq P((struct exten *, struct peer *)); static int crypto_mv P((struct exten *, struct peer *)); static u_int crypto_send P((struct exten *, struct value *)); static tstamp_t crypto_time P((void)); static u_long asn2ntp P((ASN1_TIME *)); static struct cert_info *cert_parse P((u_char *, u_int, tstamp_t)); static int cert_sign P((struct exten *, struct value *)); static int cert_valid P((struct cert_info *, EVP_PKEY *)); static int cert_install P((struct exten *, struct peer *)); static void cert_free P((struct cert_info *)); static EVP_PKEY *crypto_key P((char *, tstamp_t *)); static int bighash P((BIGNUM *, BIGNUM *)); static struct cert_info *crypto_cert 
P((char *)); static void crypto_tai P((char *)); #ifdef SYS_WINNT int readlink(char * link, char * file, int len) { return (-1); } #endif /* * session_key - generate session key * * This routine generates a session key from the source address, * destination address, key ID and private value. The value of the * session key is the MD5 hash of these values, while the next key ID is * the first four octets of the hash. * * Returns the next key ID */ keyid_t session_key( struct sockaddr_storage *srcadr, /* source address */ struct sockaddr_storage *dstadr, /* destination address */ keyid_t keyno, /* key ID */ keyid_t private, /* private value */ u_long lifetime /* key lifetime */ ) { EVP_MD_CTX ctx; /* message digest context */ u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */ keyid_t keyid; /* key identifer */ u_int32 header[10]; /* data in network byte order */ u_int hdlen, len; if (!dstadr) return 0; /* * Generate the session key and key ID. If the lifetime is * greater than zero, install the key and call it trusted. 
*/ hdlen = 0; switch(srcadr->ss_family) { case AF_INET: header[0] = ((struct sockaddr_in *)srcadr)->sin_addr.s_addr; header[1] = ((struct sockaddr_in *)dstadr)->sin_addr.s_addr; header[2] = htonl(keyno); header[3] = htonl(private); hdlen = 4 * sizeof(u_int32); break; case AF_INET6: memcpy(&header[0], &GET_INADDR6(*srcadr), sizeof(struct in6_addr)); memcpy(&header[4], &GET_INADDR6(*dstadr), sizeof(struct in6_addr)); header[8] = htonl(keyno); header[9] = htonl(private); hdlen = 10 * sizeof(u_int32); break; } EVP_DigestInit(&ctx, EVP_md5()); EVP_DigestUpdate(&ctx, (u_char *)header, hdlen); EVP_DigestFinal(&ctx, dgst, &len); memcpy(&keyid, dgst, 4); keyid = ntohl(keyid); if (lifetime != 0) { MD5auth_setkey(keyno, dgst, len); authtrust(keyno, lifetime); } #ifdef DEBUG if (debug > 1) printf( "session_key: %s > %s %08x %08x hash %08x life %lu\n", stoa(srcadr), stoa(dstadr), keyno, private, keyid, lifetime); #endif return (keyid); } /* * make_keylist - generate key list * * Returns * XEVNT_OK success * XEVNT_PER host certificate expired * * This routine constructs a pseudo-random sequence by repeatedly * hashing the session key starting from a given source address, * destination address, private value and the next key ID of the * preceeding session key. The last entry on the list is saved along * with its sequence number and public signature. */ int make_keylist( struct peer *peer, /* peer structure pointer */ struct interface *dstadr /* interface */ ) { EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ struct autokey *ap; /* autokey pointer */ struct value *vp; /* value pointer */ keyid_t keyid = 0; /* next key ID */ keyid_t cookie; /* private value */ u_long lifetime; u_int len, mpoll; int i; if (!dstadr) return XEVNT_OK; /* * Allocate the key list if necessary. 
*/ tstamp = crypto_time(); if (peer->keylist == NULL) peer->keylist = emalloc(sizeof(keyid_t) * NTP_MAXSESSION); /* * Generate an initial key ID which is unique and greater than * NTP_MAXKEY. */ while (1) { keyid = (ntp_random() + NTP_MAXKEY + 1) & ((1 << sizeof(keyid_t)) - 1); if (authhavekey(keyid)) continue; break; } /* * Generate up to NTP_MAXSESSION session keys. Stop if the * next one would not be unique or not a session key ID or if * it would expire before the next poll. The private value * included in the hash is zero if broadcast mode, the peer * cookie if client mode or the host cookie if symmetric modes. */ mpoll = 1 << min(peer->ppoll, peer->hpoll); lifetime = min(sys_automax, NTP_MAXSESSION * mpoll); if (peer->hmode == MODE_BROADCAST) cookie = 0; else cookie = peer->pcookie; for (i = 0; i < NTP_MAXSESSION; i++) { peer->keylist[i] = keyid; peer->keynumber = i; keyid = session_key(&dstadr->sin, &peer->srcadr, keyid, cookie, lifetime); lifetime -= mpoll; if (auth_havekey(keyid) || keyid <= NTP_MAXKEY || lifetime <= mpoll) break; } /* * Save the last session key ID, sequence number and timestamp, * then sign these values for later retrieval by the clients. Be * careful not to use invalid key media. Use the public values * timestamp as filestamp. 
*/ vp = &peer->sndval; if (vp->ptr == NULL) vp->ptr = emalloc(sizeof(struct autokey)); ap = (struct autokey *)vp->ptr; ap->seq = htonl(peer->keynumber); ap->key = htonl(keyid); vp->tstamp = htonl(tstamp); vp->fstamp = hostval.tstamp; vp->vallen = htonl(sizeof(struct autokey)); vp->siglen = 0; if (tstamp != 0) { if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); if (vp->sig == NULL) vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)vp, 12); EVP_SignUpdate(&ctx, vp->ptr, sizeof(struct autokey)); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); else msyslog(LOG_ERR, "make_keys %s\n", ERR_error_string(ERR_get_error(), NULL)); peer->flags |= FLAG_ASSOC; } #ifdef DEBUG if (debug) printf("make_keys: %d %08x %08x ts %u fs %u poll %d\n", ntohl(ap->seq), ntohl(ap->key), cookie, ntohl(vp->tstamp), ntohl(vp->fstamp), peer->hpoll); #endif return (XEVNT_OK); } /* * crypto_recv - parse extension fields * * This routine is called when the packet has been matched to an * association and passed sanity, format and MAC checks. We believe the * extension field values only if the field has proper format and * length, the timestamp and filestamp are valid and the signature has * valid length and is verified. There are a few cases where some values * are believed even if the signature fails, but only if the proventic * bit is not set. 
*/ int crypto_recv( struct peer *peer, /* peer structure pointer */ struct recvbuf *rbufp /* packet buffer pointer */ ) { const EVP_MD *dp; /* message digest algorithm */ u_int32 *pkt; /* receive packet pointer */ struct autokey *ap, *bp; /* autokey pointer */ struct exten *ep, *fp; /* extension pointers */ int has_mac; /* length of MAC field */ int authlen; /* offset of MAC field */ associd_t associd; /* association ID */ tstamp_t tstamp = 0; /* timestamp */ tstamp_t fstamp = 0; /* filestamp */ u_int len; /* extension field length */ u_int code; /* extension field opcode */ u_int vallen = 0; /* value length */ X509 *cert; /* X509 certificate */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ keyid_t cookie; /* crumbles */ int hismode; /* packet mode */ int rval = XEVNT_OK; /* event code returned to the caller */ u_char *ptr; /* certificate data pointer handed to d2i_X509 */ u_int32 temp32; /* scratch word */ /* * Initialize. Note that the packet has already been checked for * valid format and extension field lengths. First extract the * field length, command code and association ID in host byte * order. These are used with all commands and modes. Then check * the version number, which must be 2, and length, which must * be at least 8 for requests and VALUE_LEN (24) for responses. * Packets that fail either test sink without a trace. The * association ID is saved only if nonzero. */ authlen = LEN_PKT_NOMAC; hismode = (int)PKT_MODE((&rbufp->recv_pkt)->li_vn_mode); while ((has_mac = rbufp->recv_length - authlen) > MAX_MAC_LEN) { pkt = (u_int32 *)&rbufp->recv_pkt + authlen / 4; ep = (struct exten *)pkt; code = ntohl(ep->opcode) & 0xffff0000; /* high 16 bits of the opcode word */ len = ntohl(ep->opcode) & 0x0000ffff; /* low 16 bits of the opcode word */ associd = (associd_t) ntohl(pkt[1]); rval = XEVNT_OK; #ifdef DEBUG if (debug) printf( "crypto_recv: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n", peer->crypto, authlen, len, code >> 16, associd); #endif /* * Check version number and field length. If bad, * quietly ignore the packet. 
*/ if (((code >> 24) & 0x3f) != CRYPTO_VN || len < 8) { sys_unknownversion++; code |= CRYPTO_ERROR; } /* * Little vulnerability bandage here. If a perp tosses a * fake association ID over the fence, we better toss it * out. Only the first one counts. */ if (code & CRYPTO_RESP) { if (peer->assoc == 0) peer->assoc = associd; else if (peer->assoc != associd) code |= CRYPTO_ERROR; } if (len >= VALUE_LEN) { tstamp = ntohl(ep->tstamp); fstamp = ntohl(ep->fstamp); vallen = ntohl(ep->vallen); + /* + * Bug 2761: I hope this isn't too early... + */ + if ( vallen == 0 + || len - VALUE_LEN < vallen) + return XEVNT_LEN; } switch (code) { /* * Install status word, host name, signature scheme and * association ID. In OpenSSL the signature algorithm is * bound to the digest algorithm, so the NID completely * defines the signature scheme. Note the request and * response are identical, but neither is validated by * signature. The request is processed here only in * symmetric modes. The server name field might be * useful to implement access controls in future. */ case CRYPTO_ASSOC: /* * If the machine is running when this message * arrives, the other fellow has reset and so * must we. Otherwise, pass the extension field * to the transmit side. */ if (peer->crypto) { rval = XEVNT_ERR; break; } fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; /* fall through */ case CRYPTO_ASSOC | CRYPTO_RESP: /* * Discard the message if it has already been * stored or the message has been amputated. */ if (peer->crypto) break; if (vallen == 0 || vallen > MAXHOSTNAME || - len < VALUE_LEN + vallen) { + len - VALUE_LEN < vallen) { rval = XEVNT_LEN; break; } /* * Check the identity schemes are compatible. 
If * the client has PC, the server must have PC, * in which case the server public key and * identity are presumed valid, so we skip the * certificate and identity exchanges and move * immediately to the cookie exchange which * confirms the server signature. */ #ifdef DEBUG if (debug) printf( "crypto_recv: ident host 0x%x server 0x%x\n", crypto_flags, fstamp); #endif temp32 = (crypto_flags | ident_scheme) & fstamp & CRYPTO_FLAG_MASK; if (crypto_flags & CRYPTO_FLAG_PRIV) { if (!(fstamp & CRYPTO_FLAG_PRIV)) { rval = XEVNT_KEY; break; } else { fstamp |= CRYPTO_FLAG_VALID | CRYPTO_FLAG_VRFY | CRYPTO_FLAG_SIGN; } /* * In symmetric modes it is an error if either * peer requests identity and the other peer * does not support it. */ } else if ((hismode == MODE_ACTIVE || hismode == MODE_PASSIVE) && ((crypto_flags | fstamp) & CRYPTO_FLAG_MASK) && !temp32) { rval = XEVNT_KEY; break; /* * It is an error if the client requests * identity and the server does not support it. */ } else if (hismode == MODE_CLIENT && (fstamp & CRYPTO_FLAG_MASK) && !temp32) { rval = XEVNT_KEY; break; } /* * Otherwise, the identity scheme(s) are those * that both client and server support. */ fstamp = temp32 | (fstamp & ~CRYPTO_FLAG_MASK); /* * Discard the message if the signature digest * NID is not supported. */ temp32 = (fstamp >> 16) & 0xffff; dp = (const EVP_MD *)EVP_get_digestbynid(temp32); if (dp == NULL) { rval = XEVNT_MD; break; } /* * Save status word, host name and message * digest/signature type. 
*/ peer->crypto = fstamp; peer->digest = dp; peer->subject = emalloc(vallen + 1); memcpy(peer->subject, ep->pkt, vallen); peer->subject[vallen] = '\0'; peer->issuer = emalloc(vallen + 1); strcpy(peer->issuer, peer->subject); temp32 = (fstamp >> 16) & 0xffff; snprintf(statstr, NTP_MAXSTRLEN, "flags 0x%x host %s signature %s", fstamp, peer->subject, OBJ_nid2ln(temp32)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Decode X509 certificate in ASN.1 format and extract * the data containing, among other things, subject * name and public key. In the default identification * scheme, the certificate trail is followed to a self * signed trusted certificate. */ case CRYPTO_CERT | CRYPTO_RESP: /* * Discard the message if invalid. */ if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Scan the certificate list to delete old * versions and link the newest version first on * the list. */ if ((rval = cert_install(ep, peer)) != XEVNT_OK) break; /* * If we snatch the certificate before the * server certificate has been signed by its * server, it will be self signed. When it is, * we chase the certificate issuer, which the * server has, and keep going until a self * signed trusted certificate is found. Be sure * to update the issuer field, since it may * change. */ if (peer->issuer != NULL) free(peer->issuer); peer->issuer = emalloc(strlen(cinfo->issuer) + 1); strcpy(peer->issuer, cinfo->issuer); /* * We plug in the public key and lifetime from * the first certificate received. However, note * that this certificate might not be signed by * the server, so we can't check the * signature/digest NID. 
*/ if (peer->pkey == NULL) { ptr = (u_char *)cinfo->cert.ptr; cert = d2i_X509(NULL, &ptr, ntohl(cinfo->cert.vallen)); peer->pkey = X509_get_pubkey(cert); X509_free(cert); } peer->flash &= ~TEST8; temp32 = cinfo->nid; snprintf(statstr, NTP_MAXSTRLEN, "cert %s 0x%x %s (%u) fs %u", cinfo->subject, cinfo->flags, OBJ_nid2ln(temp32), temp32, ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Schnorr (IFF) identity scheme. This scheme is designed * for use with shared secret group keys and where the * certificate may be generated by a third party. The * client sends a challenge to the server, which * performs a calculation and returns the result. A * positive result is possible only if both client and * server contain the same secret group key. */ case CRYPTO_IFF | CRYPTO_RESP: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * If the challenge matches the response, * the certificate public key, as well as the * server public key, signature and identity are * all verified at the same time. The server is * declared trusted, so we skip further * certificate stages and move immediately to * the cookie stage. */ if ((rval = crypto_iff(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_VRFY | CRYPTO_FLAG_PROV; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "iff fs %u", ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Guillou-Quisquater (GQ) identity scheme. This scheme * is designed for use with public certificates carrying * the GQ public key in an extension field. The client * sends a challenge to the server, which performs a * calculation and returns the result. 
A positive result * is possible only if both client and server contain * the same group key and the server has the matching GQ * private key. */ case CRYPTO_GQ | CRYPTO_RESP: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * If the challenge matches the response, * the certificate public key, as well as the * server public key, signature and identity are * all verified at the same time. The server is * declared trusted, so we skip further * certificate stages and move immediately to * the cookie stage. */ if ((rval = crypto_gq(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_VRFY | CRYPTO_FLAG_PROV; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "gq fs %u", ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * MV */ case CRYPTO_MV | CRYPTO_RESP: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * If the challenge matches the response, * the certificate public key, as well as the * server public key, signature and identity are * all verified at the same time. The server is * declared trusted, so we skip further * certificate stages and move immediately to * the cookie stage. */ if ((rval = crypto_mv(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_VRFY | CRYPTO_FLAG_PROV; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "mv fs %u", ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Cookie request in symmetric modes. Roll a random * cookie and install in symmetric mode. 
Encrypt for the * response, which is transmitted later. */ case CRYPTO_COOK: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Pass the extension field to the transmit * side. If already agreed, walk away. */ fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; if (peer->crypto & CRYPTO_FLAG_AGREE) { peer->flash &= ~TEST8; break; } /* * Install cookie values and light the cookie * bit. The transmit side will pick up and * encrypt it for the response. */ key_expire(peer); peer->cookval.tstamp = ep->tstamp; peer->cookval.fstamp = ep->fstamp; RAND_bytes((u_char *)&peer->pcookie, 4); peer->crypto &= ~CRYPTO_FLAG_AUTO; peer->crypto |= CRYPTO_FLAG_AGREE; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u", peer->pcookie, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Cookie response in client and symmetric modes. If the * cookie bit is set, the working cookie is the EXOR of * the current and new values. */ case CRYPTO_COOK | CRYPTO_RESP: /* * Discard the message if invalid or identity * not confirmed or signature not verified with * respect to the cookie values. */ if (!(peer->crypto & CRYPTO_FLAG_VRFY)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, &peer->cookval, peer)) != XEVNT_OK) break; /* * Decrypt the cookie, hunting all the time for * errors. 
*/ if (vallen == (u_int) EVP_PKEY_size(host_pkey)) { u_int32 *cookiebuf = malloc( RSA_size(host_pkey->pkey.rsa)); if (cookiebuf == NULL) { rval = XEVNT_CKY; break; } if (RSA_private_decrypt(vallen, (u_char *)ep->pkt, (u_char *)cookiebuf, host_pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING) != 4) { rval = XEVNT_CKY; free(cookiebuf); break; } else { cookie = ntohl(*cookiebuf); free(cookiebuf); } } else { rval = XEVNT_CKY; break; } /* * Install cookie values and light the cookie * bit. If this is not broadcast client mode, we * are done here. */ key_expire(peer); peer->cookval.tstamp = ep->tstamp; peer->cookval.fstamp = ep->fstamp; if (peer->crypto & CRYPTO_FLAG_AGREE) peer->pcookie ^= cookie; else peer->pcookie = cookie; if (peer->hmode == MODE_CLIENT && !(peer->cast_flags & MDF_BCLNT)) peer->crypto |= CRYPTO_FLAG_AUTO; else peer->crypto &= ~CRYPTO_FLAG_AUTO; peer->crypto |= CRYPTO_FLAG_AGREE; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u", peer->pcookie, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Install autokey values in broadcast client and * symmetric modes. We have to do this every time the * server/peer cookie changes or a new keylist is * rolled. Ordinarily, this is automatic as this message * is piggybacked on the first NTP packet sent upon * either of these events. Note that a broadcast client * or symmetric peer can receive this response without a * matching request. */ case CRYPTO_AUTO | CRYPTO_RESP: /* * Discard the message if invalid or identity * not confirmed or signature not verified with * respect to the receive autokey values. */ if (!(peer->crypto & CRYPTO_FLAG_VRFY)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, &peer->recval, peer)) != XEVNT_OK) break; /* * Install autokey values and light the * autokey bit. This is not hard. 
*/ if (peer->recval.ptr == NULL) peer->recval.ptr = emalloc(sizeof(struct autokey)); bp = (struct autokey *)peer->recval.ptr; peer->recval.tstamp = ep->tstamp; peer->recval.fstamp = ep->fstamp; ap = (struct autokey *)ep->pkt; bp->seq = ntohl(ap->seq); bp->key = ntohl(ap->key); peer->pkeyid = bp->key; peer->crypto |= CRYPTO_FLAG_AUTO; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "auto seq %d key %x ts %u fs %u", bp->seq, bp->key, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * X509 certificate sign response. Validate the * certificate signed by the server and install. Later * this can be provided to clients of this server in * lieu of the self signed certificate in order to * validate the public key. */ case CRYPTO_SIGN | CRYPTO_RESP: /* * Discard the message if invalid or not * proventic. */ if (!(peer->crypto & CRYPTO_FLAG_PROV)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Scan the certificate list to delete old * versions and link the newest version first on * the list. */ if ((rval = cert_install(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_SIGN; peer->flash &= ~TEST8; temp32 = cinfo->nid; snprintf(statstr, NTP_MAXSTRLEN, "sign %s 0x%x %s (%u) fs %u", cinfo->issuer, cinfo->flags, OBJ_nid2ln(temp32), temp32, ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Install leapseconds table in symmetric modes. This * table is proventicated to the NIST primary servers, * either by copying the file containing the table from * a NIST server to a trusted server or directly using * this protocol. While the entire table is installed at * the server, presently only the current TAI offset is * provided via the kernel to other applications. 
*/ case CRYPTO_TAI: /* * Discard the message if invalid. */ if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Pass the extension field to the transmit * side. Continue below if a leapseconds table * accompanies the message. */ fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; if (len <= VALUE_LEN) { peer->flash &= ~TEST8; break; } /* fall through */ case CRYPTO_TAI | CRYPTO_RESP: /* * If this is a response, discard the message if * signature not verified with respect to the * leapsecond table values. */ if (peer->cmmd == NULL) { if ((rval = crypto_verify(ep, &peer->tai_leap, peer)) != XEVNT_OK) break; } /* * Initialize peer variables with latest update. */ peer->tai_leap.tstamp = ep->tstamp; peer->tai_leap.fstamp = ep->fstamp; peer->tai_leap.vallen = ep->vallen; /* * Install the new table if there is no stored * table or the new table is more recent than * the stored table. Since a filestamp may have * changed, recompute the signatures. */ if (ntohl(peer->tai_leap.fstamp) > ntohl(tai_leap.fstamp)) { tai_leap.fstamp = ep->fstamp; tai_leap.vallen = ep->vallen; if (tai_leap.ptr != NULL) free(tai_leap.ptr); tai_leap.ptr = emalloc(vallen); memcpy(tai_leap.ptr, ep->pkt, vallen); crypto_update(); } crypto_flags |= CRYPTO_FLAG_TAI; peer->crypto |= CRYPTO_FLAG_LEAP; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "leap %u ts %u fs %u", vallen, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * We come here in symmetric modes for miscellaneous * commands that have value fields but are processed on * the transmit side. All we need do here is check for * valid field length. Remaining checks are below and on * the transmit side. 
*/ case CRYPTO_CERT: case CRYPTO_IFF: case CRYPTO_GQ: case CRYPTO_MV: case CRYPTO_SIGN: if (len < VALUE_LEN) { rval = XEVNT_LEN; break; } /* fall through */ /* * We come here for miscellaneous requests and unknown * requests and responses. If an unknown response or * error, forget it. If a request, save the extension * field for later. Unknown requests will be caught on * the transmit side. */ default: if (code & (CRYPTO_RESP | CRYPTO_ERROR)) { rval = XEVNT_ERR; } else if ((rval = crypto_verify(ep, NULL, peer)) == XEVNT_OK) { fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; } } /* * We don't log length/format/timestamp errors and * duplicates, which are log clogging vulnerabilities. * The first error found terminates the extension field * scan and we return the laundry to the caller. A * length/format/timestamp error on transmit is * cheerfully ignored, as the message is not sent. */ if (rval > XEVNT_TSP) { snprintf(statstr, NTP_MAXSTRLEN, "error %x opcode %x ts %u fs %u", rval, code, tstamp, fstamp); record_crypto_stats(&peer->srcadr, statstr); report_event(rval, peer); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; } else if (rval > XEVNT_OK && (code & CRYPTO_RESP)) { rval = XEVNT_OK; } authlen += len; } return (rval); } /* * crypto_xmit - construct extension fields * * This routine is called both when an association is configured and * when one is not. The only case where this matters is to retrieve the * autokey information, in which case the caller has to provide the * association ID to match the association. * * Returns length of extension field. 
*/ int crypto_xmit( struct pkt *xpkt, /* transmit packet pointer */ struct sockaddr_storage *srcadr_sin, /* active runway */ int start, /* offset to extension field */ struct exten *ep, /* extension pointer */ keyid_t cookie /* session cookie */ ) { u_int32 *pkt; /* packet pointer */ struct peer *peer; /* peer structure pointer */ u_int opcode; /* extension field opcode */ struct exten *fp; /* extension pointers */ struct cert_info *cp, *xp; /* certificate info/value pointer */ char certname[MAXHOSTNAME + 1]; /* subject name buffer */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t tstamp; u_int vallen; u_int len; struct value vtemp; associd_t associd; int rval; keyid_t tcookie; /* * Generate the requested extension field request code, length * and association ID. If this is a response and the host is not * synchronized, light the error bit and go home. */ pkt = (u_int32 *)xpkt + start / 4; fp = (struct exten *)pkt; opcode = ntohl(ep->opcode); associd = (associd_t) ntohl(ep->associd); fp->associd = htonl(associd); len = 8; rval = XEVNT_OK; tstamp = crypto_time(); switch (opcode & 0xffff0000) { /* * Send association request and response with status word and * host name. Note, this message is not signed and the filestamp * contains only the status word. */ case CRYPTO_ASSOC | CRYPTO_RESP: len += crypto_send(fp, &hostval); fp->fstamp = htonl(crypto_flags); break; case CRYPTO_ASSOC: len += crypto_send(fp, &hostval); fp->fstamp = htonl(crypto_flags | ident_scheme); break; /* * Send certificate request. Use the values from the extension * field. */ case CRYPTO_CERT: memset(&vtemp, 0, sizeof(vtemp)); vtemp.tstamp = ep->tstamp; vtemp.fstamp = ep->fstamp; vtemp.vallen = ep->vallen; vtemp.ptr = (u_char *)ep->pkt; len += crypto_send(fp, &vtemp); break; /* * Send certificate response or sign request. Use the values * from the certificate cache. If the request contains no * subject name, assume the name of this host. This is for * backwards compatibility. 
Private certificates are never sent. */ case CRYPTO_SIGN: case CRYPTO_CERT | CRYPTO_RESP: vallen = ntohl(ep->vallen); if (vallen == 8) { strcpy(certname, sys_hostname); - } else if (vallen == 0 || vallen > MAXHOSTNAME) { + } else if (vallen == 0 || vallen > MAXHOSTNAME || + len - VALUE_LEN < vallen) { rval = XEVNT_LEN; break; } else { memcpy(certname, ep->pkt, vallen); certname[vallen] = '\0'; } /* * Find all certificates with matching subject. If a * self-signed, trusted certificate is found, use that. * If not, use the first one with matching subject. A * private certificate is never divulged or signed. */ xp = NULL; for (cp = cinfo; cp != NULL; cp = cp->link) { if (cp->flags & CERT_PRIV) continue; if (strcmp(certname, cp->subject) == 0) { if (xp == NULL) xp = cp; if (strcmp(certname, cp->issuer) == 0 && cp->flags & CERT_TRUST) { xp = cp; break; } } } /* * Be careful who you trust. If not yet synchronized, * give back an empty response. If certificate not found * or beyond the lifetime, return an error. This is to * avoid a bad dude trying to get an expired certificate * re-signed. Otherwise, send it. * * Note the timestamp and filestamp are taken from the * certificate value structure. For all certificates the * timestamp is the latest signature update time. For * host and imported certificates the filestamp is the * creation epoch. For signed certificates the filestamp * is the creation epoch of the trusted certificate at * the base of the certificate trail. In principle, this * allows strong checking for signature masquerade. */ if (tstamp == 0) break; if (xp == NULL) rval = XEVNT_CRT; else if (tstamp < xp->first || tstamp > xp->last) rval = XEVNT_SRV; else len += crypto_send(fp, &xp->cert); break; /* * Send challenge in Schnorr (IFF) identity scheme. 
*/ case CRYPTO_IFF: if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) { rval = XEVNT_ERR; break; } if ((rval = crypto_alice(peer, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send response in Schnorr (IFF) identity scheme. */ case CRYPTO_IFF | CRYPTO_RESP: if ((rval = crypto_bob(ep, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send challenge in Guillou-Quisquater (GQ) identity scheme. */ case CRYPTO_GQ: if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) { rval = XEVNT_ERR; break; } if ((rval = crypto_alice2(peer, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send response in Guillou-Quisquater (GQ) identity scheme. */ case CRYPTO_GQ | CRYPTO_RESP: if ((rval = crypto_bob2(ep, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send challenge in MV identity scheme. */ case CRYPTO_MV: if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) { rval = XEVNT_ERR; break; } if ((rval = crypto_alice3(peer, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send response in MV identity scheme. */ case CRYPTO_MV | CRYPTO_RESP: if ((rval = crypto_bob3(ep, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send certificate sign response. The integrity of the request * certificate has already been verified on the receive side. * Sign the response using the local server key. Use the * filestamp from the request and use the timestamp as the * current time. Light the error bit if the certificate is * invalid or contains an unverified signature. */ case CRYPTO_SIGN | CRYPTO_RESP: if ((rval = cert_sign(ep, &vtemp)) == XEVNT_OK) len += crypto_send(fp, &vtemp); value_free(&vtemp); break; /* * Send public key and signature. Use the values from the public * key. 
*/ case CRYPTO_COOK: len += crypto_send(fp, &pubkey); break; /* * Encrypt and send cookie and signature. Light the error bit if * anything goes wrong. */ case CRYPTO_COOK | CRYPTO_RESP: - if ((opcode & 0xffff) < VALUE_LEN) { + vallen = ntohl(ep->vallen); /* Must be <64k */ + if ( vallen == 0 + || (vallen >= MAX_VALLEN) + || (opcode & 0x0000ffff) < VALUE_LEN + vallen) { rval = XEVNT_LEN; break; } if (PKT_MODE(xpkt->li_vn_mode) == MODE_SERVER) { tcookie = cookie; } else { if ((peer = findpeerbyassoc(associd)) == NULL) { rval = XEVNT_ERR; break; } tcookie = peer->pcookie; } - if ((rval = crypto_encrypt(ep, &vtemp, &tcookie)) == - XEVNT_OK) + if ((rval = crypto_encrypt((const u_char *)ep->pkt, vallen, &tcookie, &vtemp)) + == XEVNT_OK) { len += crypto_send(fp, &vtemp); - value_free(&vtemp); + value_free(&vtemp); + } break; /* * Find peer and send autokey data and signature in broadcast * server and symmetric modes. Use the values in the autokey * structure. If no association is found, either the server has * restarted with new associations or some perp has replayed an * old message, in which case light the error bit. */ case CRYPTO_AUTO | CRYPTO_RESP: if ((peer = findpeerbyassoc(associd)) == NULL) { rval = XEVNT_ERR; break; } peer->flags &= ~FLAG_ASSOC; len += crypto_send(fp, &peer->sndval); break; /* * Send leapseconds table and signature. Use the values from the * tai structure. If no table has been loaded, just send an * empty request. */ case CRYPTO_TAI: case CRYPTO_TAI | CRYPTO_RESP: if (crypto_flags & CRYPTO_FLAG_TAI) len += crypto_send(fp, &tai_leap); break; /* * Default - Fall through for requests; for unknown responses, * flag as error. */ default: if (opcode & CRYPTO_RESP) rval = XEVNT_ERR; } /* * In case of error, flame the log. If a request, toss the * puppy; if a response, return so the sender can flame, too. 
*/ if (rval != XEVNT_OK) { opcode |= CRYPTO_ERROR; snprintf(statstr, NTP_MAXSTRLEN, "error %x opcode %x", rval, opcode); record_crypto_stats(srcadr_sin, statstr); report_event(rval, NULL); #ifdef DEBUG if (debug) printf("crypto_xmit: %s\n", statstr); #endif if (!(opcode & CRYPTO_RESP)) return (0); } /* * Round up the field length to a multiple of 8 bytes and save * the request code and length. */ len = ((len + 7) / 8) * 8; fp->opcode = htonl((opcode & 0xffff0000) | len); #ifdef DEBUG if (debug) printf( "crypto_xmit: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n", crypto_flags, start, len, opcode >> 16, associd); #endif return (len); } /* * crypto_verify - parse and verify the extension field and value * * Returns * XEVNT_OK success * XEVNT_LEN bad field format or length * XEVNT_TSP bad timestamp * XEVNT_FSP bad filestamp * XEVNT_PUB bad or missing public key * XEVNT_SGL bad signature length * XEVNT_SIG signature not verified * XEVNT_ERR protocol error */ static int crypto_verify( struct exten *ep, /* extension pointer */ struct value *vp, /* value pointer */ struct peer *peer /* peer structure pointer */ ) { EVP_PKEY *pkey; /* server public key */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp, tstamp1 = 0; /* timestamp */ tstamp_t fstamp, fstamp1 = 0; /* filestamp */ u_int vallen; /* value length */ u_int siglen; /* signature length */ u_int opcode, len; int i; /* * We require valid opcode and field lengths, timestamp, * filestamp, public key, digest, signature length and * signature, where relevant. Note that preliminary length * checks are done in the main loop. */ len = ntohl(ep->opcode) & 0x0000ffff; opcode = ntohl(ep->opcode) & 0xffff0000; /* * Check for valid operation code and protocol. The opcode must * not have the error bit set. If a response, it must have a * value header. If a request and does not contain a value * header, no need for further checking. 
*/ if (opcode & CRYPTO_ERROR) return (XEVNT_ERR); if (opcode & CRYPTO_RESP) { if (len < VALUE_LEN) return (XEVNT_LEN); } else { if (len < VALUE_LEN) return (XEVNT_OK); } /* * We have a value header. Check for valid field lengths. The * field length must be long enough to contain the value header, * value and signature. Note both the value and signature fields * are rounded up to the next word. */ vallen = ntohl(ep->vallen); + if ( vallen == 0 + || vallen > MAX_VALLEN) + return (XEVNT_LEN); i = (vallen + 3) / 4; siglen = ntohl(ep->pkt[i++]); - if (len < VALUE_LEN + ((vallen + 3) / 4) * 4 + ((siglen + 3) / - 4) * 4) + if ( siglen > MAX_VALLEN + || len - VALUE_LEN < ((vallen + 3) / 4) * 4 + || len - VALUE_LEN - ((vallen + 3) / 4) * 4 + < ((siglen + 3) / 4) * 4) return (XEVNT_LEN); /* * Punt if this is a response with no data. Punt if this is a * request and a previous response is pending. */ if (opcode & CRYPTO_RESP) { if (vallen == 0) return (XEVNT_LEN); } else { if (peer->cmmd != NULL) return (XEVNT_LEN); } /* * Check for valid timestamp and filestamp. If the timestamp is * zero, the sender is not synchronized and signatures are * disregarded. If not, the timestamp must not precede the * filestamp. The timestamp and filestamp must not precede the * corresponding values in the value structure, if present. Once * the autokey values have been installed, the timestamp must * always be later than the corresponding value in the value * structure. Duplicate timestamps are illegal once the cookie * has been validated. */ tstamp = ntohl(ep->tstamp); fstamp = ntohl(ep->fstamp); if (tstamp == 0) return (XEVNT_OK); if (tstamp < fstamp) return (XEVNT_TSP); if (vp != NULL) { tstamp1 = ntohl(vp->tstamp); fstamp1 = ntohl(vp->fstamp); if ((tstamp < tstamp1 || (tstamp == tstamp1 && (peer->crypto & CRYPTO_FLAG_AUTO)))) return (XEVNT_TSP); if ((tstamp < fstamp1 || fstamp < fstamp1)) return (XEVNT_FSP); } /* * Check for valid signature length, public key and digest * algorithm. 
*/ if (crypto_flags & peer->crypto & CRYPTO_FLAG_PRIV) pkey = sign_pkey; else pkey = peer->pkey; if (siglen == 0 || pkey == NULL || peer->digest == NULL) return (XEVNT_OK); if (siglen != (u_int)EVP_PKEY_size(pkey)) return (XEVNT_SGL); /* * Darn, I thought we would never get here. Verify the * signature. If the identity exchange is verified, light the * proventic bit. If no client identity scheme is specified, * avoid doing the sign exchange. */ EVP_VerifyInit(&ctx, peer->digest); + /* XXX: the "+ 12" needs to be at least documented... */ EVP_VerifyUpdate(&ctx, (u_char *)&ep->tstamp, vallen + 12); if (EVP_VerifyFinal(&ctx, (u_char *)&ep->pkt[i], siglen, pkey) <= 0) return (XEVNT_SIG); if (peer->crypto & CRYPTO_FLAG_VRFY) { peer->crypto |= CRYPTO_FLAG_PROV; if (!(crypto_flags & CRYPTO_FLAG_MASK)) peer->crypto |= CRYPTO_FLAG_SIGN; } return (XEVNT_OK); } /* - * crypto_encrypt - construct encrypted cookie and signature from - * extension field and cookie + * crypto_encrypt - construct vp (encrypted cookie and signature) from + * the public key and cookie. * - * Returns + * Returns: * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_CKY bad or missing cookie * XEVNT_PER host certificate expired */ static int crypto_encrypt( - struct exten *ep, /* extension pointer */ - struct value *vp, /* value pointer */ - keyid_t *cookie /* server cookie */ + const u_char *ptr, /* Public Key */ + u_int vallen, /* Length of Public Key */ + keyid_t *cookie, /* server cookie */ + struct value *vp /* value pointer */ ) { EVP_PKEY *pkey; /* public key */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ u_int32 temp32; - u_int len; - u_char *ptr; /* * Extract the public key from the request. 
*/ - len = ntohl(ep->vallen); - ptr = (u_char *)ep->pkt; - pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, len); + pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, vallen); if (pkey == NULL) { msyslog(LOG_ERR, "crypto_encrypt %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_PUB); } /* * Encrypt the cookie, encode in ASN.1 and sign. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = hostval.tstamp; - len = EVP_PKEY_size(pkey); - vp->vallen = htonl(len); - vp->ptr = emalloc(len); + vallen = EVP_PKEY_size(pkey); + vp->vallen = htonl(vallen); + vp->ptr = emalloc(vallen); temp32 = htonl(*cookie); if (!RSA_public_encrypt(4, (u_char *)&temp32, vp->ptr, pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING)) { msyslog(LOG_ERR, "crypto_encrypt %s\n", ERR_error_string(ERR_get_error(), NULL)); EVP_PKEY_free(pkey); return (XEVNT_CKY); } EVP_PKEY_free(pkey); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); - EVP_SignUpdate(&ctx, vp->ptr, len); - if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) - vp->siglen = htonl(len); + EVP_SignUpdate(&ctx, vp->ptr, vallen); + if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey)) + vp->siglen = htonl(sign_siglen); return (XEVNT_OK); } /* * crypto_ident - construct extension field for identity scheme * * This routine determines which identity scheme is in use and * constructs an extension field for that scheme. */ u_int crypto_ident( struct peer *peer /* peer structure pointer */ ) { char filename[MAXFILENAME + 1]; /* * If the server identity has already been verified, no further * action is necessary. Otherwise, try to load the identity file * of the certificate issuer. If the issuer file is not found, * try the host file. If nothing found, declare a cryptobust. 
* Note we can't get here unless the trusted certificate has * been found and the CRYPTO_FLAG_VALID bit is set, so the * certificate issuer is valid. */ if (peer->ident_pkey != NULL) EVP_PKEY_free(peer->ident_pkey); if (peer->crypto & CRYPTO_FLAG_GQ) { snprintf(filename, MAXFILENAME, "ntpkey_gq_%s", peer->issuer); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_GQ); snprintf(filename, MAXFILENAME, "ntpkey_gq_%s", sys_hostname); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_GQ); } if (peer->crypto & CRYPTO_FLAG_IFF) { snprintf(filename, MAXFILENAME, "ntpkey_iff_%s", peer->issuer); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_IFF); snprintf(filename, MAXFILENAME, "ntpkey_iff_%s", sys_hostname); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_IFF); } if (peer->crypto & CRYPTO_FLAG_MV) { snprintf(filename, MAXFILENAME, "ntpkey_mv_%s", peer->issuer); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_MV); snprintf(filename, MAXFILENAME, "ntpkey_mv_%s", sys_hostname); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_MV); } /* * No compatible identity scheme is available. Life is hard. */ msyslog(LOG_INFO, "crypto_ident: no compatible identity scheme found"); return (0); } /* * crypto_args - construct extension field from arguments * * This routine creates an extension field with current timestamps and * specified opcode, association ID and optional string. Note that the * extension field is created here, but freed after the crypto_xmit() * call in the protocol module. * * Returns extension field pointer (no errors). + * + * XXX: opcode and len should really be 32-bit quantities and + * we should make sure that str is not too big. 
*/ struct exten * crypto_args( struct peer *peer, /* peer structure pointer */ u_int opcode, /* operation code */ char *str /* argument string */ ) { tstamp_t tstamp; /* NTP timestamp */ struct exten *ep; /* extension field pointer */ u_int len; /* extension field length */ + size_t slen; tstamp = crypto_time(); len = sizeof(struct exten); - if (str != NULL) - len += strlen(str); + if (str != NULL) { + slen = strlen(str); + len += slen; + } ep = emalloc(len); memset(ep, 0, len); if (opcode == 0) return (ep); ep->opcode = htonl(opcode + len); /* * If a response, send our ID; if a request, send the * responder's ID. */ if (opcode & CRYPTO_RESP) ep->associd = htonl(peer->associd); else ep->associd = htonl(peer->assoc); ep->tstamp = htonl(tstamp); ep->fstamp = hostval.tstamp; ep->vallen = 0; if (str != NULL) { - ep->vallen = htonl(strlen(str)); - memcpy((char *)ep->pkt, str, strlen(str)); + ep->vallen = htonl(slen); + memcpy((char *)ep->pkt, str, slen); } else { ep->pkt[0] = peer->associd; } return (ep); } /* * crypto_send - construct extension field from value components * * Returns extension field length. Note: it is not polite to send a * nonempty signature with zero timestamp or a nonzero timestamp with * empty signature, but these rules are not enforced here. + * + * XXX This code won't work on a box with 16-bit ints. */ u_int crypto_send( struct exten *ep, /* extension field pointer */ struct value *vp /* value pointer */ ) { u_int len, temp32; int i; /* * Copy data. If the data field is empty or zero length, encode * an empty value with length zero. */ ep->tstamp = vp->tstamp; ep->fstamp = vp->fstamp; ep->vallen = vp->vallen; len = 12; temp32 = ntohl(vp->vallen); if (temp32 > 0 && vp->ptr != NULL) memcpy(ep->pkt, vp->ptr, temp32); /* * Copy signature. If the signature field is empty or zero * length, encode an empty signature with length zero. 
*/ i = (temp32 + 3) / 4; len += i * 4 + 4; ep->pkt[i++] = vp->siglen; temp32 = ntohl(vp->siglen); if (temp32 > 0 && vp->sig != NULL) memcpy(&ep->pkt[i], vp->sig, temp32); len += temp32; return (len); } /* * crypto_update - compute new public value and sign extension fields * * This routine runs periodically, like once a day, and when something * changes. It updates the timestamps on three value structures and one * value structure list, then signs all the structures: * * hostval host name (not signed) * pubkey public key * cinfo certificate info/value list * tai_leap leapseconds file * * Filestamps are proventicated data, so this routine is run only when * the host has been synchronized to a proventicated source. Thus, the * timestamp is proventicated, too, and can be used to deflect * clogging attacks and even cook breakfast. * * Returns void (no errors) */ void crypto_update(void) { EVP_MD_CTX ctx; /* message digest context */ struct cert_info *cp, *cpn; /* certificate info/value */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t tstamp; /* NTP timestamp */ u_int len; if ((tstamp = crypto_time()) == 0) return; hostval.tstamp = htonl(tstamp); /* * Sign public key and timestamps. The filestamp is derived from * the host key file extension from wherever the file was * generated. */ if (pubkey.vallen != 0) { pubkey.tstamp = hostval.tstamp; pubkey.siglen = 0; if (pubkey.sig == NULL) pubkey.sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&pubkey, 12); EVP_SignUpdate(&ctx, pubkey.ptr, ntohl(pubkey.vallen)); if (EVP_SignFinal(&ctx, pubkey.sig, &len, sign_pkey)) pubkey.siglen = htonl(len); } /* * Sign certificates and timestamps. The filestamp is derived * from the certificate file extension from wherever the file * was generated. Note we do not throw expired certificates * away; they may have signed younger ones. 
*/ for (cp = cinfo; cp != NULL; cp = cpn) { cpn = cp->link; cp->cert.tstamp = hostval.tstamp; cp->cert.siglen = 0; if (cp->cert.sig == NULL) cp->cert.sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&cp->cert, 12); EVP_SignUpdate(&ctx, cp->cert.ptr, ntohl(cp->cert.vallen)); if (EVP_SignFinal(&ctx, cp->cert.sig, &len, sign_pkey)) cp->cert.siglen = htonl(len); } /* * Sign leapseconds table and timestamps. The filestamp is * derived from the leapsecond file extension from wherever the * file was generated. */ if (tai_leap.vallen != 0) { tai_leap.tstamp = hostval.tstamp; tai_leap.siglen = 0; if (tai_leap.sig == NULL) tai_leap.sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&tai_leap, 12); EVP_SignUpdate(&ctx, tai_leap.ptr, ntohl(tai_leap.vallen)); if (EVP_SignFinal(&ctx, tai_leap.sig, &len, sign_pkey)) tai_leap.siglen = htonl(len); } snprintf(statstr, NTP_MAXSTRLEN, "update ts %u", ntohl(hostval.tstamp)); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_update: %s\n", statstr); #endif } /* * value_free - free value structure components. * * Returns void (no errors) */ void value_free( struct value *vp /* value structure */ ) { if (vp->ptr != NULL) free(vp->ptr); if (vp->sig != NULL) free(vp->sig); memset(vp, 0, sizeof(struct value)); } /* * crypto_time - returns current NTP time in seconds. */ tstamp_t crypto_time() { l_fp tstamp; /* NTP time */ L_CLR(&tstamp); L_CLR(&tstamp); if (sys_leap != LEAP_NOTINSYNC) get_systime(&tstamp); return (tstamp.l_ui); } /* * asn2ntp - convert ASN1_TIME time structure to NTP time in seconds. */ u_long asn2ntp ( ASN1_TIME *asn1time /* pointer to ASN1_TIME structure */ ) { char *v; /* pointer to ASN1_TIME string */ struct tm tm; /* used to convert to NTP time */ /* * Extract time string YYMMDDHHMMSSZ from ASN1 time structure. 
* Note that the YY, MM, DD fields start with one, the HH, MM, * SS fiels start with zero and the Z character should be 'Z' * for UTC. Also note that years less than 50 map to years * greater than 100. Dontcha love ASN.1? Better than MIL-188. */ if (asn1time->length > 13) return ((u_long)(~0)); /* We can't use -1 here. It's invalid */ v = (char *)asn1time->data; tm.tm_year = (v[0] - '0') * 10 + v[1] - '0'; if (tm.tm_year < 50) tm.tm_year += 100; tm.tm_mon = (v[2] - '0') * 10 + v[3] - '0' - 1; tm.tm_mday = (v[4] - '0') * 10 + v[5] - '0'; tm.tm_hour = (v[6] - '0') * 10 + v[7] - '0'; tm.tm_min = (v[8] - '0') * 10 + v[9] - '0'; tm.tm_sec = (v[10] - '0') * 10 + v[11] - '0'; tm.tm_wday = 0; tm.tm_yday = 0; tm.tm_isdst = 0; return (timegm(&tm) + JAN_1970); } /* * bigdig() - compute a BIGNUM MD5 hash of a BIGNUM number. */ static int bighash( BIGNUM *bn, /* BIGNUM * from */ BIGNUM *bk /* BIGNUM * to */ ) { EVP_MD_CTX ctx; /* message digest context */ u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */ u_char *ptr; /* a BIGNUM as binary string */ u_int len; len = BN_num_bytes(bn); ptr = emalloc(len); BN_bn2bin(bn, ptr); EVP_DigestInit(&ctx, EVP_md5()); EVP_DigestUpdate(&ctx, ptr, len); EVP_DigestFinal(&ctx, dgst, &len); BN_bin2bn(dgst, len, bk); /* XXX MEMLEAK? free ptr? */ return (1); } /* *********************************************************************** * * * The following routines implement the Schnorr (IFF) identity scheme * * * *********************************************************************** * * The Schnorr (IFF) identity scheme is intended for use when * the ntp-genkeys program does not generate the certificates used in * the protocol and the group key cannot be conveyed in the certificate * itself. For this purpose, new generations of IFF values must be * securely transmitted to all members of the group before use. The * scheme is self contained and independent of new generations of host * keys, sign keys and certificates. 
* * The IFF identity scheme is based on DSA cryptography and algorithms * described in Stinson p. 285. The IFF values hide in a DSA cuckoo * structure, but only the primes and generator are used. The p is a * 512-bit prime, q a 160-bit prime that divides p - 1 and is a qth root * of 1 mod p; that is, g^q = 1 mod p. The TA rolls primvate random * group key b disguised as a DSA structure member, then computes public * key g^(q - b). These values are shared only among group members and * never revealed in messages. Alice challenges Bob to confirm identity * using the protocol described below. * * How it works * * The scheme goes like this. Both Alice and Bob have the public primes * p, q and generator g. The TA gives private key b to Bob and public * key v = g^(q - a) mod p to Alice. * * Alice rolls new random challenge r and sends to Bob in the IFF * request message. Bob rolls new random k, then computes y = k + b r * mod q and x = g^k mod p and sends (y, hash(x)) to Alice in the * response message. Besides making the response shorter, the hash makes * it effectivey impossible for an intruder to solve for b by observing * a number of these messages. * * Alice receives the response and computes g^y v^r mod p. After a bit * of algebra, this simplifies to g^k. If the hash of this result * matches hash(x), Alice knows that Bob has the group key b. The signed * response binds this knowledge to Bob's private key and the public key * previously received in his certificate. * * crypto_alice - construct Alice's challenge in IFF scheme * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key */ static int crypto_alice( struct peer *peer, /* peer pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* IFF parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; u_int len; /* * The identity parameters must have correct format and content. 
*/ if (peer->ident_pkey == NULL) return (XEVNT_ID); if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_alice: defective key"); return (XEVNT_PUB); } /* * Roll new random r (0 < r < q). The OpenSSL library has a bug * omitting BN_rand_range, so we have to do it the hard way. */ bctx = BN_CTX_new(); len = BN_num_bytes(dsa->q); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = BN_new(); BN_rand(peer->iffval, len * 8, -1, 1); /* r */ BN_mod(peer->iffval, peer->iffval, dsa->q, bctx); BN_CTX_free(bctx); /* * Sign and send to Bob. The filestamp is from the local file. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(peer->fstamp); vp->vallen = htonl(len); vp->ptr = emalloc(len); BN_bn2bin(peer->iffval, vp->ptr); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_bob - construct Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_PER host expired certificate */ static int crypto_bob( struct exten *ep, /* extension pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* IFF parameters */ DSA_SIG *sdsa; /* DSA signature context fake */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ BIGNUM *bn, *bk, *r; u_char *ptr; - u_int len; + u_int len; /* extension field length */ + u_int vallen = 0; /* value length */ /* * If the IFF parameters are not valid, something awful * happened or we are being tormented. 
*/ if (iffpar_pkey == NULL) { msyslog(LOG_INFO, "crypto_bob: scheme unavailable"); return (XEVNT_ID); } dsa = iffpar_pkey->pkey.dsa; /* * Extract r from the challenge. */ - len = ntohl(ep->vallen); - if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) { + vallen = ntohl(ep->vallen); + len = ntohl(ep->opcode) & 0x0000ffff; + if (vallen == 0 || len < VALUE_LEN || len - VALUE_LEN < vallen) + return XEVNT_LEN; + if ((r = BN_bin2bn((u_char *)ep->pkt, vallen, NULL)) == NULL) { msyslog(LOG_ERR, "crypto_bob %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Bob rolls random k (0 < k < q), computes y = k + b r mod q * and x = g^k mod p, then sends (y, hash(x)) to Alice. */ bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new(); sdsa = DSA_SIG_new(); - BN_rand(bk, len * 8, -1, 1); /* k */ + BN_rand(bk, vallen * 8, -1, 1); /* k */ BN_mod_mul(bn, dsa->priv_key, r, dsa->q, bctx); /* b r mod q */ BN_add(bn, bn, bk); BN_mod(bn, bn, dsa->q, bctx); /* k + b r mod q */ sdsa->r = BN_dup(bn); BN_mod_exp(bk, dsa->g, bk, dsa->p, bctx); /* g^k mod p */ bighash(bk, bk); sdsa->s = BN_dup(bk); BN_CTX_free(bctx); BN_free(r); BN_free(bn); BN_free(bk); /* * Encode the values in ASN.1 and sign. 
*/ - tstamp = crypto_time(); - memset(vp, 0, sizeof(struct value)); - vp->tstamp = htonl(tstamp); - vp->fstamp = htonl(if_fstamp); - len = i2d_DSA_SIG(sdsa, NULL); - if (len <= 0) { + vallen = i2d_DSA_SIG(sdsa, NULL); + if (vallen == 0) { msyslog(LOG_ERR, "crypto_bob %s\n", ERR_error_string(ERR_get_error(), NULL)); DSA_SIG_free(sdsa); return (XEVNT_ERR); } - vp->vallen = htonl(len); - ptr = emalloc(len); + if (vallen > MAX_VALLEN) { + msyslog(LOG_ERR, "crypto_bob: signature is too big: %d", + vallen); + DSA_SIG_free(sdsa); + return (XEVNT_LEN); + } + memset(vp, 0, sizeof(struct value)); + tstamp = crypto_time(); + vp->tstamp = htonl(tstamp); + vp->fstamp = htonl(if_fstamp); + vp->vallen = htonl(vallen); + ptr = emalloc(vallen); vp->ptr = ptr; i2d_DSA_SIG(sdsa, &ptr); DSA_SIG_free(sdsa); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); + /* XXX: more validation to make sure the sign fits... */ vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); - EVP_SignUpdate(&ctx, vp->ptr, len); - if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) + EVP_SignUpdate(&ctx, vp->ptr, vallen); + if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_iff - verify Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_FSP bad filestamp */ int crypto_iff( struct exten *ep, /* extension pointer */ struct peer *peer /* peer structure pointer */ ) { DSA *dsa; /* IFF parameters */ BN_CTX *bctx; /* BIGNUM context */ DSA_SIG *sdsa; /* DSA parameters */ BIGNUM *bn, *bk; u_int len; const u_char *ptr; int temp; /* * If the IFF parameters are not valid or no challenge was sent, * something awful happened or we are being tormented. 
*/ if (peer->ident_pkey == NULL) { msyslog(LOG_INFO, "crypto_iff: scheme unavailable"); return (XEVNT_ID); } if (ntohl(ep->fstamp) != peer->fstamp) { msyslog(LOG_INFO, "crypto_iff: invalid filestamp %u", ntohl(ep->fstamp)); return (XEVNT_FSP); } if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_iff: defective key"); return (XEVNT_PUB); } if (peer->iffval == NULL) { msyslog(LOG_INFO, "crypto_iff: missing challenge"); return (XEVNT_ID); } /* * Extract the k + b r and g^k values from the response. */ bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new(); len = ntohl(ep->vallen); ptr = (const u_char *)ep->pkt; if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) { msyslog(LOG_ERR, "crypto_iff %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Compute g^(k + b r) g^(q - b)r mod p. */ BN_mod_exp(bn, dsa->pub_key, peer->iffval, dsa->p, bctx); BN_mod_exp(bk, dsa->g, sdsa->r, dsa->p, bctx); BN_mod_mul(bn, bn, bk, dsa->p, bctx); /* * Verify the hash of the result matches hash(x). */ bighash(bn, bn); temp = BN_cmp(bn, sdsa->s); BN_free(bn); BN_free(bk); BN_CTX_free(bctx); BN_free(peer->iffval); peer->iffval = NULL; DSA_SIG_free(sdsa); if (temp == 0) return (XEVNT_OK); else return (XEVNT_ID); } /* *********************************************************************** * * * The following routines implement the Guillou-Quisquater (GQ) * * identity scheme * * * *********************************************************************** * * The Guillou-Quisquater (GQ) identity scheme is intended for use when * the ntp-genkeys program generates the certificates used in the * protocol and the group key can be conveyed in a certificate extension * field. The scheme is self contained and independent of new * generations of host keys, sign keys and certificates. * * The GQ identity scheme is based on RSA cryptography and algorithms * described in Stinson p. 300 (with errors). 
The GQ values hide in an * RSA cuckoo structure, but only the modulus is used. The 512-bit * public modulus is n = p q, where p and q are secret large primes. The * TA rolls random group key b disguised as an RSA structure member. * Except for the public key, these values are shared only among group * members and never revealed in messages. * * When rolling new certificates, Bob recomputes the private and * public keys. The private key u is a random roll, while the public key * is the inverse obscured by the group key v = (u^-1)^b. These values * replace the private and public keys normally generated by the RSA * scheme. Alice challenges Bob to confirm identity using the protocol * described below. * * How it works * * The scheme goes like this. Both Alice and Bob have the same modulus n * and some random b as the group key. These values are computed and * distributed in advance via secret means, although only the group key * b is truly secret. Each has a random private key u and public * key (u^-1)^b, although not necessarily the same ones. Bob and Alice * can regenerate the key pair from time to time without affecting * operations. The public key is conveyed on the certificate in an * extension field; the private key is never revealed. * * Alice rolls new random challenge r and sends to Bob in the GQ * request message. Bob rolls new random k, then computes y = k u^r mod * n and x = k^b mod n and sends (y, hash(x)) to Alice in the response * message. Besides making the response shorter, the hash makes it * effectively impossible for an intruder to solve for b by observing * a number of these messages. * * Alice receives the response and computes y^b v^r mod n. After a bit * of algebra, this simplifies to k^b. If the hash of this result * matches hash(x), Alice knows that Bob has the group key b. The signed * response binds this knowledge to Bob's private key and the public key * previously received in his certificate.
* * crypto_alice2 - construct Alice's challenge in GQ scheme * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_PER host certificate expired */ static int crypto_alice2( struct peer *peer, /* peer pointer */ struct value *vp /* value pointer */ ) { RSA *rsa; /* GQ parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; u_int len; /* * The identity parameters must have correct format and content. */ if (peer->ident_pkey == NULL) return (XEVNT_ID); if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) { msyslog(LOG_INFO, "crypto_alice2: defective key"); return (XEVNT_PUB); } /* * Roll new random r (0 < r < n). The OpenSSL library has a bug * omitting BN_rand_range, so we have to do it the hard way. */ bctx = BN_CTX_new(); len = BN_num_bytes(rsa->n); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = BN_new(); BN_rand(peer->iffval, len * 8, -1, 1); /* r mod n */ BN_mod(peer->iffval, peer->iffval, rsa->n, bctx); BN_CTX_free(bctx); /* * Sign and send to Bob. The filestamp is from the local file. 
*/ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(peer->fstamp); vp->vallen = htonl(len); vp->ptr = emalloc(len); BN_bn2bin(peer->iffval, vp->ptr); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_bob2 - construct Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_PER host certificate expired */ static int crypto_bob2( struct exten *ep, /* extension pointer */ struct value *vp /* value pointer */ ) { RSA *rsa; /* GQ parameters */ DSA_SIG *sdsa; /* DSA parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ BIGNUM *r, *k, *g, *y; u_char *ptr; u_int len; /* * If the GQ parameters are not valid, something awful * happened or we are being tormented. */ if (gqpar_pkey == NULL) { msyslog(LOG_INFO, "crypto_bob2: scheme unavailable"); return (XEVNT_ID); } rsa = gqpar_pkey->pkey.rsa; /* * Extract r from the challenge. */ len = ntohl(ep->vallen); if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) { msyslog(LOG_ERR, "crypto_bob2 %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Bob rolls random k (0 < k < n), computes y = k u^r mod n and * x = k^b mod n, then sends (y, hash(x)) to Alice. 
*/ bctx = BN_CTX_new(); k = BN_new(); g = BN_new(); y = BN_new(); sdsa = DSA_SIG_new(); BN_rand(k, len * 8, -1, 1); /* k */ BN_mod(k, k, rsa->n, bctx); BN_mod_exp(y, rsa->p, r, rsa->n, bctx); /* u^r mod n */ BN_mod_mul(y, k, y, rsa->n, bctx); /* k u^r mod n */ sdsa->r = BN_dup(y); BN_mod_exp(g, k, rsa->e, rsa->n, bctx); /* k^b mod n */ bighash(g, g); sdsa->s = BN_dup(g); BN_CTX_free(bctx); BN_free(r); BN_free(k); BN_free(g); BN_free(y); /* * Encode the values in ASN.1 and sign. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(gq_fstamp); len = i2d_DSA_SIG(sdsa, NULL); if (len <= 0) { msyslog(LOG_ERR, "crypto_bob2 %s\n", ERR_error_string(ERR_get_error(), NULL)); DSA_SIG_free(sdsa); return (XEVNT_ERR); } vp->vallen = htonl(len); ptr = emalloc(len); vp->ptr = ptr; i2d_DSA_SIG(sdsa, &ptr); DSA_SIG_free(sdsa); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_gq - verify Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group keys * XEVNT_ERR protocol error * XEVNT_FSP bad filestamp */ int crypto_gq( struct exten *ep, /* extension pointer */ struct peer *peer /* peer structure pointer */ ) { RSA *rsa; /* GQ parameters */ BN_CTX *bctx; /* BIGNUM context */ DSA_SIG *sdsa; /* RSA signature context fake */ BIGNUM *y, *v; const u_char *ptr; u_int len; int temp; /* * If the GQ parameters are not valid or no challenge was sent, * something awful happened or we are being tormented. 
*/ if (peer->ident_pkey == NULL) { msyslog(LOG_INFO, "crypto_gq: scheme unavailable"); return (XEVNT_ID); } if (ntohl(ep->fstamp) != peer->fstamp) { msyslog(LOG_INFO, "crypto_gq: invalid filestamp %u", ntohl(ep->fstamp)); return (XEVNT_FSP); } if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) { msyslog(LOG_INFO, "crypto_gq: defective key"); return (XEVNT_PUB); } if (peer->iffval == NULL) { msyslog(LOG_INFO, "crypto_gq: missing challenge"); return (XEVNT_ID); } /* * Extract the y = k u^r and hash(x = k^b) values from the * response. */ bctx = BN_CTX_new(); y = BN_new(); v = BN_new(); len = ntohl(ep->vallen); ptr = (const u_char *)ep->pkt; if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) { msyslog(LOG_ERR, "crypto_gq %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Compute v^r y^b mod n. */ BN_mod_exp(v, peer->grpkey, peer->iffval, rsa->n, bctx); /* v^r mod n */ BN_mod_exp(y, sdsa->r, rsa->e, rsa->n, bctx); /* y^b mod n */ BN_mod_mul(y, v, y, rsa->n, bctx); /* v^r y^b mod n */ /* * Verify the hash of the result matches hash(x). */ bighash(y, y); temp = BN_cmp(y, sdsa->s); BN_CTX_free(bctx); BN_free(y); BN_free(v); BN_free(peer->iffval); peer->iffval = NULL; DSA_SIG_free(sdsa); if (temp == 0) return (XEVNT_OK); else return (XEVNT_ID); } /* *********************************************************************** * * * The following routines implement the Mu-Varadharajan (MV) identity * * scheme * * * *********************************************************************** */ /* * The Mu-Varadharajan (MV) cryptosystem was originally intended when * servers broadcast messages to clients, but clients never send * messages to servers. There is one encryption key for the server and a * separate decryption key for each client. It operated something like a * pay-per-view satellite broadcasting system where the session key is * encrypted by the broadcaster and the decryption keys are held in a * tamperproof set-top box. 
*
 * The MV parameters and private encryption key hide in a DSA cuckoo
 * structure which uses the same parameters, but generated in a
 * different way. The values are used in an encryption scheme similar to
 * El Gamal cryptography and a polynomial formed from the expansion of
 * product terms (x - x[j]), as described in Mu, Y., and V.
 * Varadharajan: Robust and Secure Broadcasting, Proc. Indocrypt 2001,
 * 223-231. The paper has significant errors and serious omissions.
 *
 * Let q be the product of n distinct primes s'[j] (j = 1...n), where
 * each s'[j] has m significant bits. Let p be a prime p = 2 * q + 1, so
 * that q and each s'[j] divide p - 1 and p has M = n * m + 1
 * significant bits. The elements x mod q of Zq with the elements 2 and
 * the primes removed form a field Zq* valid for polynomial arithmetic.
 * Let g be a generator of Zp; that is, gcd(g, p - 1) = 1 and g^q = 1
 * mod p. We expect M to be in the 500-bit range and n relatively small,
 * like 25, so the likelihood of a randomly generated element of x mod q
 * of Zq colliding with a factor of p - 1 is very small and can be
 * avoided. Associated with each s'[j] is an element s[j] such that s[j]
 * s'[j] = s'[j] mod q. We find s[j] as the quotient (q + s'[j]) /
 * s'[j]. These are the parameters of the scheme and they are expensive
 * to compute.
 *
 * We set up an instance of the scheme as follows. A set of random
 * values x[j] mod q (j = 1...n), are generated as the zeros of a
 * polynomial of order n. The product terms (x - x[j]) are expanded to
 * form coefficients a[i] mod q (i = 0...n) in powers of x. These are
 * used as exponents of the generator g mod p to generate the private
 * encryption key A. The pair (gbar, ghat) of public server keys and the
 * pairs (xbar[j], xhat[j]) (j = 1...n) of private client keys are used
 * to construct the decryption keys. The devil is in the details.
 *
 * The distinguishing characteristic of this scheme is the capability to
 * revoke keys.
Included in the calculation of E, gbar and ghat is the * product s = prod(s'[j]) (j = 1...n) above. If the factor s'[j] is * subsequently removed from the product and E, gbar and ghat * recomputed, the jth client will no longer be able to compute E^-1 and * thus unable to decrypt the block. * * How it works * * The scheme goes like this. Bob has the server values (p, A, q, gbar, * ghat) and Alice the client values (p, xbar, xhat). * * Alice rolls new random challenge r (0 < r < p) and sends to Bob in * the MV request message. Bob rolls new random k (0 < k < q), encrypts * y = A^k mod p (a permutation) and sends (hash(y), gbar^k, ghat^k) to * Alice. * * Alice receives the response and computes the decryption key (the * inverse permutation) from previously obtained (xbar, xhat) and * (gbar^k, ghat^k) in the message. She computes the inverse, which is * unique by reasons explained in the ntp-keygen.c program sources. If * the hash of this result matches hash(y), Alice knows that Bob has the * group key b. The signed response binds this knowledge to Bob's * private key and the public key previously received in his * certificate. * * crypto_alice3 - construct Alice's challenge in MV scheme * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_PER host certificate expired */ static int crypto_alice3( struct peer *peer, /* peer pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* MV parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; u_int len; /* * The identity parameters must have correct format and content. */ if (peer->ident_pkey == NULL) return (XEVNT_ID); if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_alice3: defective key"); return (XEVNT_PUB); } /* * Roll new random r (0 < r < q). The OpenSSL library has a bug * omitting BN_rand_range, so we have to do it the hard way. 
*/ bctx = BN_CTX_new(); len = BN_num_bytes(dsa->p); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = BN_new(); BN_rand(peer->iffval, len * 8, -1, 1); /* r */ BN_mod(peer->iffval, peer->iffval, dsa->p, bctx); BN_CTX_free(bctx); /* * Sign and send to Bob. The filestamp is from the local file. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(peer->fstamp); vp->vallen = htonl(len); vp->ptr = emalloc(len); BN_bn2bin(peer->iffval, vp->ptr); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_bob3 - construct Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_ERR protocol error * XEVNT_PER host certificate expired */ static int crypto_bob3( struct exten *ep, /* extension pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* MV parameters */ DSA *sdsa; /* DSA signature context fake */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ BIGNUM *r, *k, *u; u_char *ptr; u_int len; /* * If the MV parameters are not valid, something awful * happened or we are being tormented. */ if (mvpar_pkey == NULL) { msyslog(LOG_INFO, "crypto_bob3: scheme unavailable"); return (XEVNT_ID); } dsa = mvpar_pkey->pkey.dsa; /* * Extract r from the challenge. */ len = ntohl(ep->vallen); if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) { msyslog(LOG_ERR, "crypto_bob3 %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Bob rolls random k (0 < k < q), making sure it is not a * factor of q. He then computes y = A^k r and sends (hash(y), * gbar^k, ghat^k) to Alice. 
*/ bctx = BN_CTX_new(); k = BN_new(); u = BN_new(); sdsa = DSA_new(); sdsa->p = BN_new(); sdsa->q = BN_new(); sdsa->g = BN_new(); while (1) { BN_rand(k, BN_num_bits(dsa->q), 0, 0); BN_mod(k, k, dsa->q, bctx); BN_gcd(u, k, dsa->q, bctx); if (BN_is_one(u)) break; } BN_mod_exp(u, dsa->g, k, dsa->p, bctx); /* A r */ BN_mod_mul(u, u, r, dsa->p, bctx); bighash(u, sdsa->p); BN_mod_exp(sdsa->q, dsa->priv_key, k, dsa->p, bctx); /* gbar */ BN_mod_exp(sdsa->g, dsa->pub_key, k, dsa->p, bctx); /* ghat */ BN_CTX_free(bctx); BN_free(k); BN_free(r); BN_free(u); /* * Encode the values in ASN.1 and sign. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(mv_fstamp); len = i2d_DSAparams(sdsa, NULL); if (len <= 0) { msyslog(LOG_ERR, "crypto_bob3 %s\n", ERR_error_string(ERR_get_error(), NULL)); DSA_free(sdsa); return (XEVNT_ERR); } vp->vallen = htonl(len); ptr = emalloc(len); vp->ptr = ptr; i2d_DSAparams(sdsa, &ptr); DSA_free(sdsa); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_mv - verify Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_FSP bad filestamp */ int crypto_mv( struct exten *ep, /* extension pointer */ struct peer *peer /* peer structure pointer */ ) { DSA *dsa; /* MV parameters */ DSA *sdsa; /* DSA parameters */ BN_CTX *bctx; /* BIGNUM context */ BIGNUM *k, *u, *v; u_int len; const u_char *ptr; int temp; /* * If the MV parameters are not valid or no challenge was sent, * something awful happened or we are being tormented. 
*/ if (peer->ident_pkey == NULL) { msyslog(LOG_INFO, "crypto_mv: scheme unavailable"); return (XEVNT_ID); } if (ntohl(ep->fstamp) != peer->fstamp) { msyslog(LOG_INFO, "crypto_mv: invalid filestamp %u", ntohl(ep->fstamp)); return (XEVNT_FSP); } if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_mv: defective key"); return (XEVNT_PUB); } if (peer->iffval == NULL) { msyslog(LOG_INFO, "crypto_mv: missing challenge"); return (XEVNT_ID); } /* * Extract the (hash(y), gbar, ghat) values from the response. */ bctx = BN_CTX_new(); k = BN_new(); u = BN_new(); v = BN_new(); len = ntohl(ep->vallen); ptr = (const u_char *)ep->pkt; if ((sdsa = d2i_DSAparams(NULL, &ptr, len)) == NULL) { msyslog(LOG_ERR, "crypto_mv %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Compute (gbar^xhat ghat^xbar)^-1 mod p. */ BN_mod_exp(u, sdsa->q, dsa->pub_key, dsa->p, bctx); BN_mod_exp(v, sdsa->g, dsa->priv_key, dsa->p, bctx); BN_mod_mul(u, u, v, dsa->p, bctx); BN_mod_inverse(u, u, dsa->p, bctx); BN_mod_mul(v, u, peer->iffval, dsa->p, bctx); /* * The result should match the hash of r mod p. */ bighash(v, v); temp = BN_cmp(v, sdsa->p); BN_CTX_free(bctx); BN_free(k); BN_free(u); BN_free(v); BN_free(peer->iffval); peer->iffval = NULL; DSA_free(sdsa); if (temp == 0) return (XEVNT_OK); else return (XEVNT_ID); } /* *********************************************************************** * * * The following routines are used to manipulate certificates * * * *********************************************************************** */ /* * cert_parse - parse x509 certificate and create info/value structures. * * The server certificate includes the version number, issuer name, * subject name, public key and valid date interval. If the issuer name * is the same as the subject name, the certificate is self signed and * valid only if the server is configured as trustable. 
If the names are * different, another issuer has signed the server certificate and * vouched for it. In this case the server certificate is valid if * verified by the issuer public key. * * Returns certificate info/value pointer if valid, NULL if not. */ struct cert_info * /* certificate information structure */ cert_parse( u_char *asn1cert, /* X509 certificate */ u_int len, /* certificate length */ tstamp_t fstamp /* filestamp */ ) { X509 *cert; /* X509 certificate */ X509_EXTENSION *ext; /* X509v3 extension */ struct cert_info *ret; /* certificate info/value */ BIO *bp; X509V3_EXT_METHOD *method; char pathbuf[MAXFILENAME]; u_char *uptr; char *ptr; int temp, cnt, i; /* * Decode ASN.1 objects and construct certificate structure. */ uptr = asn1cert; if ((cert = d2i_X509(NULL, &uptr, len)) == NULL) { msyslog(LOG_ERR, "cert_parse %s\n", ERR_error_string(ERR_get_error(), NULL)); return (NULL); } /* * Extract version, subject name and public key. */ ret = emalloc(sizeof(struct cert_info)); memset(ret, 0, sizeof(struct cert_info)); if ((ret->pkey = X509_get_pubkey(cert)) == NULL) { msyslog(LOG_ERR, "cert_parse %s\n", ERR_error_string(ERR_get_error(), NULL)); cert_free(ret); X509_free(cert); return (NULL); } ret->version = X509_get_version(cert); X509_NAME_oneline(X509_get_subject_name(cert), pathbuf, MAXFILENAME - 1); ptr = strstr(pathbuf, "CN="); if (ptr == NULL) { msyslog(LOG_INFO, "cert_parse: invalid subject %s", pathbuf); cert_free(ret); X509_free(cert); return (NULL); } ret->subject = emalloc(strlen(ptr) + 1); strcpy(ret->subject, ptr + 3); /* * Extract remaining objects. Note that the NTP serial number is * the NTP seconds at the time of signing, but this might not be * the case for other authority. We don't bother to check the * objects at this time, since the real crunch can happen only * when the time is valid but not yet certificated. 
*/ ret->nid = OBJ_obj2nid(cert->cert_info->signature->algorithm); ret->digest = (const EVP_MD *)EVP_get_digestbynid(ret->nid); ret->serial = (u_long)ASN1_INTEGER_get(X509_get_serialNumber(cert)); X509_NAME_oneline(X509_get_issuer_name(cert), pathbuf, MAXFILENAME); if ((ptr = strstr(pathbuf, "CN=")) == NULL) { msyslog(LOG_INFO, "cert_parse: invalid issuer %s", pathbuf); cert_free(ret); X509_free(cert); return (NULL); } ret->issuer = emalloc(strlen(ptr) + 1); strcpy(ret->issuer, ptr + 3); ret->first = asn2ntp(X509_get_notBefore(cert)); ret->last = asn2ntp(X509_get_notAfter(cert)); /* * Extract extension fields. These are ad hoc ripoffs of * currently assigned functions and will certainly be changed * before prime time. */ cnt = X509_get_ext_count(cert); for (i = 0; i < cnt; i++) { ext = X509_get_ext(cert, i); method = X509V3_EXT_get(ext); temp = OBJ_obj2nid(ext->object); switch (temp) { /* * If a key_usage field is present, we decode whether * this is a trusted or private certificate. This is * dorky; all we want is to compare NIDs, but OpenSSL * insists on BIO text strings. */ case NID_ext_key_usage: bp = BIO_new(BIO_s_mem()); X509V3_EXT_print(bp, ext, 0, 0); BIO_gets(bp, pathbuf, MAXFILENAME); BIO_free(bp); #if DEBUG if (debug) printf("cert_parse: %s: %s\n", OBJ_nid2ln(temp), pathbuf); #endif if (strcmp(pathbuf, "Trust Root") == 0) ret->flags |= CERT_TRUST; else if (strcmp(pathbuf, "Private") == 0) ret->flags |= CERT_PRIV; break; /* * If a NID_subject_key_identifier field is present, it * contains the GQ public key. */ case NID_subject_key_identifier: ret->grplen = ext->value->length - 2; ret->grpkey = emalloc(ret->grplen); memcpy(ret->grpkey, &ext->value->data[2], ret->grplen); break; } } /* * If certificate is self signed, verify signature. 
*/ if (strcmp(ret->subject, ret->issuer) == 0) { if (!X509_verify(cert, ret->pkey)) { msyslog(LOG_INFO, "cert_parse: signature not verified %s", pathbuf); cert_free(ret); X509_free(cert); return (NULL); } } /* * Verify certificate valid times. Note that certificates cannot * be retroactive. */ if (ret->first > ret->last || ret->first < fstamp) { msyslog(LOG_INFO, "cert_parse: invalid certificate %s first %u last %u fstamp %u", ret->subject, ret->first, ret->last, fstamp); cert_free(ret); X509_free(cert); return (NULL); } /* * Build the value structure to sign and send later. */ ret->cert.fstamp = htonl(fstamp); ret->cert.vallen = htonl(len); ret->cert.ptr = emalloc(len); memcpy(ret->cert.ptr, asn1cert, len); #ifdef DEBUG if (debug > 1) X509_print_fp(stdout, cert); #endif X509_free(cert); return (ret); } /* * cert_sign - sign x509 certificate equest and update value structure. * * The certificate request includes a copy of the host certificate, * which includes the version number, subject name and public key of the * host. The resulting certificate includes these values plus the * serial number, issuer name and valid interval of the server. The * valid interval extends from the current time to the same time one * year hence. This may extend the life of the signed certificate beyond * that of the signer certificate. * * It is convenient to use the NTP seconds of the current time as the * serial number. In the value structure the timestamp is the current * time and the filestamp is taken from the extension field. Note this * routine is called only when the client clock is synchronized to a * proventic source, so timestamp comparisons are valid. * * The host certificate is valid from the time it was generated for a * period of one year. A signed certificate is valid from the time of * signature for a period of one year, but only the host certificate (or * sign certificate if used) is actually used to encrypt and decrypt * signatures. 
The signature trail is built from the client via the * intermediate servers to the trusted server. Each signature on the * trail must be valid at the time of signature, but it could happen * that a signer certificate expire before the signed certificate, which * remains valid until its expiration. * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_CRT bad or missing certificate * XEVNT_VFY certificate not verified * XEVNT_PER host certificate expired */ static int cert_sign( struct exten *ep, /* extension field pointer */ struct value *vp /* value pointer */ ) { X509 *req; /* X509 certificate request */ X509 *cert; /* X509 certificate */ X509_EXTENSION *ext; /* certificate extension */ ASN1_INTEGER *serial; /* serial number */ X509_NAME *subj; /* distinguished (common) name */ EVP_PKEY *pkey; /* public key */ EVP_MD_CTX ctx; /* message digest context */ tstamp_t tstamp; /* NTP timestamp */ u_int len; u_char *ptr; int i, temp; /* * Decode ASN.1 objects and construct certificate structure. * Make sure the system clock is synchronized to a proventic * source. */ tstamp = crypto_time(); if (tstamp == 0) return (XEVNT_TSP); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); ptr = (u_char *)ep->pkt; if ((req = d2i_X509(NULL, &ptr, ntohl(ep->vallen))) == NULL) { msyslog(LOG_ERR, "cert_sign %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_CRT); } /* * Extract public key and check for errors. */ if ((pkey = X509_get_pubkey(req)) == NULL) { msyslog(LOG_ERR, "cert_sign %s\n", ERR_error_string(ERR_get_error(), NULL)); X509_free(req); return (XEVNT_PUB); } /* * Generate X509 certificate signed by this server. For this * purpose the issuer name is the server name. Also copy any * extensions that might be present. 
*/ cert = X509_new(); X509_set_version(cert, X509_get_version(req)); serial = ASN1_INTEGER_new(); ASN1_INTEGER_set(serial, tstamp); X509_set_serialNumber(cert, serial); X509_gmtime_adj(X509_get_notBefore(cert), 0L); X509_gmtime_adj(X509_get_notAfter(cert), YEAR); subj = X509_get_issuer_name(cert); X509_NAME_add_entry_by_txt(subj, "commonName", MBSTRING_ASC, (u_char *)sys_hostname, strlen(sys_hostname), -1, 0); subj = X509_get_subject_name(req); X509_set_subject_name(cert, subj); X509_set_pubkey(cert, pkey); ext = X509_get_ext(req, 0); temp = X509_get_ext_count(req); for (i = 0; i < temp; i++) { ext = X509_get_ext(req, i); X509_add_ext(cert, ext, -1); } X509_free(req); /* * Sign and verify the certificate. */ X509_sign(cert, sign_pkey, sign_digest); if (!X509_verify(cert, sign_pkey)) { printf("cert_sign\n%s\n", ERR_error_string(ERR_get_error(), NULL)); X509_free(cert); return (XEVNT_VFY); } len = i2d_X509(cert, NULL); /* * Build and sign the value structure. We have to sign it here, * since the response has to be returned right away. This is a * clogging hazard. */ memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = ep->fstamp; vp->vallen = htonl(len); vp->ptr = emalloc(len); ptr = vp->ptr; i2d_X509(cert, &ptr); vp->siglen = 0; vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)vp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); #ifdef DEBUG if (debug > 1) X509_print_fp(stdout, cert); #endif X509_free(cert); return (XEVNT_OK); } /* * cert_valid - verify certificate with given public key * * This is pretty ugly, as the certificate has to be verified in the * OpenSSL X509 structure, not in the DER format in the info/value * structure. 
* * Returns * XEVNT_OK success * XEVNT_VFY certificate not verified */ int cert_valid( struct cert_info *cinf, /* certificate information structure */ EVP_PKEY *pkey /* public key */ ) { X509 *cert; /* X509 certificate */ u_char *ptr; if (cinf->flags & CERT_SIGN) return (XEVNT_OK); ptr = (u_char *)cinf->cert.ptr; cert = d2i_X509(NULL, &ptr, ntohl(cinf->cert.vallen)); if (cert == NULL || !X509_verify(cert, pkey)) return (XEVNT_VFY); X509_free(cert); return (XEVNT_OK); } /* * cert - install certificate in certificate list * * This routine encodes an extension field into a certificate info/value * structure. It searches the certificate list for duplicates and * expunges whichever is older. It then searches the list for other * certificates that might be verified by this latest one. Finally, it * inserts this certificate first on the list. * * Returns * XEVNT_OK success * XEVNT_FSP bad or missing filestamp * XEVNT_CRT bad or missing certificate */ int cert_install( struct exten *ep, /* cert info/value */ struct peer *peer /* peer structure */ ) { struct cert_info *cp, *xp, *yp, **zp; /* * Parse and validate the signed certificate. If valid, * construct the info/value structure; otherwise, scamper home. */ if ((cp = cert_parse((u_char *)ep->pkt, ntohl(ep->vallen), ntohl(ep->fstamp))) == NULL) return (XEVNT_CRT); /* * Scan certificate list looking for another certificate with * the same subject and issuer. If another is found with the * same or older filestamp, unlink it and return the goodies to * the heap. If another is found with a later filestamp, discard * the new one and leave the building. * * Make a note to study this issue again. An earlier certificate * with a long lifetime might be overtaken by a later * certificate with a short lifetime, thus invalidating the * earlier signature. However, we gotta find a way to leak old * stuff from the cache, so we do it anyway. 
*/ yp = cp; zp = &cinfo; for (xp = cinfo; xp != NULL; xp = xp->link) { if (strcmp(cp->subject, xp->subject) == 0 && strcmp(cp->issuer, xp->issuer) == 0) { if (ntohl(cp->cert.fstamp) <= ntohl(xp->cert.fstamp)) { *zp = xp->link;; cert_free(xp); } else { cert_free(cp); return (XEVNT_FSP); } break; } zp = &xp->link; } yp->link = cinfo; cinfo = yp; /* * Scan the certificate list to see if Y is signed by X. This is * independent of order. */ for (yp = cinfo; yp != NULL; yp = yp->link) { for (xp = cinfo; xp != NULL; xp = xp->link) { /* * If the issuer of certificate Y matches the * subject of certificate X, verify the * signature of Y using the public key of X. If * so, X signs Y. */ if (strcmp(yp->issuer, xp->subject) != 0 || xp->flags & CERT_ERROR) continue; if (cert_valid(yp, xp->pkey) != XEVNT_OK) { yp->flags |= CERT_ERROR; continue; } /* * The signature Y is valid only if it begins * during the lifetime of X; however, it is not * necessarily an error, since some other * certificate might sign Y. */ if (yp->first < xp->first || yp->first > xp->last) continue; yp->flags |= CERT_SIGN; /* * If X is trusted, then Y is trusted. Note that * we might stumble over a self-signed * certificate that is not trusted, at least * temporarily. This can happen when a dude * first comes up, but has not synchronized the * clock and had its certificate signed by its * server. In case of broken certificate trail, * this might result in a loop that could * persist until timeout. */ if (!(xp->flags & (CERT_TRUST | CERT_VALID))) continue; yp->flags |= CERT_VALID; /* * If subject Y matches the server subject name, * then Y has completed the certificate trail. * Save the group key and light the valid bit. 
*/ if (strcmp(yp->subject, peer->subject) != 0) continue; if (yp->grpkey != NULL) { if (peer->grpkey != NULL) BN_free(peer->grpkey); peer->grpkey = BN_bin2bn(yp->grpkey, yp->grplen, NULL); } peer->crypto |= CRYPTO_FLAG_VALID; /* * If the server has an an identity scheme, * fetch the identity credentials. If not, the * identity is verified only by the trusted * certificate. The next signature will set the * server proventic. */ if (peer->crypto & (CRYPTO_FLAG_GQ | CRYPTO_FLAG_IFF | CRYPTO_FLAG_MV)) continue; peer->crypto |= CRYPTO_FLAG_VRFY; } } /* * That was awesome. Now update the timestamps and signatures. */ crypto_update(); return (XEVNT_OK); } /* * cert_free - free certificate information structure */ void cert_free( struct cert_info *cinf /* certificate info/value structure */ ) { if (cinf->pkey != NULL) EVP_PKEY_free(cinf->pkey); if (cinf->subject != NULL) free(cinf->subject); if (cinf->issuer != NULL) free(cinf->issuer); if (cinf->grpkey != NULL) free(cinf->grpkey); value_free(&cinf->cert); free(cinf); } /* *********************************************************************** * * * The following routines are used only at initialization time * * * *********************************************************************** */ /* * crypto_key - load cryptographic parameters and keys from files * * This routine loads a PEM-encoded public/private key pair and extracts * the filestamp from the file name. * * Returns public key pointer if valid, NULL if not. Side effect updates * the filestamp if valid. */ static EVP_PKEY * crypto_key( char *cp, /* file name */ tstamp_t *fstamp /* filestamp */ ) { FILE *str; /* file handle */ EVP_PKEY *pkey = NULL; /* public/private key */ char filename[MAXFILENAME]; /* name of key file */ char linkname[MAXFILENAME]; /* filestamp buffer) */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ char *ptr; /* * Open the key file. If the first character of the file name is * not '/', prepend the keys directory string. 
If something goes * wrong, abandon ship. */ if (*cp == '/') strcpy(filename, cp); else snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp); str = fopen(filename, "r"); if (str == NULL) return (NULL); /* * Read the filestamp, which is contained in the first line. */ if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) { msyslog(LOG_ERR, "crypto_key: no data %s\n", filename); (void)fclose(str); return (NULL); } if ((ptr = strrchr(ptr, '.')) == NULL) { msyslog(LOG_ERR, "crypto_key: no filestamp %s\n", filename); (void)fclose(str); return (NULL); } if (sscanf(++ptr, "%u", fstamp) != 1) { msyslog(LOG_ERR, "crypto_key: invalid timestamp %s\n", filename); (void)fclose(str); return (NULL); } /* * Read and decrypt PEM-encoded private key. */ pkey = PEM_read_PrivateKey(str, NULL, NULL, passwd); fclose(str); if (pkey == NULL) { msyslog(LOG_ERR, "crypto_key %s\n", ERR_error_string(ERR_get_error(), NULL)); return (NULL); } /* * Leave tracks in the cryptostats. */ if ((ptr = strrchr(linkname, '\n')) != NULL) *ptr = '\0'; snprintf(statstr, NTP_MAXSTRLEN, "%s mod %d", &linkname[2], EVP_PKEY_size(pkey) * 8); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_key: %s\n", statstr); if (debug > 1) { if (EVP_MD_type(pkey) == EVP_PKEY_DSA) DSA_print_fp(stdout, pkey->pkey.dsa, 0); else RSA_print_fp(stdout, pkey->pkey.rsa, 0); } #endif return (pkey); } /* * crypto_cert - load certificate from file * * This routine loads a X.509 RSA or DSA certificate from a file and * constructs a info/cert value structure for this machine. The * structure includes a filestamp extracted from the file name. Later * the certificate can be sent to another machine by request. * * Returns certificate info/value pointer if valid, NULL if not. 
*/ static struct cert_info * /* certificate information */ crypto_cert( char *cp /* file name */ ) { struct cert_info *ret; /* certificate information */ FILE *str; /* file handle */ char filename[MAXFILENAME]; /* name of certificate file */ char linkname[MAXFILENAME]; /* filestamp buffer */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t fstamp; /* filestamp */ long len; char *ptr; char *name, *header; u_char *data; /* * Open the certificate file. If the first character of the file * name is not '/', prepend the keys directory string. If * something goes wrong, abandon ship. */ if (*cp == '/') strcpy(filename, cp); else snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp); str = fopen(filename, "r"); if (str == NULL) return (NULL); /* * Read the filestamp, which is contained in the first line. */ if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) { msyslog(LOG_ERR, "crypto_cert: no data %s\n", filename); (void)fclose(str); return (NULL); } if ((ptr = strrchr(ptr, '.')) == NULL) { msyslog(LOG_ERR, "crypto_cert: no filestamp %s\n", filename); (void)fclose(str); return (NULL); } if (sscanf(++ptr, "%u", &fstamp) != 1) { msyslog(LOG_ERR, "crypto_cert: invalid filestamp %s\n", filename); (void)fclose(str); return (NULL); } /* * Read PEM-encoded certificate and install. */ if (!PEM_read(str, &name, &header, &data, &len)) { msyslog(LOG_ERR, "crypto_cert %s\n", ERR_error_string(ERR_get_error(), NULL)); (void)fclose(str); return (NULL); } free(header); if (strcmp(name, "CERTIFICATE") !=0) { msyslog(LOG_INFO, "crypto_cert: wrong PEM type %s", name); free(name); free(data); (void)fclose(str); return (NULL); } free(name); /* * Parse certificate and generate info/value structure. 
*/ ret = cert_parse(data, len, fstamp); free(data); (void)fclose(str); if (ret == NULL) return (NULL); if ((ptr = strrchr(linkname, '\n')) != NULL) *ptr = '\0'; snprintf(statstr, NTP_MAXSTRLEN, "%s 0x%x len %lu", &linkname[2], ret->flags, len); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_cert: %s\n", statstr); #endif return (ret); } /* * crypto_tai - load leapseconds table from file * * This routine loads the ERTS leapsecond file in NIST text format, * converts to a value structure and extracts a filestamp from the file * name. The data are used to establish the TAI offset from UTC, which * is provided to the kernel if supported. Later the data can be sent to * another machine on request. */ static void crypto_tai( char *cp /* file name */ ) { FILE *str; /* file handle */ char buf[NTP_MAXSTRLEN]; /* file line buffer */ u_int32 leapsec[MAX_LEAP]; /* NTP time at leaps */ int offset; /* offset at leap (s) */ char filename[MAXFILENAME]; /* name of leapseconds file */ char linkname[MAXFILENAME]; /* file link (for filestamp) */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t fstamp; /* filestamp */ u_int len; u_int32 *ptr; char *dp; int rval, i, j; /* * Open the file and discard comment lines. If the first * character of the file name is not '/', prepend the keys * directory string. If the file is not found, not to worry; it * can be retrieved over the net. But, if it is found with * errors, we crash and burn. */ if (*cp == '/') strcpy(filename, cp); else snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp); if ((str = fopen(filename, "r")) == NULL) return; /* * Extract filestamp if present. 
*/ rval = readlink(filename, linkname, MAXFILENAME - 1); if (rval > 0) { linkname[rval] = '\0'; dp = strrchr(linkname, '.'); } else { dp = strrchr(filename, '.'); } if (dp != NULL) sscanf(++dp, "%u", &fstamp); else fstamp = 0; tai_leap.fstamp = htonl(fstamp); /* * We are rather paranoid here, since an intruder might cause a * coredump by infiltrating naughty values. Empty lines and * comments are ignored. Other lines must begin with two * integers followed by junk or comments. The first integer is * the NTP seconds of leap insertion, the second is the offset * of TAI relative to UTC after that insertion. The second word * must equal the initial insertion of ten seconds on 1 January * 1972 plus one second for each succeeding insertion. */ i = 0; while (i < MAX_LEAP) { dp = fgets(buf, NTP_MAXSTRLEN - 1, str); if (dp == NULL) break; if (strlen(buf) < 1) continue; if (*buf == '#') continue; if (sscanf(buf, "%u %d", &leapsec[i], &offset) != 2) continue; if (i != offset - TAI_1972) break; i++; } fclose(str); if (dp != NULL) { msyslog(LOG_INFO, "crypto_tai: leapseconds file %s error %d", cp, rval); exit (-1); } /* * The extension field table entries consists of the NTP seconds * of leap insertion in network byte order. */ len = i * sizeof(u_int32); tai_leap.vallen = htonl(len); ptr = emalloc(len); tai_leap.ptr = (u_char *)ptr; for (j = 0; j < i; j++) *ptr++ = htonl(leapsec[j]); crypto_flags |= CRYPTO_FLAG_TAI; snprintf(statstr, NTP_MAXSTRLEN, "%s fs %u leap %u len %u", cp, fstamp, leapsec[--j], len); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_tai: %s\n", statstr); #endif } /* * crypto_setup - load keys, certificate and leapseconds table * * This routine loads the public/private host key and certificate. If * available, it loads the public/private sign key, which defaults to * the host key, and leapseconds table. The host key must be RSA, but * the sign key can be either RSA or DSA. 
In either case, the public key * on the certificate must agree with the sign key. */ void crypto_setup(void) { EVP_PKEY *pkey; /* private/public key pair */ char filename[MAXFILENAME]; /* file name buffer */ l_fp seed; /* crypto PRNG seed as NTP timestamp */ tstamp_t fstamp; /* filestamp */ tstamp_t sstamp; /* sign filestamp */ u_int len, bytes; u_char *ptr; /* * Initialize structures. */ if (!crypto_flags) return; gethostname(filename, MAXFILENAME); bytes = strlen(filename) + 1; sys_hostname = emalloc(bytes); memcpy(sys_hostname, filename, bytes); if (passwd == NULL) passwd = sys_hostname; memset(&hostval, 0, sizeof(hostval)); memset(&pubkey, 0, sizeof(pubkey)); memset(&tai_leap, 0, sizeof(tai_leap)); /* * Load required random seed file and seed the random number * generator. Be default, it is found in the user home * directory. The root home directory may be / or /root, * depending on the system. Wiggle the contents a bit and write * it back so the sequence does not repeat when we next restart. */ ERR_load_crypto_strings(); if (rand_file == NULL) { if ((RAND_file_name(filename, MAXFILENAME)) != NULL) { rand_file = emalloc(strlen(filename) + 1); strcpy(rand_file, filename); } } else if (*rand_file != '/') { snprintf(filename, MAXFILENAME, "%s/%s", keysdir, rand_file); free(rand_file); rand_file = emalloc(strlen(filename) + 1); strcpy(rand_file, filename); } if (rand_file == NULL) { msyslog(LOG_ERR, "crypto_setup: random seed file not specified"); exit (-1); } if ((bytes = RAND_load_file(rand_file, -1)) == 0) { msyslog(LOG_ERR, "crypto_setup: random seed file %s not found\n", rand_file); exit (-1); } arc4random_buf(&seed, sizeof(l_fp)); RAND_seed(&seed, sizeof(l_fp)); RAND_write_file(rand_file); OpenSSL_add_all_algorithms(); #ifdef DEBUG if (debug) printf( "crypto_setup: OpenSSL version %lx random seed file %s bytes read %d\n", SSLeay(), rand_file, bytes); #endif /* * Load required host key from file "ntpkey_host_". It * also becomes the default sign key. 
*/ if (host_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_host_%s", sys_hostname); host_file = emalloc(strlen(filename) + 1); strcpy(host_file, filename); } pkey = crypto_key(host_file, &fstamp); if (pkey == NULL) { msyslog(LOG_ERR, "crypto_setup: host key file %s not found or corrupt", host_file); exit (-1); } host_pkey = pkey; sign_pkey = pkey; sstamp = fstamp; hostval.fstamp = htonl(fstamp); if (EVP_MD_type(host_pkey) != EVP_PKEY_RSA) { msyslog(LOG_ERR, "crypto_setup: host key is not RSA key type"); exit (-1); } hostval.vallen = htonl(strlen(sys_hostname)); hostval.ptr = (u_char *)sys_hostname; /* * Construct public key extension field for agreement scheme. */ len = i2d_PublicKey(host_pkey, NULL); ptr = emalloc(len); pubkey.ptr = ptr; i2d_PublicKey(host_pkey, &ptr); pubkey.vallen = htonl(len); pubkey.fstamp = hostval.fstamp; /* * Load optional sign key from file "ntpkey_sign_". If * loaded, it becomes the sign key. */ if (sign_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_sign_%s", sys_hostname); sign_file = emalloc(strlen(filename) + 1); strcpy(sign_file, filename); } pkey = crypto_key(sign_file, &fstamp); if (pkey != NULL) { sign_pkey = pkey; sstamp = fstamp; } sign_siglen = EVP_PKEY_size(sign_pkey); /* * Load optional IFF parameters from file * "ntpkey_iff_". */ if (iffpar_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_iff_%s", sys_hostname); iffpar_file = emalloc(strlen(filename) + 1); strcpy(iffpar_file, filename); } iffpar_pkey = crypto_key(iffpar_file, &if_fstamp); if (iffpar_pkey != NULL) crypto_flags |= CRYPTO_FLAG_IFF; /* * Load optional GQ parameters from file "ntpkey_gq_". */ if (gqpar_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_gq_%s", sys_hostname); gqpar_file = emalloc(strlen(filename) + 1); strcpy(gqpar_file, filename); } gqpar_pkey = crypto_key(gqpar_file, &gq_fstamp); if (gqpar_pkey != NULL) crypto_flags |= CRYPTO_FLAG_GQ; /* * Load optional MV parameters from file "ntpkey_mv_". 
*/ if (mvpar_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_mv_%s", sys_hostname); mvpar_file = emalloc(strlen(filename) + 1); strcpy(mvpar_file, filename); } mvpar_pkey = crypto_key(mvpar_file, &mv_fstamp); if (mvpar_pkey != NULL) crypto_flags |= CRYPTO_FLAG_MV; /* * Load required certificate from file "ntpkey_cert_". */ if (cert_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_cert_%s", sys_hostname); cert_file = emalloc(strlen(filename) + 1); strcpy(cert_file, filename); } if ((cinfo = crypto_cert(cert_file)) == NULL) { msyslog(LOG_ERR, "certificate file %s not found or corrupt", cert_file); exit (-1); } /* * The subject name must be the same as the host name, unless * the certificate is private, in which case it may have come * from another host. */ if (!(cinfo->flags & CERT_PRIV) && strcmp(cinfo->subject, sys_hostname) != 0) { msyslog(LOG_ERR, "crypto_setup: certificate %s not for this host", cert_file); cert_free(cinfo); exit (-1); } /* * It the certificate is trusted, the subject must be the same * as the issuer, in other words it must be self signed. */ if (cinfo->flags & CERT_TRUST && strcmp(cinfo->subject, cinfo->issuer) != 0) { if (cert_valid(cinfo, sign_pkey) != XEVNT_OK) { msyslog(LOG_ERR, "crypto_setup: certificate %s is trusted, but not self signed.", cert_file); cert_free(cinfo); exit (-1); } } sign_digest = cinfo->digest; if (cinfo->flags & CERT_PRIV) crypto_flags |= CRYPTO_FLAG_PRIV; crypto_flags |= cinfo->nid << 16; /* * Load optional leapseconds table from file "ntpkey_leap". If * the file is missing or defective, the values can later be * retrieved from a server. */ if (leap_file == NULL) leap_file = "ntpkey_leap"; crypto_tai(leap_file); #ifdef DEBUG if (debug) printf( "crypto_setup: flags 0x%x host %s signature %s\n", crypto_flags, sys_hostname, OBJ_nid2ln(cinfo->nid)); #endif } /* * crypto_config - configure data from crypto configuration command. 
*/ void crypto_config( int item, /* configuration item */ char *cp /* file name */ ) { switch (item) { /* * Set random seed file name. */ case CRYPTO_CONF_RAND: rand_file = emalloc(strlen(cp) + 1); strcpy(rand_file, cp); break; /* * Set private key password. */ case CRYPTO_CONF_PW: passwd = emalloc(strlen(cp) + 1); strcpy(passwd, cp); break; /* * Set host file name. */ case CRYPTO_CONF_PRIV: host_file = emalloc(strlen(cp) + 1); strcpy(host_file, cp); break; /* * Set sign key file name. */ case CRYPTO_CONF_SIGN: sign_file = emalloc(strlen(cp) + 1); strcpy(sign_file, cp); break; /* * Set iff parameters file name. */ case CRYPTO_CONF_IFFPAR: iffpar_file = emalloc(strlen(cp) + 1); strcpy(iffpar_file, cp); break; /* * Set gq parameters file name. */ case CRYPTO_CONF_GQPAR: gqpar_file = emalloc(strlen(cp) + 1); strcpy(gqpar_file, cp); break; /* * Set mv parameters file name. */ case CRYPTO_CONF_MVPAR: mvpar_file = emalloc(strlen(cp) + 1); strcpy(mvpar_file, cp); break; /* * Set identity scheme. */ case CRYPTO_CONF_IDENT: if (!strcasecmp(cp, "iff")) ident_scheme |= CRYPTO_FLAG_IFF; else if (!strcasecmp(cp, "gq")) ident_scheme |= CRYPTO_FLAG_GQ; else if (!strcasecmp(cp, "mv")) ident_scheme |= CRYPTO_FLAG_MV; break; /* * Set certificate file name. */ case CRYPTO_CONF_CERT: cert_file = emalloc(strlen(cp) + 1); strcpy(cert_file, cp); break; /* * Set leapseconds file name. */ case CRYPTO_CONF_LEAP: leap_file = emalloc(strlen(cp) + 1); strcpy(leap_file, cp); break; } crypto_flags |= CRYPTO_FLAG_ENAB; } # else int ntp_crypto_bs_pubkey; # endif /* OPENSSL */ Index: stable/8/contrib/ntp/ntpd/ntp_proto.c =================================================================== --- stable/8/contrib/ntp/ntpd/ntp_proto.c (revision 281230) +++ stable/8/contrib/ntp/ntpd/ntp_proto.c (revision 281231) @@ -1,3451 +1,3461 @@ /* * ntp_proto.c - NTP version 4 protocol machinery * * ATTENTION: Get approval from Dave Mills on all changes to this file! 
* */ #ifdef HAVE_CONFIG_H #include #endif #include "ntpd.h" #include "ntp_stdlib.h" #include "ntp_unixtime.h" #include "ntp_control.h" #include "ntp_string.h" #include #if defined(VMS) && defined(VMS_LOCALUNIT) /*wjm*/ #include "ntp_refclock.h" #endif #if defined(__FreeBSD__) && __FreeBSD__ >= 3 #include #endif /* * This macro defines the authentication state. If x is 1 authentication * is required; othewise it is optional. */ #define AUTH(x, y) ((x) ? (y) == AUTH_OK : (y) == AUTH_OK || \ (y) == AUTH_NONE) /* * System variables are declared here. See Section 3.2 of the * specification. */ u_char sys_leap; /* system leap indicator */ u_char sys_stratum; /* stratum of system */ s_char sys_precision; /* local clock precision (log2 s) */ double sys_rootdelay; /* roundtrip delay to primary source */ double sys_rootdispersion; /* dispersion to primary source */ u_int32 sys_refid; /* source/loop in network byte order */ static double sys_offset; /* current local clock offset */ l_fp sys_reftime; /* time we were last updated */ struct peer *sys_peer; /* our current peer */ struct peer *sys_pps; /* our PPS peer */ struct peer *sys_prefer; /* our cherished peer */ int sys_kod; /* kod credit */ int sys_kod_rate = 2; /* max kod packets per second */ #ifdef OPENSSL u_long sys_automax; /* maximum session key lifetime */ #endif /* OPENSSL */ /* * Nonspecified system state variables. 
*/ int sys_bclient; /* broadcast client enable */ double sys_bdelay; /* broadcast client default delay */ int sys_calldelay; /* modem callup delay (s) */ int sys_authenticate; /* requre authentication for config */ l_fp sys_authdelay; /* authentication delay */ static u_long sys_authdly[2]; /* authentication delay shift reg */ static double sys_mindisp = MINDISPERSE; /* min disp increment (s) */ static double sys_maxdist = MAXDISTANCE; /* selection threshold (s) */ double sys_jitter; /* system jitter (s) */ static int sys_hopper; /* anticlockhop counter */ static int sys_maxhop = MAXHOP; /* anticlockhop counter threshold */ int leap_next; /* leap consensus */ keyid_t sys_private; /* private value for session seed */ int sys_manycastserver; /* respond to manycast client pkts */ int peer_ntpdate; /* active peers in ntpdate mode */ int sys_survivors; /* truest of the truechimers */ #ifdef OPENSSL char *sys_hostname; /* gethostname() name */ #endif /* OPENSSL */ /* * TOS and multicast mapping stuff */ int sys_floor = 0; /* cluster stratum floor */ int sys_ceiling = STRATUM_UNSPEC; /* cluster stratum ceiling */ int sys_minsane = 1; /* minimum candidates */ int sys_minclock = NTP_MINCLOCK; /* minimum survivors */ int sys_maxclock = NTP_MAXCLOCK; /* maximum candidates */ int sys_cohort = 0; /* cohort switch */ int sys_orphan = STRATUM_UNSPEC + 1; /* orphan stratum */ double sys_orphandelay = 0; /* orphan root delay */ int sys_beacon = BEACON; /* manycast beacon interval */ int sys_ttlmax; /* max ttl mapping vector index */ u_char sys_ttl[MAX_TTL]; /* ttl mapping vector */ /* * Statistics counters */ u_long sys_stattime; /* time since reset */ u_long sys_received; /* packets received */ u_long sys_processed; /* packets processed */ u_long sys_newversionpkt; /* current version */ u_long sys_oldversionpkt; /* recent version */ u_long sys_unknownversion; /* invalid version */ u_long sys_restricted; /* access denied */ u_long sys_badlength; /* bad length or format */ u_long 
sys_badauth; /* bad authentication */ u_long sys_limitrejected; /* rate exceeded */ static double root_distance P((struct peer *)); static void clock_combine P((struct peer **, int)); static void peer_xmit P((struct peer *)); static void fast_xmit P((struct recvbuf *, int, keyid_t, int)); static void clock_update P((void)); static int default_get_precision P((void)); static int peer_unfit P((struct peer *));

/*
 * transmit - Transmit Procedure. See Section 3.4.2 of the
 * specification.
 *
 * Runs one poll event for the given association: updates its
 * reachability register, unreach counter, burst counter and poll
 * interval, sends a packet with peer_xmit() unless the association is
 * in broadcast client mode, and reschedules it with poll_update(). An
 * association that times out is demobilized with unpeer() when it is
 * preemptable or not configured; in that case the routine returns
 * without touching the peer again. In ntpdate mode the program exits
 * once the last outstanding burst has completed.
 */
void
transmit(
	struct peer *peer	/* peer structure pointer */
	)
{
	int	hpoll;		/* poll interval handed to poll_update() */

	/*
	 * The polling state machine. There are two kinds of machines,
	 * those that never expect a reply (broadcast and manycast
	 * server modes) and those that do (all other modes). The dance
	 * is intricate...
	 */
	/*
	 * Orphan mode is active when enabled and when no servers less
	 * than the orphan stratum are available. In this mode packets
	 * are sent at the orphan stratum. An orphan with no other
	 * synchronization source is an orphan parent. It assumes root
	 * delay zero and reference ID the loopback address. All others
	 * are orphan children with root delay randomized over a 1-s
	 * range. The root delay is used by the election algorithm to
	 * select the order of synchronization.
	 */
	hpoll = peer->hpoll;
	if (sys_orphan < STRATUM_UNSPEC && sys_peer == NULL) {
		sys_leap = LEAP_NOWARNING;
		sys_stratum = sys_orphan;
		sys_refid = htonl(LOOPBACKADR);
		sys_rootdelay = 0;
		sys_rootdispersion = 0;
	}

	/*
	 * In broadcast mode the poll interval is never changed from
	 * minpoll.
	 */
	if (peer->cast_flags & (MDF_BCAST | MDF_MCAST)) {
		peer->outdate = current_time;
		peer_xmit(peer);
		poll_update(peer, hpoll);
		return;
	}

	/*
	 * In manycast mode we start with unity ttl. The ttl is
	 * increased by one for each poll until either sys_maxclock
	 * servers have been found or the maximum ttl is reached. When
	 * sys_maxclock servers are found we stop polling until one or
	 * more servers have timed out or until less than minpoll
	 * associations turn up. In this case additional better servers
	 * are dragged in and preempt the existing ones.
	 */
	if (peer->cast_flags & MDF_ACAST) {
		peer->outdate = current_time;
		if (peer->unreach > sys_beacon) {
			/* beacon interval exceeded: restart the ttl sweep */
			peer->unreach = 0;
			peer->ttl = 0;
			peer_xmit(peer);
		} else if (sys_survivors < sys_minclock ||
		    peer_preempt < sys_maxclock) {
			if (peer->ttl < sys_ttlmax)
				peer->ttl++;
			peer_xmit(peer);
		}
		peer->unreach++;
		poll_update(peer, hpoll);
		return;
	}

	/*
	 * In unicast modes the dance is much more intricate. It is
	 * designed to back off whenever possible to minimize network
	 * traffic.
	 */
	if (peer->burst == 0) {
		u_char oreach;	/* reach register before this poll */

		/*
		 * Update the reachability status. If not heard for
		 * three consecutive polls, stuff infinity in the clock
		 * filter.
		 */
		oreach = peer->reach;
		peer->outdate = current_time;
		if (peer == sys_peer)
			sys_hopper++;
		peer->reach <<= 1;
		if (!(peer->reach & 0x07))
			clock_filter(peer, 0., 0., MAXDISPERSE);
		if (!peer->reach) {

			/*
			 * Here the peer is unreachable. If it was
			 * previously reachable, raise a trap.
			 */
			if (oreach) {
				report_event(EVNT_UNREACH, peer);
				peer->timereachable = current_time;
			}

			/*
			 * Send a burst if enabled, but only once after
			 * a peer becomes unreachable. If the preempt
			 * flag is dim, bump the unreach counter by one;
			 * otherwise, bump it by three.
			 */
			if (peer->flags & FLAG_IBURST &&
			    peer->unreach == 0) {
				peer->burst = NTP_BURST;
			}
			if (!(peer->flags & FLAG_PREEMPT))
				peer->unreach++;
			else
				peer->unreach += 3;
		} else {

			/*
			 * Here the peer is reachable. Set the poll
			 * interval to the system poll interval. Send a
			 * burst only if enabled and the peer is fit.
			 *
			 * Respond to the peer evaluation produced by
			 * the selection algorithm. If less than the
			 * outlier level, up the unreach by three. If
			 * there are excess associations, up the unreach
			 * by two if not a candidate and by one if so.
			 */
			if (!(peer->flags & FLAG_PREEMPT)) {
				peer->unreach = 0;
			} else if (peer->status < CTL_PST_SEL_SELCAND) {
				peer->unreach += 3;
			} else if (peer_preempt > sys_maxclock) {
				if (peer->status < CTL_PST_SEL_SYNCCAND)
					peer->unreach += 2;
				else
					peer->unreach++;
			} else {
				peer->unreach = 0;
			}
			hpoll = sys_poll;
			if (peer->flags & FLAG_BURST &&
			    !peer_unfit(peer))
				peer->burst = NTP_BURST;
		}

		/*
		 * Watch for timeout. If ephemeral or preemptable, toss
		 * the rascal; otherwise, bump the poll interval.
		 */
		if (peer->unreach >= NTP_UNREACH) {
			if (peer->flags & FLAG_PREEMPT ||
			    !(peer->flags & FLAG_CONFIG)) {
				peer_clear(peer, "TIME");
				unpeer(peer);
				/* peer is demobilized; do not touch it again */
				return;
			} else {
				hpoll++;
			}
		}
	} else {
		peer->burst--;

		/*
		 * If a broadcast client at this point, the burst has
		 * concluded, so we switch to broadcast client mode and
		 * purge the keylist, since no further transmissions
		 * will be made.
		 */
		if (peer->burst == 0) {
			if (peer->cast_flags & MDF_BCLNT) {
				peer->hmode = MODE_BCLIENT;
#ifdef OPENSSL
				key_expire(peer);
#endif /* OPENSSL */
			}

			/*
			 * If ntpdate mode and the clock has not been
			 * set and all peers have completed the burst,
			 * we declare a successful failure.
			 */
			if (mode_ntpdate) {
				peer_ntpdate--;
				if (peer_ntpdate == 0) {
					msyslog(LOG_NOTICE,
					    "no reply; clock not set");
					exit (0);
				}
			}
		}
	}

	/*
	 * Do not transmit if in broadcast client mode.
	 */
	if (peer->hmode != MODE_BCLIENT)
		peer_xmit(peer);
	poll_update(peer, hpoll);
}

/* * receive - Receive Procedure. See section 3.4.3 in the specification.
*/ void receive( struct recvbuf *rbufp ) { register struct peer *peer; /* peer structure pointer */ register struct pkt *pkt; /* receive packet pointer */ int hisversion; /* packet version */ int hisleap; /* packet leap indicator */ int hismode; /* packet mode */ int hisstratum; /* packet stratum */ int restrict_mask; /* restrict bits */ int has_mac; /* length of MAC field */ int authlen; /* offset of MAC field */ int is_authentic = 0; /* cryptosum ok */ keyid_t skeyid = 0; /* key ID */ struct sockaddr_storage *dstadr_sin; /* active runway */ struct peer *peer2; /* aux peer structure pointer */ l_fp p_org; /* origin timestamp */ l_fp p_rec; /* receive timestamp */ l_fp p_xmt; /* transmit timestamp */ #ifdef OPENSSL keyid_t tkeyid = 0; /* temporary key ID */ keyid_t pkeyid = 0; /* previous key ID */ struct autokey *ap; /* autokey structure pointer */ int rval; /* cookie snatcher */ #endif /* OPENSSL */ int retcode = AM_NOMATCH; int at_listhead; /* * Monitor the packet and get restrictions. Note that the packet * length for control and private mode packets must be checked * by the service routines. Note that no statistics counters are * recorded for restrict violations, since these counters are in * the restriction routine. Note the careful distinctions here * between a packet with a format error and a packet that is * simply discarded without prejudice. Some restrictions have to * be handled later in order to generate a kiss-of-death packet. */ /* * Bogus port check is before anything, since it probably * reveals a clogging attack. 
*/ sys_received++; if (SRCPORT(&rbufp->recv_srcadr) == 0) { sys_badlength++; return; /* bogus port */ } at_listhead = ntp_monitor(rbufp); restrict_mask = restrictions(&rbufp->recv_srcadr, at_listhead); #ifdef DEBUG if (debug > 1) printf("receive: at %ld %s<-%s flags %x restrict %03x\n", current_time, stoa(&rbufp->dstadr->sin), stoa(&rbufp->recv_srcadr), rbufp->dstadr->flags, restrict_mask); #endif if (restrict_mask & RES_IGNORE) { sys_restricted++; return; /* ignore everything */ } pkt = &rbufp->recv_pkt; hisversion = PKT_VERSION(pkt->li_vn_mode); hisleap = PKT_LEAP(pkt->li_vn_mode); hismode = (int)PKT_MODE(pkt->li_vn_mode); hisstratum = PKT_TO_STRATUM(pkt->stratum); if (hismode == MODE_PRIVATE) { if (restrict_mask & RES_NOQUERY) { sys_restricted++; return; /* no query private */ } process_private(rbufp, ((restrict_mask & RES_NOMODIFY) == 0)); return; } if (hismode == MODE_CONTROL) { if (restrict_mask & RES_NOQUERY) { sys_restricted++; return; /* no query control */ } process_control(rbufp, restrict_mask); return; } if (restrict_mask & RES_DONTSERVE) { sys_restricted++; return; /* no time */ } if (rbufp->recv_length < LEN_PKT_NOMAC) { sys_badlength++; return; /* runt packet */ } /* * Version check must be after the query packets, since they * intentionally use early version. */ if (hisversion == NTP_VERSION) { sys_newversionpkt++; /* new version */ } else if (!(restrict_mask & RES_VERSION) && hisversion >= NTP_OLDVERSION) { sys_oldversionpkt++; /* previous version */ } else { sys_unknownversion++; return; /* old version */ } /* * Figure out his mode and validate the packet. This has some * legacy raunch that probably should be removed. In very early * NTP versions mode 0 was equivalent to what later versions * would interpret as client mode. */ if (hismode == MODE_UNSPEC) { if (hisversion == NTP_OLDVERSION) { hismode = MODE_CLIENT; } else { sys_badlength++; return; /* invalid mode */ } } /* * Parse the extension field if present. 
We figure out whether * an extension field is present by measuring the MAC size. If * the number of words following the packet header is 0, no MAC * is present and the packet is not authenticated. If 1, the * packet is a crypto-NAK; if 3, the packet is authenticated * with DES; if 5, the packet is authenticated with MD5. If 2 or * 4, the packet is a runt and discarded forthwith. If greater * than 5, an extension field is present, so we subtract the * length of the field and go around again. */ authlen = LEN_PKT_NOMAC; has_mac = rbufp->recv_length - authlen; while (has_mac > 0) { int temp; - if (has_mac % 4 != 0 || has_mac < 0) { + if (has_mac % 4 != 0 || has_mac < MIN_MAC_LEN) { sys_badlength++; return; /* bad MAC length */ } if (has_mac == 1 * 4 || has_mac == 3 * 4 || has_mac == MAX_MAC_LEN) { skeyid = ntohl(((u_int32 *)pkt)[authlen / 4]); break; } else if (has_mac > MAX_MAC_LEN) { temp = ntohl(((u_int32 *)pkt)[authlen / 4]) & 0xffff; if (temp < 4 || temp > NTP_MAXEXTEN || temp % 4 != 0) { sys_badlength++; return; /* bad MAC length */ } authlen += temp; has_mac -= temp; } else { sys_badlength++; return; /* bad MAC length */ } } + /* + * If has_mac is < 0 we had a malformed packet. + */ + if (has_mac < 0) { + sys_badlength++; + return; /* bad length */ + } #ifdef OPENSSL pkeyid = tkeyid = 0; #endif /* OPENSSL */ /* * We have tossed out as many buggy packets as possible early in * the game to reduce the exposure to a clogging attack. Now we * have to burn some cycles to find the association and * authenticate the packet if required. Note that we burn only * MD5 cycles, again to reduce exposure. There may be no * matching association and that's okay. * * More on the autokey mambo. Normally the local interface is * found when the association was mobilized with respect to a * designated remote address. 
We assume packets arriving from * the remote address arrive via this interface and the local * address used to construct the autokey is the unicast address * of the interface. However, if the sender is a broadcaster, * the interface broadcast address is used instead. & Notwithstanding this technobabble, if the sender is a * multicaster, the broadcast address is null, so we use the * unicast address anyway. Don't ask. */ peer = findpeer(&rbufp->recv_srcadr, rbufp->dstadr, hismode, &retcode); dstadr_sin = &rbufp->dstadr->sin; NTOHL_FP(&pkt->org, &p_org); NTOHL_FP(&pkt->rec, &p_rec); NTOHL_FP(&pkt->xmt, &p_xmt); /* * Authentication is conditioned by three switches: * * NOPEER (RES_NOPEER) do not mobilize an association unless * authenticated * NOTRUST (RES_DONTTRUST) do not allow access unless * authenticated (implies NOPEER) * enable (sys_authenticate) master NOPEER switch, by default * on * * The NOPEER and NOTRUST can be specified on a per-client basis * using the restrict command. The enable switch if on implies * NOPEER for all clients. There are four outcomes: * * NONE The packet has no MAC. * OK the packet has a MAC and authentication succeeds * ERROR the packet has a MAC and authentication fails * CRYPTO crypto-NAK. The MAC has four octets only. * * Note: The AUTH(x, y) macro is used to filter outcomes. If x * is zero, acceptable outcomes of y are NONE and OK. If x is * one, the only acceptable outcome of y is OK. 
*/ if (has_mac == 0) { is_authentic = AUTH_NONE; /* not required */ #ifdef DEBUG if (debug) printf("receive: at %ld %s<-%s mode %d code %d auth %d\n", current_time, stoa(dstadr_sin), stoa(&rbufp->recv_srcadr), hismode, retcode, is_authentic); #endif } else if (has_mac == 4) { is_authentic = AUTH_CRYPTO; /* crypto-NAK */ #ifdef DEBUG if (debug) printf( "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n", current_time, stoa(dstadr_sin), stoa(&rbufp->recv_srcadr), hismode, retcode, skeyid, authlen, has_mac, is_authentic); #endif } else { #ifdef OPENSSL /* * For autokey modes, generate the session key * and install in the key cache. Use the socket * broadcast or unicast address as appropriate. */ if (skeyid > NTP_MAXKEY) { /* * More on the autokey dance (AKD). A cookie is * constructed from public and private values. * For broadcast packets, the cookie is public * (zero). For packets that match no * association, the cookie is hashed from the * addresses and private value. For server * packets, the cookie was previously obtained * from the server. For symmetric modes, the * cookie was previously constructed using an * agreement protocol; however, should PKI be * unavailable, we construct a fake agreement as * the EXOR of the peer and host cookies. * * hismode ephemeral persistent * ======================================= * active 0 cookie# * passive 0% cookie# * client sys cookie 0% * server 0% sys cookie * broadcast 0 0 * * # if unsync, 0 * % can't happen */ if (hismode == MODE_BROADCAST) { /* * For broadcaster, use the interface * broadcast address when available; * otherwise, use the unicast address * found when the association was * mobilized. However, if this is from * the wildcard interface, game over. 
*/ if (crypto_flags && rbufp->dstadr == any_interface) { sys_restricted++; return; /* no wildcard */ } pkeyid = 0; if (!SOCKNUL(&rbufp->dstadr->bcast)) dstadr_sin = &rbufp->dstadr->bcast; } else if (peer == NULL) { pkeyid = session_key( &rbufp->recv_srcadr, dstadr_sin, 0, sys_private, 0); } else { pkeyid = peer->pcookie; } /* * The session key includes both the public * values and cookie. In case of an extension * field, the cookie used for authentication * purposes is zero. Note the hash is saved for * use later in the autokey mambo. */ if (authlen > LEN_PKT_NOMAC && pkeyid != 0) { session_key(&rbufp->recv_srcadr, dstadr_sin, skeyid, 0, 2); tkeyid = session_key( &rbufp->recv_srcadr, dstadr_sin, skeyid, pkeyid, 0); } else { tkeyid = session_key( &rbufp->recv_srcadr, dstadr_sin, skeyid, pkeyid, 2); } } #endif /* OPENSSL */ /* * Compute the cryptosum. Note a clogging attack may * succeed in bloating the key cache. If an autokey, * purge it immediately, since we won't be needing it * again. If the packet is authentic, it can mobilize an * association. Note that there is no key zero. */ if (!authdecrypt(skeyid, (u_int32 *)pkt, authlen, has_mac)) { is_authentic = AUTH_ERROR; sys_badauth++; return; } else { is_authentic = AUTH_OK; } #ifdef OPENSSL if (skeyid > NTP_MAXKEY) authtrust(skeyid, 0); #endif /* OPENSSL */ #ifdef DEBUG if (debug) printf( "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n", current_time, stoa(dstadr_sin), stoa(&rbufp->recv_srcadr), hismode, retcode, skeyid, authlen, has_mac, is_authentic); #endif } /* * The association matching rules are implemented by a set of * routines and an association table. A packet matching an * association is processed by the peer process for that * association. 
If there are no errors, an ephemeral association * is mobilized: a broadcast packet mobilizes a broadcast client * aassociation; a manycast server packet mobilizes a manycast * client association; a symmetric active packet mobilizes a * symmetric passive association. */ switch (retcode) { /* * This is a client mode packet not matching any association. If * an ordinary client, simply toss a server mode packet back * over the fence. If a manycast client, we have to work a * little harder. */ case AM_FXMIT: /* * The vanilla case is when this is not a multicast * interface. If authentication succeeds, return a * server mode packet; if not and the key ID is nonzero, * return a crypto-NAK. */ if (!(rbufp->dstadr->flags & INT_MCASTOPEN)) { if (AUTH(restrict_mask & RES_DONTTRUST, is_authentic)) fast_xmit(rbufp, MODE_SERVER, skeyid, restrict_mask); else if (is_authentic == AUTH_ERROR) fast_xmit(rbufp, MODE_SERVER, 0, restrict_mask); return; /* hooray */ } /* * This must be manycast. Do not respond if not * configured as a manycast server. */ if (!sys_manycastserver) { sys_restricted++; return; /* not enabled */ } /* * Do not respond if unsynchronized or stratum is below * the floor or at or above the ceiling. */ if (sys_leap == LEAP_NOTINSYNC || sys_stratum < sys_floor || sys_stratum >= sys_ceiling) return; /* bad stratum */ /* * Do not respond if our stratum is greater than the * manycaster or it has already synchronized to us. */ if (sys_peer == NULL || hisstratum < sys_stratum || (sys_cohort && hisstratum == sys_stratum) || rbufp->dstadr->addr_refid == pkt->refid) return; /* no help */ /* * Respond only if authentication succeeds. Don't do a * crypto-NAK, as that would not be useful. */ if (AUTH(restrict_mask & RES_DONTTRUST, is_authentic)) fast_xmit(rbufp, MODE_SERVER, skeyid, restrict_mask); return; /* hooray */ /* * This is a server mode packet returned in response to a client * mode packet sent to a multicast group address. 
The origin * timestamp is a good nonce to reliably associate the reply * with what was sent. If there is no match, that's curious and * could be an intruder attempting to clog, so we just ignore * it. * * If the packet is authentic and the manycast association is * found, we mobilize a client association and copy pertinent * variables from the manycast association to the new client * association. If not, just ignore the packet. * * There is an implosion hazard at the manycast client, since * the manycast servers send the server packet immediately. If * the guy is already here, don't fire up a duplicate. */ case AM_MANYCAST: if (!AUTH(sys_authenticate | (restrict_mask & (RES_NOPEER | RES_DONTTRUST)), is_authentic)) return; /* bad auth */ if ((peer2 = findmanycastpeer(rbufp)) == NULL) { sys_restricted++; return; /* not enabled */ } if ((peer = newpeer(&rbufp->recv_srcadr, rbufp->dstadr, MODE_CLIENT, hisversion, NTP_MINDPOLL, NTP_MAXDPOLL, FLAG_IBURST | FLAG_PREEMPT, MDF_UCAST | MDF_ACLNT, 0, skeyid)) == NULL) return; /* system error */ /* * We don't need these, but it warms the billboards. */ peer->ttl = peer2->ttl; break; /* * This is the first packet received from a broadcast server. If * the packet is authentic and we are enabled as broadcast * client, mobilize a broadcast client association. We don't * kiss any frogs here. */ case AM_NEWBCL: if (!AUTH(sys_authenticate | (restrict_mask & (RES_NOPEER | RES_DONTTRUST)), is_authentic)) return; /* bad auth */ /* * Do not respond if unsynchronized or stratum is below * the floor or at or above the ceiling. */ if (hisleap == LEAP_NOTINSYNC || hisstratum < sys_floor || hisstratum >= sys_ceiling) return; /* bad stratum */ switch (sys_bclient) { /* * If not enabled, just skedaddle. */ case 0: sys_restricted++; return; /* not enabled */ /* * Execute the initial volley in order to calibrate the * propagation delay and run the Autokey protocol, if * enabled. 
*/ case 1: if ((peer = newpeer(&rbufp->recv_srcadr, rbufp->dstadr, MODE_CLIENT, hisversion, NTP_MINDPOLL, NTP_MAXDPOLL, FLAG_MCAST | FLAG_IBURST, MDF_BCLNT, 0, skeyid)) == NULL) return; /* system error */ #ifdef OPENSSL if (skeyid > NTP_MAXKEY) crypto_recv(peer, rbufp); #endif /* OPENSSL */ return; /* hooray */ /* * Do not execute the initial volley. */ case 2: #ifdef OPENSSL /* * If a two-way exchange is not possible, * neither is Autokey. */ if (skeyid > NTP_MAXKEY) { msyslog(LOG_INFO, "receive: autokey requires two-way communication"); return; /* no autokey */ } #endif /* OPENSSL */ if ((peer = newpeer(&rbufp->recv_srcadr, rbufp->dstadr, MODE_BCLIENT, hisversion, NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_BCLNT, 0, skeyid)) == NULL) return; /* system error */ } break; /* * This is the first packet received from a symmetric active * peer. If the packet is authentic and the first he sent, * mobilize a passive association. If not, kiss the frog. */ case AM_NEWPASS: /* * If the inbound packet is correctly authenticated and * enabled, a symmetric passive association is * mobilized. If not but correctly authenticated, a * symmetric active response is sent. If authentication * fails, send a crypto-NAK packet. */ if (!AUTH(restrict_mask & RES_DONTTRUST, is_authentic)) { if (is_authentic == AUTH_ERROR) fast_xmit(rbufp, MODE_ACTIVE, 0, restrict_mask); return; /* bad auth */ } if (!AUTH(sys_authenticate | (restrict_mask & RES_NOPEER), is_authentic)) { fast_xmit(rbufp, MODE_ACTIVE, skeyid, restrict_mask); return; /* hooray */ } /* * Do not respond if stratum is below the floor. */ if (hisstratum < sys_floor) return; /* bad stratum */ if ((peer = newpeer(&rbufp->recv_srcadr, rbufp->dstadr, MODE_PASSIVE, hisversion, NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_UCAST, 0, skeyid)) == NULL) return; /* system error */ break; /* * Process regular packet. Nothing special. */ case AM_PROCPKT: break; /* * A passive packet matches a passive association. 
This is * usually the result of reconfiguring a client on the fly. As * this association might be legitamate and this packet an * attempt to deny service, just ignore it. */ case AM_ERR: return; /* * For everything else there is the bit bucket. */ default: return; } peer->flash &= ~PKT_TEST_MASK; /* * Next comes a rigorous schedule of timestamp checking. If the * transmit timestamp is zero, the server is horribly broken. */ if (L_ISZERO(&p_xmt)) { return; /* read rfc1305 */ /* * If the transmit timestamp duplicates a previous one, the * packet is a replay. This prevents the bad guys from replaying * the most recent packet, authenticated or not. */ } else if (L_ISEQU(&peer->org, &p_xmt)) { peer->flash |= TEST1; peer->oldpkt++; return; /* duplicate packet */ /* * If this is a broadcast mode packet, skip further checking. */ } else if (hismode != MODE_BROADCAST) { if (L_ISZERO(&p_org)) peer->flash |= TEST3; /* protocol unsynch */ else if (!L_ISEQU(&p_org, &peer->xmt)) peer->flash |= TEST2; /* bogus packet */ } /* - * Update the origin and destination timestamps. If - * unsynchronized or bogus abandon ship. If the crypto machine + * If unsynchronized or bogus abandon ship. If the crypto machine * breaks, light the crypto bit and plaint the log. */ - peer->org = p_xmt; - peer->rec = rbufp->recv_time; if (peer->flash & PKT_TEST_MASK) { #ifdef OPENSSL if (crypto_flags && (peer->flags & FLAG_SKEY)) { rval = crypto_recv(peer, rbufp); if (rval != XEVNT_OK) { peer_clear(peer, "CRYP"); peer->flash |= TEST9; /* crypto error */ } } #endif /* OPENSSL */ return; /* unsynch */ } /* * The timestamps are valid and the receive packet matches the * last one sent. If the packet is a crypto-NAK, the server * might have just changed keys. We reset the association * and restart the protocol. */ if (is_authentic == AUTH_CRYPTO) { peer_clear(peer, "AUTH"); return; /* crypto-NAK */ /* * If the association is authenticated, the key ID is nonzero * and received packets must be authenticated. 
This is designed * to avoid a bait-and-switch attack, which was possible in past * versions. If symmetric modes, return a crypto-NAK. The peer * should restart the protocol. */ - } else if (!AUTH(peer->keyid || (restrict_mask & RES_DONTTRUST), - is_authentic)) { + } else if (!AUTH(peer->keyid || has_mac || + (restrict_mask & RES_DONTTRUST), is_authentic)) { peer->flash |= TEST5; - if (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE) + if (has_mac && + (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE)) fast_xmit(rbufp, MODE_ACTIVE, 0, restrict_mask); return; /* bad auth */ } /* * That was hard and I am sweaty, but the packet is squeaky * clean. Get on with real work. + * + * Update the origin and destination timestamps. */ + peer->org = p_xmt; + peer->rec = rbufp->recv_time; + peer->received++; peer->timereceived = current_time; if (is_authentic == AUTH_OK) peer->flags |= FLAG_AUTHENTIC; else peer->flags &= ~FLAG_AUTHENTIC; #ifdef OPENSSL /* * More autokey dance. The rules of the cha-cha are as follows: * * 1. If there is no key or the key is not auto, do nothing. * * 2. If this packet is in response to the one just previously * sent or from a broadcast server, do the extension fields. * Otherwise, assume bogosity and bail out. * * 3. If an extension field contains a verified signature, it is * self-authenticated and we sit the dance. * * 4. If this is a server reply, check only to see that the * transmitted key ID matches the received key ID. * * 5. Check to see that one or more hashes of the current key ID * matches the previous key ID or ultimate original key ID * obtained from the broadcaster or symmetric peer. If no * match, sit the dance and wait for timeout. * * In case of crypto error, fire the orchestra and stop dancing. * This is considered a permanant error, so light the crypto bit * to suppress further requests. If preemptable or ephemeral, * scuttle the ship. 
*/ if (crypto_flags && (peer->flags & FLAG_SKEY)) { peer->flash |= TEST8; rval = crypto_recv(peer, rbufp); if (rval != XEVNT_OK) { peer_clear(peer, "CRYP"); peer->flash |= TEST9; /* crypto error */ if (peer->flags & FLAG_PREEMPT || !(peer->flags & FLAG_CONFIG)) unpeer(peer); return; } else if (hismode == MODE_SERVER) { if (skeyid == peer->keyid) peer->flash &= ~TEST8; } else if (!(peer->flash & TEST8)) { peer->pkeyid = skeyid; } else if ((ap = (struct autokey *)peer->recval.ptr) != NULL) { int i; for (i = 0; ; i++) { if (tkeyid == peer->pkeyid || tkeyid == ap->key) { peer->flash &= ~TEST8; peer->pkeyid = skeyid; break; } if (i > ap->seq) break; tkeyid = session_key( &rbufp->recv_srcadr, dstadr_sin, tkeyid, pkeyid, 0); } } if (!(peer->crypto & CRYPTO_FLAG_PROV)) /* test 9 */ peer->flash |= TEST8; /* not proventic */ /* * If the transmit queue is nonempty, clamp the host * poll interval to the packet poll interval. */ if (peer->cmmd != 0) { peer->ppoll = pkt->ppoll; poll_update(peer, peer->hpoll); } } #endif /* OPENSSL */ /* * The dance is complete and the flash bits have been lit. Toss * the packet over the fence for processing, which may light up * more flashers. */ process_packet(peer, pkt); /* * Well, that was nice. If TEST4 is lit, either the crypto * machine jammed or a kiss-o'-death packet flew in, either of * which is fatal. */ if (peer->flash & TEST4) { msyslog(LOG_INFO, "receive: fatal error %04x for %s", peer->flash, stoa(&peer->srcadr)); return; } } /* * process_packet - Packet Procedure, a la Section 3.4.4 of the * specification. Or almost, at least. If we're in here we have a * reasonable expectation that we will be having a long term * relationship with this host. 
*/ void process_packet( register struct peer *peer, /* peer structure pointer */ register struct pkt *pkt /* packet pointer */ ) { double t34, t21; double p_offset, p_del, p_disp; l_fp p_rec, p_xmt, p_org, p_reftime; l_fp ci; u_char pmode, pleap, pstratum; sys_processed++; peer->processed++; p_del = FPTOD(NTOHS_FP(pkt->rootdelay)); p_disp = FPTOD(NTOHS_FP(pkt->rootdispersion)); NTOHL_FP(&pkt->reftime, &p_reftime); NTOHL_FP(&pkt->rec, &p_rec); NTOHL_FP(&pkt->xmt, &p_xmt); pmode = PKT_MODE(pkt->li_vn_mode); pleap = PKT_LEAP(pkt->li_vn_mode); /* * For a broadcast packet the last receive time (peer->rec) * stands in for the packet origin timestamp. */ if (pmode != MODE_BROADCAST) NTOHL_FP(&pkt->org, &p_org); else p_org = peer->rec; pstratum = PKT_TO_STRATUM(pkt->stratum); /* * Test for a kiss-o'-death packet: the leap bits show * unsynchronized and the stratum is unspecified. If the * reference ID is DENY, clear the association and light TEST4. */ if (pleap == LEAP_NOTINSYNC && pstratum == STRATUM_UNSPEC) { if (memcmp(&pkt->refid, "DENY", 4) == 0) { peer_clear(peer, "DENY"); peer->flash |= TEST4; /* access denied */ } } /* * Capture the header values. */ record_raw_stats(&peer->srcadr, peer->dstadr ? &peer->dstadr->sin : NULL, &p_org, &p_rec, &p_xmt, &peer->rec); peer->leap = pleap; peer->stratum = min(pstratum, STRATUM_UNSPEC); peer->pmode = pmode; peer->ppoll = pkt->ppoll; peer->precision = pkt->precision; peer->rootdelay = p_del; peer->rootdispersion = p_disp; peer->refid = pkt->refid; /* network byte order */ peer->reftime = p_reftime; /* * Verify the server is synchronized; that is, the leap bits and * stratum are valid, the root delay and root dispersion are * valid and the reference timestamp is not later than the * transmit timestamp. */ if (pleap == LEAP_NOTINSYNC || /* test 6 */ pstratum < sys_floor || pstratum >= sys_ceiling) peer->flash |= TEST6; /* peer not synch */ if (p_del < 0 || p_disp < 0 || p_del / /* test 7 */ 2 + p_disp >= MAXDISPERSE || !L_ISHIS(&p_xmt, &p_reftime)) peer->flash |= TEST7; /* bad header */ /* * If any tests fail at this point, the packet is discarded. * Note that some flashers may have already been set in the * receive() routine.
*/ if (peer->flash & PKT_TEST_MASK) { #ifdef DEBUG if (debug) printf("packet: flash header %04x\n", peer->flash); #endif return; } /* * If peer->reach is zero the peer has just become reachable; * report the event and note the time. */ if (!(peer->reach)) { report_event(EVNT_REACH, peer); peer->timereachable = current_time; } poll_update(peer, peer->hpoll); peer->reach |= 1; /* * For a client/server association, calculate the clock offset, * roundtrip delay and dispersion. The equations are reordered * from the spec for more efficient use of temporaries. For a * broadcast association, offset the last measurement by the * computed delay during the client/server volley. Note that * org has been set to the time of last reception. Note the * computation of dispersion includes the system precision plus * that due to the frequency error since the origin time. * * It is very important to respect the hazards of overflow. The * only permitted operation on raw timestamps is subtraction, * where the result is a signed quantity spanning from 68 years * in the past to 68 years in the future. To avoid loss of * precision, these calculations are done using 64-bit integer * arithmetic. However, the offset and delay calculations are * sums and differences of these first-order differences, which * if done using 64-bit integer arithmetic, would be valid over * only half that span. Since the typical first-order * differences are usually very small, they are converted to 64- * bit doubles and all remaining calculations done in floating- * point arithmetic. This preserves the accuracy while retaining * the 68-year span. * * Let t1 = p_org, t2 = p_rec, t3 = p_xmt, t4 = peer->rec: */ ci = p_xmt; /* t3 - t4 */ L_SUB(&ci, &peer->rec); LFPTOD(&ci, t34); ci = p_rec; /* t2 - t1 */ L_SUB(&ci, &p_org); LFPTOD(&ci, t21); ci = peer->rec; /* t4 - t1 */ L_SUB(&ci, &p_org); /* * If running in a broadcast association, the clock offset is * (t1 - t0) corrected by the one-way delay, but we can't * measure that directly.
Therefore, we start up in MODE_CLIENT * mode, set FLAG_MCAST and exchange eight messages to determine * the clock offset. When the last message is sent, we switch to * MODE_BCLIENT mode. The next broadcast message after that * computes the broadcast offset and clears FLAG_MCAST. */ if (pmode == MODE_BROADCAST) { p_offset = t34; if (peer->flags & FLAG_MCAST) { peer->estbdelay = peer->offset - p_offset; if (peer->hmode == MODE_CLIENT) return; peer->flags &= ~(FLAG_MCAST | FLAG_BURST); } p_offset += peer->estbdelay; p_del = peer->delay; p_disp = 0; } else { p_offset = (t21 + t34) / 2.; p_del = t21 - t34; LFPTOD(&ci, p_disp); /* ci still holds t4 - t1 */ p_disp = LOGTOD(sys_precision) + LOGTOD(peer->precision) + clock_phi * p_disp; } p_del = max(p_del, LOGTOD(sys_precision)); /* delay not less than system precision */ clock_filter(peer, p_offset, p_del, p_disp); record_peer_stats(&peer->srcadr, ctlpeerstatus(peer), peer->offset, peer->delay, peer->disp, peer->jitter); } /* * clock_update - Called at system process update intervals. * * Clamps sys_poll to the system peer minpoll/maxpoll and updates the * system peer poll interval. Returns early unless sys_peer->epoch is * later than sys_clocktime; otherwise calls local_clock() with * sys_offset and acts on its return code: -1 reports a fault and * exits, 2 (step) resets the system variables, 1 (slew) updates * them, anything else is ignored. Reports a stratum change event if * sys_stratum changed. */ static void clock_update(void) { u_char oleap; u_char ostratum; double dtemp; /* * There must be a system peer at this point. If we just changed * the system peer, but have a newer sample from the old one, * wait until newer data are available. */ if (sys_poll < sys_peer->minpoll) sys_poll = sys_peer->minpoll; if (sys_poll > sys_peer->maxpoll) sys_poll = sys_peer->maxpoll; poll_update(sys_peer, sys_poll); if (sys_peer->epoch <= sys_clocktime) return; #ifdef DEBUG if (debug) printf("clock_update: at %ld assoc %d \n", current_time, peer_associations); #endif oleap = sys_leap; ostratum = sys_stratum; switch (local_clock(sys_peer, sys_offset)) { /* * Clock exceeds panic threshold. Life as we know it ends. */ case -1: report_event(EVNT_SYSFAULT, NULL); exit (-1); /* not reached */ /* * Clock was stepped. Flush all time values of all peers.
*/ case 2: clear_all(); sys_leap = LEAP_NOTINSYNC; sys_stratum = STRATUM_UNSPEC; sys_peer = NULL; sys_rootdelay = 0; sys_rootdispersion = 0; memcpy(&sys_refid, "STEP", 4); report_event(EVNT_CLOCKRESET, NULL); break; /* * Clock was slewed. Update the system stratum, leap bits, root * delay, root dispersion, reference ID and reference time. If * the leap changes, we gotta reroll the keys. Except for * reference clocks, the minimum dispersion increment is not * less than sys_mindisp. */ case 1: sys_leap = leap_next; sys_stratum = min(sys_peer->stratum + 1, STRATUM_UNSPEC); sys_reftime = sys_peer->rec; /* * In orphan mode the stratum defaults to the orphan * stratum. The root delay is set to a random value * generated at startup. The root dispersion is set from * the peer dispersion; the peer root dispersion is * ignored. * * NOTE(review): the orphan branch below takes sys_rootdelay * from sys_peer->delay; confirm the random-value remark * above still applies. */ dtemp = sys_peer->disp + clock_phi * (current_time - sys_peer->update) + sys_jitter + fabs(sys_peer->offset); #ifdef REFCLOCK if (!(sys_peer->flags & FLAG_REFCLOCK) && dtemp < sys_mindisp) dtemp = sys_mindisp; #else if (dtemp < sys_mindisp) dtemp = sys_mindisp; #endif /* REFCLOCK */ if (sys_stratum >= sys_orphan) { sys_stratum = sys_orphan; sys_rootdelay = sys_peer->delay; sys_rootdispersion = dtemp; } else { sys_rootdelay = sys_peer->delay + sys_peer->rootdelay; sys_rootdispersion = dtemp + sys_peer->rootdispersion; } if (oleap == LEAP_NOTINSYNC) { report_event(EVNT_SYNCCHG, NULL); #ifdef OPENSSL expire_all(); crypto_update(); #endif /* OPENSSL */ } break; /* * Popcorn spike or step threshold exceeded. Pretend it never * happened. */ default: break; } if (ostratum != sys_stratum) report_event(EVNT_PEERSTCHG, NULL); } /* * poll_update - update peer poll interval * * Sets peer->hpoll from mpoll, clamped between the peer minpoll and * maxpoll (or fixed at minpoll when FLAG_FIXPOLL is set), and then * works out peer->nextdate, the time of the next poll. */ void poll_update( struct peer *peer, /* peer structure pointer */ int mpoll /* requested poll interval */ ) { int hpoll; /* * This routine figures out when the next poll should be sent. * That turns out to be wickedly complicated. The big problem is * that sometimes the time for the next poll is in the past.
* Watch out for races here between the receive process and the * poll process. The key assertion is that, if nextdate equals * current_time, the call is from the poll process; otherwise, * it is from the receive process. * * First, bracket the poll interval according to the type of * association and options. If a fixed interval is configured, * use minpoll. This primarily is for reference clocks, but * works for any association. */ if (peer->flags & FLAG_FIXPOLL) { hpoll = peer->minpoll; /* * The ordinary case; clamp the poll interval between minpoll * and maxpoll. */ } else { hpoll = max(min(peer->maxpoll, mpoll), peer->minpoll); } #ifdef OPENSSL /* * Bit of crass arrogance at this point. If the poll interval * has changed and we have a keylist, the lifetimes in the * keylist are probably bogus. In this case purge the keylist * and regenerate it later. */ if (hpoll != peer->hpoll) key_expire(peer); #endif /* OPENSSL */ peer->hpoll = hpoll; /* * Now we figure out if there is an override. If during the * crypto protocol and a message is pending, make it wait not * more than two seconds. */ #ifdef OPENSSL if (peer->cmmd != NULL && (sys_leap != LEAP_NOTINSYNC || peer->crypto)) { peer->nextdate = current_time + RESP_DELAY; /* * If we get called from the receive routine while a burst is * pending, just slink away. If from the poll routine and a * reference clock or a pending crypto response, delay for one * second. If this is the first sent in a burst, wait for the * modem to come up. For others in the burst, delay two seconds.
*/ } else if (peer->burst > 0) { #else /* OPENSSL */ if (peer->burst > 0) { #endif /* OPENSSL */ if (peer->nextdate != current_time) return; #ifdef REFCLOCK else if (peer->flags & FLAG_REFCLOCK) peer->nextdate += RESP_DELAY; #endif /* REFCLOCK */ else if (peer->flags & (FLAG_IBURST | FLAG_BURST) && peer->burst == NTP_BURST) peer->nextdate += sys_calldelay; else peer->nextdate += BURST_DELAY; /* * The ordinary case; use the minimum of the host and peer * intervals, but not less than minpoll. In other words, * oversampling is okay but undersampling is evil. */ } else { peer->nextdate = peer->outdate + RANDPOLL(max(min(peer->ppoll, hpoll), peer->minpoll)); } /* * If the time for the next poll has already happened, bring it * up to the next second after this one. This way the only way * to get nextdate == current_time is from the poll routine. */ if (peer->nextdate <= current_time) peer->nextdate = current_time + 1; #ifdef DEBUG if (debug > 1) printf("poll_update: at %lu %s flags %04x poll %d burst %d last %lu next %lu\n", current_time, ntoa(&peer->srcadr), peer->flags, peer->hpoll, peer->burst, peer->outdate, peer->nextdate); #endif } /* * peer_crypto_clear - discard crypto information * * Frees the cryptographic values held in the peer structure and * resets the corresponding fields. Without OPENSSL only the debug * trace is produced. */ void peer_crypto_clear( struct peer *peer /* peer structure pointer */ ) { /* * If cryptographic credentials have been acquired, toss them to * Valhalla. Note that autokeys are ephemeral, in that they are * tossed immediately upon use. Therefore, the keylist can be * purged anytime without needing to preserve random keys. Note * that, if the peer is purged, the cryptographic variables are * purged, too. This makes it much harder to sneak in some * unauthenticated data in the clock filter. */ DPRINTF(1, ("peer_crypto_clear: at %ld next %ld assoc ID %d\n", current_time, peer->nextdate, peer->associd)); #ifdef OPENSSL peer->assoc = 0; peer->crypto = 0; if (peer->pkey != NULL) EVP_PKEY_free(peer->pkey); peer->pkey = NULL; peer->digest = NULL; /* XXX MEMLEAK?
check whether this needs to be freed in any way - never was freed */ if (peer->subject != NULL) free(peer->subject); peer->subject = NULL; if (peer->issuer != NULL) free(peer->issuer); peer->issuer = NULL; peer->pkeyid = 0; peer->pcookie = 0; if (peer->ident_pkey != NULL) EVP_PKEY_free(peer->ident_pkey); peer->ident_pkey = NULL; memset(&peer->fstamp, 0, sizeof(peer->fstamp)); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = NULL; if (peer->grpkey != NULL) BN_free(peer->grpkey); peer->grpkey = NULL; value_free(&peer->cookval); value_free(&peer->recval); if (peer->cmmd != NULL) { free(peer->cmmd); peer->cmmd = NULL; } key_expire(peer); value_free(&peer->encrypt); #endif /* OPENSSL */ } /* * peer_clear - clear peer filter registers. See Section 3.4.8 of the spec. * * Discards the crypto state, forgets the peer as system peer if it * was one, zeroes the clearable part of the structure, restores the * nonzero defaults and schedules the next poll. */ void peer_clear( struct peer *peer, /* peer structure */ char *ident /* tally lights: 4 bytes copied into refid unless a reference clock */ ) { int i; peer_crypto_clear(peer); if (peer == sys_peer) sys_peer = NULL; /* * Wipe the association clean and initialize the nonzero values. */ memset(CLEAR_TO_ZERO(peer), 0, LEN_CLEAR_TO_ZERO); peer->estbdelay = sys_bdelay; peer->ppoll = peer->maxpoll; peer->hpoll = peer->minpoll; peer->disp = MAXDISPERSE; peer->jitter = LOGTOD(sys_precision); for (i = 0; i < NTP_SHIFT; i++) { peer->filter_order[i] = i; peer->filter_disp[i] = MAXDISPERSE; } #ifdef REFCLOCK if (!(peer->flags & FLAG_REFCLOCK)) { peer->leap = LEAP_NOTINSYNC; peer->stratum = STRATUM_UNSPEC; memcpy(&peer->refid, ident, 4); } #else peer->leap = LEAP_NOTINSYNC; peer->stratum = STRATUM_UNSPEC; memcpy(&peer->refid, ident, 4); #endif /* REFCLOCK */ /* * During initialization use the association count to spread out * the polls at one-second intervals. Otherwise, a passive * association waits RESP_DELAY and all others randomize over * the minimum poll interval in order to avoid broadcast * implosion.
*/ peer->nextdate = peer->update = peer->outdate = current_time; if (initializing) peer->nextdate += peer_associations; else if (peer->hmode == MODE_PASSIVE) peer->nextdate += RESP_DELAY; else peer->nextdate += (ntp_random() & ((1 << NTP_MINDPOLL) - 1)); DPRINTF(1, ("peer_clear: at %ld next %ld assoc ID %d refid %s\n", current_time, peer->nextdate, peer->associd, ident)); } /* * clock_filter - add incoming clock sample to filter register and run * the filter procedure to find the best sample. * * Updates the peer dispersion and jitter and, when an acceptable * sample is found, the peer offset and delay. A sample newer than * the last one used and not judged a popcorn spike also updates * peer->epoch and may trigger clock_select(). */ void clock_filter( struct peer *peer, /* peer structure pointer */ double sample_offset, /* clock offset */ double sample_delay, /* roundtrip delay */ double sample_disp /* dispersion */ ) { double dst[NTP_SHIFT]; /* distance vector */ int ord[NTP_SHIFT]; /* index vector */ int i, j, k, m; /* m: number of samples surviving the pruning */ double dtemp, etemp; /* * Shift the new sample into the register and discard the oldest * one. The new offset and delay come directly from the * timestamp calculations. The dispersion grows from the last * outbound packet or reference clock update to the present time * and increased by the sum of the peer precision and the system * precision. The delay can sometimes swing negative due to * frequency skew, so it is clamped non-negative. */ j = peer->filter_nextpt; peer->filter_offset[j] = sample_offset; peer->filter_delay[j] = max(0, sample_delay); peer->filter_disp[j] = sample_disp; peer->filter_epoch[j] = current_time; j = (j + 1) % NTP_SHIFT; peer->filter_nextpt = j; /* * Update dispersions since the last update and at the same * time initialize the distance and index lists. The distance * list uses a compound metric. If the sample is valid and * younger than the minimum Allan intercept, use delay; * otherwise, use biased dispersion.
*/ dtemp = clock_phi * (current_time - peer->update); peer->update = current_time; for (i = NTP_SHIFT - 1; i >= 0; i--) { /* i == 0 is the sample just stored; it is not aged */ if (i != 0) peer->filter_disp[j] += dtemp; if (peer->filter_disp[j] >= MAXDISPERSE) peer->filter_disp[j] = MAXDISPERSE; if (peer->filter_disp[j] >= MAXDISPERSE) dst[i] = MAXDISPERSE; else if (peer->update - peer->filter_epoch[j] > allan_xpt) dst[i] = sys_maxdist + peer->filter_disp[j]; else dst[i] = peer->filter_delay[j]; ord[i] = j; j++; j %= NTP_SHIFT; } /* * If the clock discipline has stabilized, sort the samples in * both lists by distance. Note, we do not displace a higher * distance sample by a lower distance one unless lower by at * least the precision. */ if (state == 4) { for (i = 1; i < NTP_SHIFT; i++) { for (j = 0; j < i; j++) { if (dst[j] > dst[i] + LOGTOD(sys_precision)) { k = ord[j]; ord[j] = ord[i]; ord[i] = k; etemp = dst[j]; dst[j] = dst[i]; dst[i] = etemp; } } } } /* * Copy the index list to the association structure so ntpq * can see it later. Prune the distance list to samples less * than max distance, but keep at least two valid samples for * jitter calculation. */ m = 0; for (i = 0; i < NTP_SHIFT; i++) { peer->filter_order[i] = (u_char) ord[i]; if (dst[i] >= MAXDISPERSE || (m >= 2 && dst[i] >= sys_maxdist)) continue; m++; } /* * Compute the dispersion and jitter. The dispersion is weighted * exponentially by NTP_FWEIGHT (0.5) so it is normalized close * to 1.0. The jitter is the RMS differences relative to the * lowest delay sample. If no acceptable samples remain in the * shift register, quietly tiptoe home leaving only the * dispersion. */ peer->disp = peer->jitter = 0; k = ord[0]; for (i = NTP_SHIFT - 1; i >= 0; i--) { j = ord[i]; peer->disp = NTP_FWEIGHT * (peer->disp + peer->filter_disp[j]); if (i < m) peer->jitter += DIFF(peer->filter_offset[j], peer->filter_offset[k]); } /* * If no acceptable samples remain in the shift register, * quietly tiptoe home leaving only the dispersion.
Otherwise, * save the offset, delay and jitter. Note the jitter must not * be less than the precision. */ if (m == 0) return; etemp = fabs(peer->offset - peer->filter_offset[k]); peer->offset = peer->filter_offset[k]; peer->delay = peer->filter_delay[k]; if (m > 1) peer->jitter /= m - 1; peer->jitter = max(SQRT(peer->jitter), LOGTOD(sys_precision)); /* * A new sample is useful only if it is younger than the last * one used. Note the order is FIFO if the clock discipline has * not stabilized. */ if (peer->filter_epoch[k] <= peer->epoch) { #ifdef DEBUG if (debug) printf("clock_filter: discard %lu\n", peer->epoch - peer->filter_epoch[k]); #endif return; } /* * If the difference between the last offset and the current one * exceeds the jitter by CLOCK_SGATE and the interval since the * last update is less than twice the system poll interval, * consider the update a popcorn spike and ignore it. * * NOTE(review): the debug trace below prints dtemp as its * second value; confirm that is the intended quantity. */ if (etemp > CLOCK_SGATE * peer->jitter && m > 1 && peer->filter_epoch[k] - peer->epoch < 2. * ULOGTOD(sys_poll)) { #ifdef DEBUG if (debug) printf("clock_filter: popcorn %.6f %.6f\n", etemp, dtemp); #endif return; } /* * The mitigated sample statistics are saved for later * processing. If not in a burst, or if the system is not yet * synchronized, tickle the select. */ peer->epoch = peer->filter_epoch[k]; #ifdef DEBUG if (debug) printf( "clock_filter: n %d off %.6f del %.6f dsp %.6f jit %.6f, age %lu\n", m, peer->offset, peer->delay, peer->disp, peer->jitter, current_time - peer->epoch); #endif if (peer->burst == 0 || sys_leap == LEAP_NOTINSYNC) clock_select(); } /* * clock_select - find the pick-of-the-litter clock * * LOCKCLOCK: (1) If the local clock is the prefer peer, it will always be * enabled, even if declared falseticker, (2) only the prefer peer can * be selected as the system peer, (3) if the external source is down, * the system leap bits are set to 11 and the stratum set to infinity.
*/ void clock_select(void) { struct peer *peer; int i, j, k, n; int nlist, nl3; int allow, osurv; double d, e, f, g; double high, low; double synch[NTP_MAXASSOC], error[NTP_MAXASSOC]; struct peer *osys_peer; struct peer *typeacts = NULL; struct peer *typelocal = NULL; struct peer *typesystem = NULL; static int list_alloc = 0; static struct endpoint *endpoint = NULL; static int *indx = NULL; static struct peer **peer_list = NULL; static u_int endpoint_size = 0; static u_int indx_size = 0; static u_int peer_list_size = 0; /* * Initialize and create endpoint, index and peer lists big * enough to handle all associations. */ osys_peer = sys_peer; sys_peer = NULL; sys_pps = NULL; sys_prefer = NULL; osurv = sys_survivors; sys_survivors = 0; #ifdef LOCKCLOCK sys_leap = LEAP_NOTINSYNC; sys_stratum = STRATUM_UNSPEC; memcpy(&sys_refid, "DOWN", 4); #endif /* LOCKCLOCK */ nlist = 0; for (n = 0; n < NTP_HASH_SIZE; n++) nlist += peer_hash_count[n]; if (nlist > list_alloc) { if (list_alloc > 0) { free(endpoint); free(indx); free(peer_list); } while (list_alloc < nlist) { list_alloc += 5; endpoint_size += 5 * 3 * sizeof(*endpoint); indx_size += 5 * 3 * sizeof(*indx); peer_list_size += 5 * sizeof(*peer_list); } endpoint = (struct endpoint *)emalloc(endpoint_size); indx = (int *)emalloc(indx_size); peer_list = (struct peer **)emalloc(peer_list_size); } /* * Initially, we populate the island with all the rifraff peers * that happen to be lying around. Those with seriously * defective clocks are immediately booted off the island. Then, * the falsetickers are culled and put to sea. The truechimers * remaining are subject to repeated rounds where the most * unpopular at each round is kicked off. When the population * has dwindled to sys_minclock, the survivors split a million * bucks and collectively crank the chimes. 
*/ nlist = nl3 = 0; /* none yet */ for (n = 0; n < NTP_HASH_SIZE; n++) { for (peer = peer_hash[n]; peer != NULL; peer = peer->next) { peer->flags &= ~FLAG_SYSPEER; peer->status = CTL_PST_SEL_REJECT; /* * Leave the island immediately if the peer is * unfit to synchronize. */ if (peer_unfit(peer)) continue; /* * Don't allow the local clock or modem drivers * in the kitchen at this point, unless the * prefer peer. Do that later, but only if * nobody else is around. These guys are all * configured, so we never throw them away. */ #ifdef REFCLOCK if (peer->refclktype == REFCLK_LOCALCLOCK #if defined(VMS) && defined(VMS_LOCALUNIT) /* wjm: VMS_LOCALUNIT taken seriously */ && REFCLOCKUNIT(&peer->srcadr) != VMS_LOCALUNIT #endif /* VMS && VMS_LOCALUNIT */ ) { typelocal = peer; #ifndef LOCKCLOCK if (!(peer->flags & FLAG_PREFER)) continue; /* no local clock */ #endif /* LOCKCLOCK */ } if (peer->sstclktype == CTL_SST_TS_TELEPHONE) { typeacts = peer; if (!(peer->flags & FLAG_PREFER)) continue; /* no acts */ } #endif /* REFCLOCK */ /* * If we get this far, the peer can stay on the * island, but does not yet have the immunity * idol. */ peer->status = CTL_PST_SEL_SANE; peer_list[nlist++] = peer; /* * Insert each interval endpoint on the sorted * list. 
*/ e = peer->offset; /* Upper end */ f = root_distance(peer); e = e + f; for (i = nl3 - 1; i >= 0; i--) { if (e >= endpoint[indx[i]].val) break; indx[i + 3] = indx[i]; } indx[i + 3] = nl3; endpoint[nl3].type = 1; endpoint[nl3++].val = e; e = e - f; /* Center point */ for (; i >= 0; i--) { if (e >= endpoint[indx[i]].val) break; indx[i + 2] = indx[i]; } indx[i + 2] = nl3; endpoint[nl3].type = 0; endpoint[nl3++].val = e; e = e - f; /* Lower end */ for (; i >= 0; i--) { if (e >= endpoint[indx[i]].val) break; indx[i + 1] = indx[i]; } indx[i + 1] = nl3; endpoint[nl3].type = -1; endpoint[nl3++].val = e; } } #ifdef DEBUG if (debug > 2) for (i = 0; i < nl3; i++) printf("select: endpoint %2d %.6f\n", endpoint[indx[i]].type, endpoint[indx[i]].val); #endif /* * This is the actual algorithm that cleaves the truechimers * from the falsetickers. The original algorithm was described * in Keith Marzullo's dissertation, but has been modified for * better accuracy. * * Briefly put, we first assume there are no falsetickers, then * scan the candidate list first from the low end upwards and * then from the high end downwards. The scans stop when the * number of intersections equals the number of candidates less * the number of falsetickers. If this doesn't happen for a * given number of falsetickers, we bump the number of * falsetickers and try again. If the number of falsetickers * becomes equal to or greater than half the number of * candidates, the Albanians have won the Byzantine wars and * correct synchronization is not possible. * * Here, nlist is the number of candidates and allow is the * number of falsetickers. Upon exit, the truechimers are the * susvivors with offsets not less than low and not greater than * high. There may be none of them. */ low = 1e9; high = -1e9; for (allow = 0; 2 * allow < nlist; allow++) { int found; /* * Bound the interval (low, high) as the largest * interval containing points from presumed truechimers. 
*/ found = 0; n = 0; for (i = 0; i < nl3; i++) { low = endpoint[indx[i]].val; n -= endpoint[indx[i]].type; if (n >= nlist - allow) break; if (endpoint[indx[i]].type == 0) found++; } n = 0; for (j = nl3 - 1; j >= 0; j--) { high = endpoint[indx[j]].val; n += endpoint[indx[j]].type; if (n >= nlist - allow) break; if (endpoint[indx[j]].type == 0) found++; } /* * If the number of candidates found outside the * interval is greater than the number of falsetickers, * then at least one truechimer is outside the interval, * so go around again. This is what makes this algorithm * different than Marzullo's. */ if (found > allow) continue; /* * If an interval containing truechimers is found, stop. * If not, increase the number of falsetickers and go * around again. */ if (high > low) break; } /* * Clustering algorithm. Construct candidate list in order first * by stratum then by root distance, but keep only the best * NTP_MAXASSOC of them. Scan the list to find falsetickers, who * leave the island immediately. The TRUE peer is always a * truechimer. We must leave at least one peer to collect the * million bucks. If in orphan mode, rascals found with lower * stratum are guaranteed a seat on the bus. */ j = 0; for (i = 0; i < nlist; i++) { peer = peer_list[i]; if (nlist > 1 && (peer->offset <= low || peer->offset >= high) && !(peer->flags & FLAG_TRUE) && !(sys_stratum >= sys_orphan && peer->stratum < sys_orphan)) continue; peer->status = CTL_PST_SEL_DISTSYSPEER; /* * The order metric is formed from the stratum times * max distance (1.) plus the root distance. It strongly * favors the lowest stratum, but a higher stratum peer * can capture the clock if the low stratum dominant * hasn't been heard for awhile. 
*/ d = root_distance(peer) + peer->stratum * sys_maxdist; if (j >= NTP_MAXASSOC) { if (d >= synch[j - 1]) continue; else j--; } for (k = j; k > 0; k--) { if (d >= synch[k - 1]) break; peer_list[k] = peer_list[k - 1]; error[k] = error[k - 1]; synch[k] = synch[k - 1]; } peer_list[k] = peer; error[k] = peer->jitter; synch[k] = d; j++; } nlist = j; /* * If no survivors remain at this point, check if the local * clock or modem drivers have been found. If so, nominate one * of them as the only survivor. Otherwise, give up and leave * the island to the rats. */ if (nlist == 0) { if (typeacts != 0) { typeacts->status = CTL_PST_SEL_DISTSYSPEER; peer_list[0] = typeacts; nlist = 1; } else if (typelocal != 0) { typelocal->status = CTL_PST_SEL_DISTSYSPEER; peer_list[0] = typelocal; nlist = 1; } else { if (osys_peer != NULL) { NLOG(NLOG_SYNCSTATUS) msyslog(LOG_INFO, "no servers reachable"); report_event(EVNT_PEERSTCHG, NULL); } } } /* * We can only trust the survivors if the number of candidates * sys_minsane is at least the number required to detect and * cast out one falsticker. For the Byzantine agreement * algorithm used here, that number is 4; however, the default * sys_minsane is 1 to speed initial synchronization. Careful * operators will tinker a higher value and use at least that * number of synchronization sources. */ if (nlist < sys_minsane) return; for (i = 0; i < nlist; i++) peer_list[i]->status = CTL_PST_SEL_SELCAND; /* * Now, vote outlyers off the island by select jitter weighted * by root distance. Continue voting as long as there are more * than sys_minclock survivors and the minimum select jitter is * greater than the maximum peer jitter. Stop if we are about to * discard a TRUE or PREFER peer, who of course has the * immunity idol. 
*/ while (1) { d = 1e9; e = -1e9; f = g = 0; k = 0; for (i = 0; i < nlist; i++) { if (error[i] < d) d = error[i]; f = 0; if (nlist > 1) { for (j = 0; j < nlist; j++) f += DIFF(peer_list[j]->offset, peer_list[i]->offset); f = SQRT(f / (nlist - 1)); } if (f * synch[i] > e) { g = f; e = f * synch[i]; k = i; } } f = max(f, LOGTOD(sys_precision)); if (nlist <= sys_minclock || f <= d || peer_list[k]->flags & (FLAG_TRUE | FLAG_PREFER)) break; #ifdef DEBUG if (debug > 2) printf( "select: drop %s select %.6f jitter %.6f\n", ntoa(&peer_list[k]->srcadr), g, d); #endif for (j = k + 1; j < nlist; j++) { peer_list[j - 1] = peer_list[j]; error[j - 1] = error[j]; } nlist--; } /* * What remains is a list usually not greater than sys_minclock * peers. We want only a peer at the lowest stratum to become * the system peer, although all survivors are eligible for the * combining algorithm. Consider each peer in turn and OR the * leap bits on the assumption that, if some of them honk * nonzero bits, they must know what they are doing. Check for * prefer and pps peers at any stratum. Note that the head of * the list is at the lowest stratum and that unsynchronized * peers cannot survive this far. */ leap_next = 0; for (i = 0; i < nlist; i++) { peer = peer_list[i]; sys_survivors++; leap_next |= peer->leap; peer->status = CTL_PST_SEL_SYNCCAND; if (peer->flags & FLAG_PREFER) sys_prefer = peer; if (peer == osys_peer) typesystem = peer; #ifdef REFCLOCK if (peer->refclktype == REFCLK_ATOM_PPS) sys_pps = peer; #endif /* REFCLOCK */ #if DEBUG if (debug > 1) printf("cluster: survivor %s metric %.6f\n", ntoa(&peer_list[i]->srcadr), synch[i]); #endif } /* * Anticlockhop provision. Keep the current system peer if it is * a survivor but not first in the list. But do that only HOPPER * times. 
*/ if (osys_peer == NULL || typesystem == NULL || typesystem == peer_list[0] || sys_hopper > sys_maxhop) { typesystem = peer_list[0]; sys_hopper = 0; } else { peer->selbroken++; } /* * Mitigation rules of the game. There are several types of * peers that can be selected here: (1) orphan, (2) prefer peer * (flag FLAG_PREFER) (3) pps peers (type REFCLK_ATOM_PPS), (4) * the existing system peer, if any, and (5) the head of the * survivor list. */ if (typesystem->stratum >= sys_orphan) { /* * If in orphan mode, choose the system peer. If the * lowest distance, we are the orphan parent and the * offset is zero. */ sys_peer = typesystem; sys_peer->status = CTL_PST_SEL_SYSPEER; if (sys_orphandelay < sys_peer->rootdelay) { sys_offset = 0; sys_refid = htonl(LOOPBACKADR); } else { sys_offset = sys_peer->offset; sys_refid = addr2refid(&sys_peer->srcadr); } sys_jitter = LOGTOD(sys_precision); #ifdef DEBUG if (debug > 1) printf("select: orphan offset %.6f\n", sys_offset); #endif } else if (sys_prefer) { /* * If a pps peer is present, choose it; otherwise, * choose the prefer peer. */ if (sys_pps) { sys_peer = sys_pps; sys_peer->status = CTL_PST_SEL_PPS; sys_offset = sys_peer->offset; if (!pps_control) NLOG(NLOG_SYSEVENT) msyslog(LOG_INFO, "pps sync enabled"); pps_control = current_time; #ifdef DEBUG if (debug > 1) printf("select: pps offset %.6f\n", sys_offset); #endif } else { sys_peer = sys_prefer; sys_peer->status = CTL_PST_SEL_SYSPEER; sys_offset = sys_peer->offset; #ifdef DEBUG if (debug > 1) printf("select: prefer offset %.6f\n", sys_offset); #endif } if (sys_peer->stratum == STRATUM_REFCLOCK || sys_peer->stratum == STRATUM_UNSPEC) sys_refid = sys_peer->refid; else sys_refid = addr2refid(&sys_peer->srcadr); sys_jitter = sys_peer->jitter; } else { /* * Otherwise, choose the anticlockhopper. 
*/ sys_peer = typesystem; sys_peer->status = CTL_PST_SEL_SYSPEER; clock_combine(peer_list, nlist); if (sys_peer->stratum == STRATUM_REFCLOCK || sys_peer->stratum == STRATUM_UNSPEC) sys_refid = sys_peer->refid; else sys_refid = addr2refid(&sys_peer->srcadr); sys_jitter = SQRT(SQUARE(sys_peer->jitter) + SQUARE(sys_jitter)); #ifdef DEBUG if (debug > 1) printf("select: combine offset %.6f\n", sys_offset); #endif } /* * We have found the alpha male. */ sys_peer->flags |= FLAG_SYSPEER; if (osys_peer != sys_peer) { char *src; report_event(EVNT_PEERSTCHG, NULL); #ifdef REFCLOCK if (sys_peer->flags & FLAG_REFCLOCK) src = refnumtoa(&sys_peer->srcadr); else #endif /* REFCLOCK */ src = ntoa(&sys_peer->srcadr); NLOG(NLOG_SYNCSTATUS) msyslog(LOG_INFO, "synchronized to %s, stratum %d", src, sys_peer->stratum); } clock_update(); } /* * clock_combine - compute system offset and jitter from selected peers */ static void clock_combine( struct peer **peers, /* survivor list */ int npeers /* number of survivors */ ) { int i; double x, y, z, w; y = z = w = 0; for (i = 0; i < npeers; i++) { x = root_distance(peers[i]); y += 1. / x; z += peers[i]->offset / x; w += SQUARE(peers[i]->offset - peers[0]->offset) / x; } sys_offset = z / y; sys_jitter = SQRT(w / y); } /* * root_distance - compute synchronization distance from peer to root */ static double root_distance( struct peer *peer ) { double dist; /* * Careful squeak here. The value returned must be greater than * the minimum root dispersion in order to avoid clockhop with * highly precise reference clocks. In orphan mode lose the peer * root delay, as that is used by the election algorithm. */ if (peer->stratum >= sys_orphan) dist = 0; else dist = peer->rootdelay; dist += max(sys_mindisp, dist + peer->delay) / 2 + peer->rootdispersion + peer->disp + clock_phi * (current_time - peer->update) + peer->jitter; return (dist); } /* * peer_xmit - send packet for persistent association. 
*/ static void peer_xmit( struct peer *peer /* peer structure pointer */ ) { struct pkt xpkt; /* transmit packet */ int sendlen, authlen; keyid_t xkeyid = 0; /* transmit key ID */ l_fp xmt_tx; if (!peer->dstadr) /* don't bother with peers without interface */ return; /* * This is deliciously complicated. There are three cases. * * case leap stratum refid delay dispersion * * normal system system system system system * orphan child 00 orphan system orphan system * orphan parent 00 orphan loopbk 0 0 */ /* * This is a normal packet. Use the system variables. */ if (sys_stratum < sys_orphan) { xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap, peer->version, peer->hmode); xpkt.stratum = STRATUM_TO_PKT(sys_stratum); xpkt.refid = sys_refid; xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is a orphan child packet. The host is synchronized to an * orphan parent. Show leap synchronized, orphan stratum, system * reference ID, orphan root delay and system root dispersion. */ } else if (sys_peer != NULL) { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, peer->version, peer->hmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = htonl(LOOPBACKADR); xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is an orphan parent. Show leap synchronized, orphan * stratum, loopack reference ID and zero root delay and root * dispersion. */ } else { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, peer->version, peer->hmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = sys_refid; xpkt.rootdelay = 0; xpkt.rootdispersion = 0; } xpkt.ppoll = peer->hpoll; xpkt.precision = sys_precision; HTONL_FP(&sys_reftime, &xpkt.reftime); HTONL_FP(&peer->org, &xpkt.org); HTONL_FP(&peer->rec, &xpkt.rec); /* * If the received packet contains a MAC, the transmitted packet * is authenticated and contains a MAC. 
If not, the transmitted * packet is not authenticated. * * It is most important when autokey is in use that the local * interface IP address be known before the first packet is * sent. Otherwise, it is not possible to compute a correct MAC * the recipient will accept. Thus, the I/O semantics have to do * a little more work. In particular, the wildcard interface * might not be usable. */ sendlen = LEN_PKT_NOMAC; if (!(peer->flags & FLAG_AUTHENABLE)) { get_systime(&peer->xmt); HTONL_FP(&peer->xmt, &xpkt.xmt); sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt, sendlen); peer->sent++; #ifdef DEBUG if (debug) printf("transmit: at %ld %s->%s mode %d\n", current_time, peer->dstadr ? stoa(&peer->dstadr->sin) : "-", stoa(&peer->srcadr), peer->hmode); #endif return; } /* * The received packet contains a MAC, so the transmitted packet * must be authenticated. If autokey is enabled, fuss with the * various modes; otherwise, symmetric key cryptography is used. */ #ifdef OPENSSL if (crypto_flags && (peer->flags & FLAG_SKEY)) { struct exten *exten; /* extension field */ /* * The Public Key Dance (PKD): Cryptographic credentials * are contained in extension fields, each including a * 4-octet length/code word followed by a 4-octet * association ID and optional additional data. Optional * data includes a 4-octet data length field followed by * the data itself. Request messages are sent from a * configured association; response messages can be sent * from a configured association or can take the fast * path without ever matching an association. Response * messages have the same code as the request, but have * a response bit and possibly an error bit set. In this * implementation, a message may contain no more than * one command and no more than one response. * * Cryptographic session keys include both a public and * a private componet. Request and response messages * using extension fields are always sent with the * private component set to zero. 
Packets without * extension fields indlude the private component when * the session key is generated. */ while (1) { /* * Allocate and initialize a keylist if not * already done. Then, use the list in inverse * order, discarding keys once used. Keep the * latest key around until the next one, so * clients can use client/server packets to * compute propagation delay. * * Note that once a key is used from the list, * it is retained in the key cache until the * next key is used. This is to allow a client * to retrieve the encrypted session key * identifier to verify authenticity. * * If for some reason a key is no longer in the * key cache, a birthday has happened and the * pseudo-random sequence is probably broken. In * that case, purge the keylist and regenerate * it. */ if (peer->keynumber == 0) make_keylist(peer, peer->dstadr); else peer->keynumber--; xkeyid = peer->keylist[peer->keynumber]; if (authistrusted(xkeyid)) break; else key_expire(peer); } peer->keyid = xkeyid; exten = NULL; switch (peer->hmode) { /* * In broadcast server mode the autokey values are * required by the broadcast clients. Push them when a * new keylist is generated; otherwise, push the * association message so the client can request them at * other times. */ case MODE_BROADCAST: if (peer->flags & FLAG_ASSOC) exten = crypto_args(peer, CRYPTO_AUTO | CRYPTO_RESP, NULL); else exten = crypto_args(peer, CRYPTO_ASSOC | CRYPTO_RESP, NULL); break; /* * In symmetric modes the digest, certificate, agreement * parameters, cookie and autokey values are required. * The leapsecond table is optional. But, a passive peer * will not believe the active peer until the latter has * synchronized, so the agreement must be postponed * until then. In any case, if a new keylist is * generated, the autokey values are pushed. * * If the crypto bit is lit, don't send requests. */ case MODE_ACTIVE: case MODE_PASSIVE: if (peer->flash & TEST9) break; /* * Parameter and certificate. 
*/ if (!peer->crypto) exten = crypto_args(peer, CRYPTO_ASSOC, sys_hostname); else if (!(peer->crypto & CRYPTO_FLAG_VALID)) exten = crypto_args(peer, CRYPTO_CERT, peer->issuer); /* * Identity. Note we have to sign the * certificate before the cookie to avoid a * deadlock when the passive peer is walking the * certificate trail. Awesome. */ else if (!(peer->crypto & CRYPTO_FLAG_VRFY)) exten = crypto_args(peer, crypto_ident(peer), NULL); else if (sys_leap != LEAP_NOTINSYNC && !(peer->crypto & CRYPTO_FLAG_SIGN)) exten = crypto_args(peer, CRYPTO_SIGN, sys_hostname); /* * Autokey. We request the cookie only when the * server and client are synchronized and * signatures work both ways. On the other hand, * the active peer needs the autokey values * before then and when the passive peer is * waiting for the active peer to synchronize. * Any time we regenerate the key list, we offer * the autokey values without being asked. */ else if (sys_leap != LEAP_NOTINSYNC && peer->leap != LEAP_NOTINSYNC && !(peer->crypto & CRYPTO_FLAG_AGREE)) exten = crypto_args(peer, CRYPTO_COOK, NULL); else if (peer->flags & FLAG_ASSOC) exten = crypto_args(peer, CRYPTO_AUTO | CRYPTO_RESP, NULL); else if (!(peer->crypto & CRYPTO_FLAG_AUTO)) exten = crypto_args(peer, CRYPTO_AUTO, NULL); /* * Postamble. We trade leapseconds only when the * server and client are synchronized. */ else if (sys_leap != LEAP_NOTINSYNC && peer->leap != LEAP_NOTINSYNC && peer->crypto & CRYPTO_FLAG_TAI && !(peer->crypto & CRYPTO_FLAG_LEAP)) exten = crypto_args(peer, CRYPTO_TAI, NULL); break; /* * In client mode the digest, certificate, agreement * parameters and cookie are required. The leapsecond * table is optional. If broadcast client mode, the * autokey values are required as well. In broadcast * client mode, these values must be acquired during the * client/server exchange to avoid having to wait until * the next key list regeneration. 
Otherwise, the poor * dude may die a lingering death until becoming * unreachable and attempting rebirth. * * If neither the server or client have the agreement * parameters, the protocol transmits the cookie in the * clear. If the server has the parameters, the client * requests them and the protocol blinds it using the * agreed key. It is a protocol error if the client has * the parameters but the server does not. * * If the crypto bit is lit, don't send requests. */ case MODE_CLIENT: if (peer->flash & TEST9) break; /* * Parameter and certificate. */ if (!peer->crypto) exten = crypto_args(peer, CRYPTO_ASSOC, sys_hostname); else if (!(peer->crypto & CRYPTO_FLAG_VALID)) exten = crypto_args(peer, CRYPTO_CERT, peer->issuer); /* * Identity */ else if (!(peer->crypto & CRYPTO_FLAG_VRFY)) exten = crypto_args(peer, crypto_ident(peer), NULL); /* * Autokey */ else if (!(peer->crypto & CRYPTO_FLAG_AGREE)) exten = crypto_args(peer, CRYPTO_COOK, NULL); else if (!(peer->crypto & CRYPTO_FLAG_AUTO) && (peer->cast_flags & MDF_BCLNT)) exten = crypto_args(peer, CRYPTO_AUTO, NULL); /* * Postamble. We can sign the certificate here, * since there is no chance of deadlock. */ else if (sys_leap != LEAP_NOTINSYNC && !(peer->crypto & CRYPTO_FLAG_SIGN)) exten = crypto_args(peer, CRYPTO_SIGN, sys_hostname); else if (sys_leap != LEAP_NOTINSYNC && peer->crypto & CRYPTO_FLAG_TAI && !(peer->crypto & CRYPTO_FLAG_LEAP)) exten = crypto_args(peer, CRYPTO_TAI, NULL); break; } /* * Build the extension fields as directed. A response to * a request is always sent, even if an error. If an * error occurs when sending a request, the crypto * machinery broke or was misconfigured. In that case * light the crypto bit to suppress further requests. 
*/ if (peer->cmmd != NULL) { peer->cmmd->associd = htonl(peer->associd); sendlen += crypto_xmit(&xpkt, &peer->srcadr, sendlen, peer->cmmd, 0); free(peer->cmmd); peer->cmmd = NULL; } if (exten != NULL) { int ltemp = 0; if (exten->opcode != 0) { ltemp = crypto_xmit(&xpkt, &peer->srcadr, sendlen, exten, 0); if (ltemp == 0) { peer->flash |= TEST9; /* crypto error */ free(exten); return; } } sendlen += ltemp; free(exten); } /* * If extension fields are present, we must use a * private cookie value of zero. Don't send if the * crypto bit is set and no extension field is present, * but in that case give back the key. Most intricate. */ if (sendlen > LEN_PKT_NOMAC) { session_key(&peer->dstadr->sin, &peer->srcadr, xkeyid, 0, 2); } else if (peer->flash & TEST9) { authtrust(xkeyid, 0); return; } } #endif /* OPENSSL */ /* * Stash the transmit timestamp corrected for the encryption * delay. If autokey, give back the key, as we use keys only * once. Check for errors such as missing keys, buffer overflow, * etc. */ xkeyid = peer->keyid; get_systime(&peer->xmt); L_ADD(&peer->xmt, &sys_authdelay); HTONL_FP(&peer->xmt, &xpkt.xmt); authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen); if (authlen == 0) { msyslog(LOG_INFO, "transmit: %s key %u not found", stoa(&peer->srcadr), xkeyid); peer->flash |= TEST9; /* no key found */ return; } sendlen += authlen; #ifdef OPENSSL if (xkeyid > NTP_MAXKEY) authtrust(xkeyid, 0); #endif /* OPENSSL */ get_systime(&xmt_tx); if (sendlen > sizeof(xpkt)) { msyslog(LOG_ERR, "buffer overflow %u", sendlen); exit (-1); } sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt, sendlen); /* * Calculate the encryption delay. Keep the minimum over * the latest two samples. 
*/ L_SUB(&xmt_tx, &peer->xmt); L_ADD(&xmt_tx, &sys_authdelay); sys_authdly[1] = sys_authdly[0]; sys_authdly[0] = xmt_tx.l_uf; if (sys_authdly[0] < sys_authdly[1]) sys_authdelay.l_uf = sys_authdly[0]; else sys_authdelay.l_uf = sys_authdly[1]; peer->sent++; #ifdef OPENSSL #ifdef DEBUG if (debug) printf( "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d index %d\n", current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-", ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen - authlen, authlen, peer->keynumber); #endif #else #ifdef DEBUG if (debug) printf( "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n", current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-", ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen - authlen, authlen); #endif #endif /* OPENSSL */ } /* * fast_xmit - Send packet for nonpersistent association. Note that * neither the source or destination can be a broadcast address. */ static void fast_xmit( struct recvbuf *rbufp, /* receive packet pointer */ int xmode, /* transmit mode */ keyid_t xkeyid, /* transmit key ID */ int mask /* restrict mask */ ) { struct pkt xpkt; /* transmit packet structure */ struct pkt *rpkt; /* receive packet structure */ l_fp xmt_ts; /* timestamp */ l_fp xmt_tx; /* timestamp after authent */ int sendlen, authlen; #ifdef OPENSSL u_int32 temp32; #endif /* * Initialize transmit packet header fields from the receive * buffer provided. We leave some fields intact as received. If * the gazinta was from a multicast address, the gazoutta must * go out another way. * * The root delay field is special. If the system stratum is * less than the orphan stratum, send the real root delay. * Otherwise, if there is no system peer, send the orphan delay. * Otherwise, we must be an orphan parent, so send zero. */ rpkt = &rbufp->recv_pkt; if (rbufp->dstadr->flags & INT_MCASTOPEN) rbufp->dstadr = findinterface(&rbufp->recv_srcadr); /* * This is deliciously complicated. There are four cases. 
* * case leap stratum refid delay dispersion * * KoD 11 16 KISS system system * normal system system system system system * orphan child 00 orphan system orphan system * orphan parent 00 orphan loopbk 0 0 */ /* * This is a kiss-of-death (KoD) packet. Show leap * unsynchronized, stratum zero, reference ID the four-character * kiss code and system root delay. Note the rate limit on these * packets. Once a second initialize a bucket counter. Every * packet sent decrements the counter until reaching zero. If * the counter is zero, drop the kiss. */ if (mask & RES_LIMITED) { sys_limitrejected++; if (sys_kod == 0 || !(mask & RES_DEMOBILIZE)) return; sys_kod--; xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOTINSYNC, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_UNSPEC; memcpy(&xpkt.refid, "RATE", 4); xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is a normal packet. Use the system variables. */ } else if (sys_stratum < sys_orphan) { xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_TO_PKT(sys_stratum); xpkt.refid = sys_refid; xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is a orphan child packet. The host is synchronized to an * orphan parent. Show leap synchronized, orphan stratum, system * reference ID and orphan root delay. */ } else if (sys_peer != NULL) { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = sys_refid; xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is an orphan parent. Show leap synchronized, orphan * stratum, loopack reference ID and zero root delay. 
*/ } else { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = htonl(LOOPBACKADR); xpkt.rootdelay = HTONS_FP(DTOFP(0)); xpkt.rootdispersion = HTONS_FP(DTOFP(0)); } xpkt.ppoll = rpkt->ppoll; xpkt.precision = sys_precision; xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); HTONL_FP(&sys_reftime, &xpkt.reftime); xpkt.org = rpkt->xmt; HTONL_FP(&rbufp->recv_time, &xpkt.rec); /* * If the received packet contains a MAC, the transmitted packet * is authenticated and contains a MAC. If not, the transmitted * packet is not authenticated. */ sendlen = LEN_PKT_NOMAC; if (rbufp->recv_length == sendlen) { get_systime(&xmt_ts); HTONL_FP(&xmt_ts, &xpkt.xmt); sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen); #ifdef DEBUG if (debug) printf("transmit: at %ld %s->%s mode %d\n", current_time, stoa(&rbufp->dstadr->sin), stoa(&rbufp->recv_srcadr), xmode); #endif return; } /* * The received packet contains a MAC, so the transmitted packet * must be authenticated. For symmetric key cryptography, use * the predefined and trusted symmetric keys to generate the * cryptosum. For autokey cryptography, use the server private * value to generate the cookie, which is unique for every * source-destination-key ID combination. */ #ifdef OPENSSL if (xkeyid > NTP_MAXKEY) { keyid_t cookie; /* * The only way to get here is a reply to a legitimate * client request message, so the mode must be * MODE_SERVER. If an extension field is present, there * can be only one and that must be a command. Do what * needs, but with private value of zero so the poor * jerk can decode it. If no extension field is present, * use the cookie to generate the session key. 
*/ cookie = session_key(&rbufp->recv_srcadr, &rbufp->dstadr->sin, 0, sys_private, 0); if (rbufp->recv_length >= (int)(sendlen + MAX_MAC_LEN + 2 * sizeof(u_int32))) { session_key(&rbufp->dstadr->sin, &rbufp->recv_srcadr, xkeyid, 0, 2); temp32 = CRYPTO_RESP; rpkt->exten[0] |= htonl(temp32); sendlen += crypto_xmit(&xpkt, &rbufp->recv_srcadr, sendlen, (struct exten *)rpkt->exten, cookie); } else { session_key(&rbufp->dstadr->sin, &rbufp->recv_srcadr, xkeyid, cookie, 2); } } #endif /* OPENSSL */ get_systime(&xmt_ts); L_ADD(&xmt_ts, &sys_authdelay); HTONL_FP(&xmt_ts, &xpkt.xmt); authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen); sendlen += authlen; #ifdef OPENSSL if (xkeyid > NTP_MAXKEY) authtrust(xkeyid, 0); #endif /* OPENSSL */ get_systime(&xmt_tx); if (sendlen > sizeof(xpkt)) { msyslog(LOG_ERR, "buffer overflow %u", sendlen); exit (-1); } sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen); /* * Calculate the encryption delay. Keep the minimum over the * latest two samples. 
*/ L_SUB(&xmt_tx, &xmt_ts); L_ADD(&xmt_tx, &sys_authdelay); sys_authdly[1] = sys_authdly[0]; sys_authdly[0] = xmt_tx.l_uf; if (sys_authdly[0] < sys_authdly[1]) sys_authdelay.l_uf = sys_authdly[0]; else sys_authdelay.l_uf = sys_authdly[1]; #ifdef DEBUG if (debug) printf( "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n", current_time, ntoa(&rbufp->dstadr->sin), ntoa(&rbufp->recv_srcadr), xmode, xkeyid, sendlen - authlen, authlen); #endif } #ifdef OPENSSL /* * key_expire - purge the key list */ void key_expire( struct peer *peer /* peer structure pointer */ ) { int i; if (peer->keylist != NULL) { for (i = 0; i <= peer->keynumber; i++) authtrust(peer->keylist[i], 0); free(peer->keylist); peer->keylist = NULL; } value_free(&peer->sndval); peer->keynumber = 0; #ifdef DEBUG if (debug) printf("key_expire: at %lu\n", current_time); #endif } #endif /* OPENSSL */ /* * Determine if the peer is unfit for synchronization * * A peer is unfit for synchronization if * > TEST10 bad leap or stratum below floor or at or above ceiling * > TEST11 root distance exceeded * > TEST12 a direct or indirect synchronization loop would form * > TEST13 unreachable or noselect */ int /* FALSE if fit, TRUE if unfit */ peer_unfit( struct peer *peer /* peer structure pointer */ ) { int rval = 0; /* * A stratum error occurs if (1) the server has never been * synchronized, (2) the server stratum is below the floor or * greater than or equal to the ceiling, (3) the system stratum * is below the orphan stratum and the server stratum is greater * than or equal to the orphan stratum. */ if (peer->leap == LEAP_NOTINSYNC || peer->stratum < sys_floor || peer->stratum >= sys_ceiling || (sys_stratum < sys_orphan && peer->stratum >= sys_orphan)) rval |= TEST10; /* stratum out of bounds */ /* * A distance error occurs if the root distance is greater than * or equal to the distance threshold plus the increment due to * one poll interval. 
*/ if (root_distance(peer) >= sys_maxdist + clock_phi * ULOGTOD(sys_poll)) rval |= TEST11; /* distance exceeded */ /* * A loop error occurs if the remote peer is synchronized to the * local peer of if the remote peer is synchronized to the same * server as the local peer, but only if the remote peer is not * the orphan parent. */ if (peer->stratum > 1 && peer->refid != htonl(LOOPBACKADR) && ((!peer->dstadr || peer->refid == peer->dstadr->addr_refid) || peer->refid == sys_refid)) rval |= TEST12; /* synch loop */ /* * An unreachable error occurs if the server is unreachable or * the noselect bit is set. */ if (!peer->reach || peer->flags & FLAG_NOSELECT) rval |= TEST13; /* unreachable */ peer->flash &= ~PEER_TEST_MASK; peer->flash |= rval; return (rval); } /* * Find the precision of this particular machine */ #define MINSTEP 100e-9 /* minimum clock increment (s) */ #define MAXSTEP 20e-3 /* maximum clock increment (s) */ #define MINLOOPS 5 /* minimum number of step samples */ /* * This routine calculates the system precision, defined as the minimum * of a sequence of differences between successive readings of the * system clock. However, if the system clock can be read more than once * during a tick interval, the difference can be zero or one LSB unit, * where the LSB corresponds to one nanosecond or one microsecond. * Conceivably, if some other process preempts this one and reads the * clock, the difference can be more than one LSB unit. * * For hardware clock frequencies of 10 MHz or less, we assume the * logical clock advances only at the hardware clock tick. For higher * frequencies, we assume the logical clock can advance no more than 100 * nanoseconds between ticks. */ int default_get_precision(void) { l_fp val; /* current seconds fraction */ l_fp last; /* last seconds fraction */ l_fp diff; /* difference */ double tick; /* computed tick value */ double dtemp; /* scratch */ int i; /* log2 precision */ /* * Loop to find tick value in nanoseconds. 
Toss out outlyer * values less than the minimun tick value. In wacky cases, use * the default maximum value. */ get_systime(&last); tick = MAXSTEP; for (i = 0; i < MINLOOPS;) { get_systime(&val); diff = val; L_SUB(&diff, &last); last = val; LFPTOD(&diff, dtemp); if (dtemp < MINSTEP) continue; i++; if (dtemp < tick) tick = dtemp; } /* * Find the nearest power of two. */ NLOG(NLOG_SYSEVENT) msyslog(LOG_INFO, "precision = %.3f usec", tick * 1e6); for (i = 0; tick <= 1; i++) tick *= 2; if (tick - 1. > 1. - tick / 2) i--; return (-i); } /* * kod_proto - called once per second to limit kiss-of-death packets */ void kod_proto(void) { sys_kod = sys_kod_rate; } /* * init_proto - initialize the protocol module's data */ void init_proto(void) { l_fp dummy; int i; /* * Fill in the sys_* stuff. Default is don't listen to * broadcasting, authenticate. */ sys_leap = LEAP_NOTINSYNC; sys_stratum = STRATUM_UNSPEC; memcpy(&sys_refid, "INIT", 4); sys_precision = (s_char)default_get_precision(); sys_jitter = LOGTOD(sys_precision); sys_rootdelay = 0; sys_orphandelay = (double)(ntp_random() & 0xffff) / 65536. * sys_maxdist; sys_rootdispersion = 0; L_CLR(&sys_reftime); sys_peer = NULL; sys_survivors = 0; get_systime(&dummy); sys_manycastserver = 0; sys_bclient = 0; sys_bdelay = DEFBROADDELAY; sys_calldelay = BURST_DELAY; sys_authenticate = 1; L_CLR(&sys_authdelay); sys_authdly[0] = sys_authdly[1] = 0; sys_stattime = 0; proto_clr_stats(); for (i = 0; i < MAX_TTL; i++) { sys_ttl[i] = (u_char)((i * 256) / MAX_TTL); sys_ttlmax = i; } #ifdef OPENSSL sys_automax = 1 << NTP_AUTOMAX; #endif /* OPENSSL */ /* * Default these to enable */ ntp_enable = 1; #ifndef KERNEL_FLL_BUG kern_enable = 1; #endif pps_enable = 0; stats_control = 1; } /* * proto_config - configure the protocol module */ void proto_config( int item, u_long value, double dvalue, struct sockaddr_storage* svalue ) { /* * Figure out what he wants to change, then do it */ switch (item) { /* * Turn on/off kernel discipline. 
*/ case PROTO_KERNEL: kern_enable = (int)value; break; /* * Turn on/off clock discipline. */ case PROTO_NTP: ntp_enable = (int)value; break; /* * Turn on/off monitoring. */ case PROTO_MONITOR: if (value) mon_start(MON_ON); else mon_stop(MON_ON); break; /* * Turn on/off statistics. */ case PROTO_FILEGEN: stats_control = (int)value; break; /* * Turn on/off enable broadcasts. */ case PROTO_BROADCLIENT: sys_bclient = (int)value; if (sys_bclient == 0) io_unsetbclient(); else io_setbclient(); break; /* * Turn on/off PPS discipline. */ case PROTO_PPS: pps_enable = (int)value; break; /* * Add muliticast group address. */ case PROTO_MULTICAST_ADD: if (svalue) io_multicast_add(*svalue); sys_bclient = 1; break; /* * Delete multicast group address. */ case PROTO_MULTICAST_DEL: if (svalue) io_multicast_del(*svalue); break; /* * Set default broadcast delay. */ case PROTO_BROADDELAY: sys_bdelay = dvalue; break; /* * Set modem call delay. */ case PROTO_CALLDELAY: sys_calldelay = (int)value; break; /* * Turn on/off authentication to mobilize ephemeral * associations. */ case PROTO_AUTHENTICATE: sys_authenticate = (int)value; break; /* * Set minimum number of survivors. */ case PROTO_MINCLOCK: sys_minclock = (int)dvalue; break; /* * Set maximum number of preemptable associations. */ case PROTO_MAXCLOCK: sys_maxclock = (int)dvalue; break; /* * Set minimum number of survivors. */ case PROTO_MINSANE: sys_minsane = (int)dvalue; break; /* * Set stratum floor. */ case PROTO_FLOOR: sys_floor = (int)dvalue; break; /* * Set stratum ceiling. */ case PROTO_CEILING: sys_ceiling = (int)dvalue; break; /* * Set orphan stratum. */ case PROTO_ORPHAN: sys_orphan = (int)dvalue; break; /* * Set cohort switch. */ case PROTO_COHORT: sys_cohort = (int)dvalue; break; /* * Set minimum dispersion increment. */ case PROTO_MINDISP: sys_mindisp = dvalue; break; /* * Set maximum distance (select threshold). */ case PROTO_MAXDIST: sys_maxdist = dvalue; break; /* * Set anticlockhop threshold. 
*/ case PROTO_MAXHOP: sys_maxhop = (int)dvalue; break; /* * Set adjtime() resolution (s). */ case PROTO_ADJ: sys_tick = dvalue; break; /* * Set manycast beacon interval. */ case PROTO_BEACON: sys_beacon = (int)dvalue; break; #ifdef REFCLOCK /* * Turn on/off refclock calibrate */ case PROTO_CAL: cal_enable = (int)value; break; #endif /* REFCLOCK */ default: /* * Log this error. */ msyslog(LOG_INFO, "proto_config: illegal item %d, value %ld", item, value); } } /* * proto_clr_stats - clear protocol stat counters */ void proto_clr_stats(void) { sys_stattime = current_time; sys_received = 0; sys_processed = 0; sys_newversionpkt = 0; sys_oldversionpkt = 0; sys_unknownversion = 0; sys_restricted = 0; sys_badlength = 0; sys_badauth = 0; sys_limitrejected = 0; } Index: stable/8/sys/netinet/igmp.c =================================================================== --- stable/8/sys/netinet/igmp.c (revision 281230) +++ stable/8/sys/netinet/igmp.c (revision 281231) @@ -1,3648 +1,3647 @@ /*- * Copyright (c) 2007-2009 Bruce Simpson. * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 */ /* * Internet Group Management Protocol (IGMP) routines. * [RFC1112, RFC2236, RFC3376] * * Written by Steve Deering, Stanford, May 1988. * Modified by Rosen Sharma, Stanford, Aug 1994. * Modified by Bill Fenner, Xerox PARC, Feb 1995. * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995. * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson. 
* * MULTICAST Revision: 3.5.1.4 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IGMPV3 #define KTR_IGMPV3 KTR_INET #endif static struct igmp_ifinfo * igi_alloc_locked(struct ifnet *); static void igi_delete_locked(const struct ifnet *); static void igmp_dispatch_queue(struct ifqueue *, int, const int); static void igmp_fasttimo_vnet(void); static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *); static int igmp_handle_state_change(struct in_multi *, struct igmp_ifinfo *); static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *); static int igmp_input_v1_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v2_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v3_query(struct ifnet *, const struct ip *, /*const*/ struct igmpv3 *); static int igmp_input_v3_group_query(struct in_multi *, struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *); static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *, /*const*/ struct igmp *); static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *, /*const*/ struct igmp *); static void igmp_intr(struct mbuf *); static int igmp_isgroupreported(const struct in_addr); static struct mbuf * igmp_ra_alloc(void); #ifdef KTR static char * igmp_rec_type_to_str(const int); #endif static void igmp_set_version(struct igmp_ifinfo *, const int); static void igmp_slowtimo_vnet(void); static int igmp_v1v2_queue_report(struct in_multi *, const int); static void igmp_v1v2_process_group_timer(struct in_multi *, const int); static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *); static void igmp_v2_update_group(struct in_multi *, const int); static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *); static void 
igmp_v3_dispatch_general_query(struct igmp_ifinfo *); static struct mbuf * igmp_v3_encap_report(struct ifnet *, struct mbuf *); static int igmp_v3_enqueue_group_record(struct ifqueue *, struct in_multi *, const int, const int, const int); static int igmp_v3_enqueue_filter_change(struct ifqueue *, struct in_multi *); static void igmp_v3_process_group_timers(struct igmp_ifinfo *, struct ifqueue *, struct ifqueue *, struct in_multi *, const int); static int igmp_v3_merge_state_changes(struct in_multi *, struct ifqueue *); static void igmp_v3_suppress_group_record(struct in_multi *); static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); static const struct netisr_handler igmp_nh = { .nh_name = "igmp", .nh_handler = igmp_intr, .nh_proto = NETISR_IGMP, .nh_policy = NETISR_POLICY_SOURCE, }; /* * System-wide globals. * * Unlocked access to these is OK, except for the global IGMP output * queue. The IGMP subsystem lock ends up being system-wide for the moment, * because all VIMAGEs have to share a global output queue, as netisrs * themselves are not virtualized. * * Locking: * * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. * Any may be taken independently; if any are held at the same * time, the above lock order must be followed. * * All output is delegated to the netisr. * Now that Giant has been eliminated, the netisr may be inlined. * * IN_MULTI_LOCK covers in_multi. * * IGMP_LOCK covers igmp_ifinfo and any global variables in this file, * including the output queue. * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of * per-link state iterators. * * igmp_ifinfo is valid as long as PF_INET is attached to the interface, * therefore it is not refcounted. * We allow unlocked reads of igmp_ifinfo when accessed via in_multi. 
* * Reference counting * * IGMP acquires its own reference every time an in_multi is passed to * it and the group is being joined for the first time. * * IGMP releases its reference(s) on in_multi in a deferred way, * because the operations which process the release run as part of * a loop whose control variables are directly affected by the release * (that, and not recursing on the IF_ADDR_LOCK). * * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds * to a vnet in ifp->if_vnet. * * SMPng: XXX We may potentially race operations on ifma_protospec. * The problem is that we currently lack a clean way of taking the * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing, * as anything which modifies ifma needs to be covered by that lock. * So check for ifma_protospec being NULL before proceeding. */ struct mtx igmp_mtx; struct mbuf *m_raopt; /* Router Alert option */ MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); /* * VIMAGE-wide globals. * * The IGMPv3 timers themselves need to run per-image, however, * protosw timers run globally (see tcp). * An ifnet can only be in one vimage at a time, and the loopback * ifnet, loif, is itself virtualized. * It would otherwise be possible to seriously hose IGMP state, * and create inconsistencies in upstream multicast routing, if you have * multiple VIMAGEs running on the same link joining different multicast * groups, UNLESS the "primary IP address" is different. This is because * IGMP for IPv4 does not force link-local addresses to be used for each * node, unlike MLD for IPv6. * Obviously the IGMPv3 per-interface state has per-vimage granularity * also as a result. * * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection * policy to control the address used by IGMP on the link. 
*/ static VNET_DEFINE(int, interface_timers_running); /* IGMPv3 general * query response */ static VNET_DEFINE(int, state_change_timers_running); /* IGMPv3 state-change * retransmit */ static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host * report; IGMPv3 g/sg * query response */ #define V_interface_timers_running VNET(interface_timers_running) #define V_state_change_timers_running VNET(state_change_timers_running) #define V_current_state_timers_running VNET(current_state_timers_running) static VNET_DEFINE(LIST_HEAD(, igmp_ifinfo), igi_head); static VNET_DEFINE(struct igmpstat, igmpstat) = { .igps_version = IGPS_VERSION_3, .igps_len = sizeof(struct igmpstat), }; static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0}; #define V_igi_head VNET(igi_head) #define V_igmpstat VNET(igmpstat) #define V_igmp_gsrdelay VNET(igmp_gsrdelay) static VNET_DEFINE(int, igmp_recvifkludge) = 1; static VNET_DEFINE(int, igmp_sendra) = 1; static VNET_DEFINE(int, igmp_sendlocal) = 1; static VNET_DEFINE(int, igmp_v1enable) = 1; static VNET_DEFINE(int, igmp_v2enable) = 1; static VNET_DEFINE(int, igmp_legacysupp); static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3; #define V_igmp_recvifkludge VNET(igmp_recvifkludge) #define V_igmp_sendra VNET(igmp_sendra) #define V_igmp_sendlocal VNET(igmp_sendlocal) #define V_igmp_v1enable VNET(igmp_v1enable) #define V_igmp_v2enable VNET(igmp_v2enable) #define V_igmp_legacysupp VNET(igmp_legacysupp) #define V_igmp_default_version VNET(igmp_default_version) /* * Virtualized sysctls. 
*/ SYSCTL_VNET_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW, &VNET_NAME(igmpstat), igmpstat, ""); SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW, &VNET_NAME(igmp_recvifkludge), 0, "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address"); SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW, &VNET_NAME(igmp_sendra), 0, "Send IP Router Alert option in IGMPv2/v3 messages"); SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW, &VNET_NAME(igmp_sendlocal), 0, "Send IGMP membership reports for 224.0.0.0/24 groups"); SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW, &VNET_NAME(igmp_v1enable), 0, "Enable backwards compatibility with IGMPv1"); SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW, &VNET_NAME(igmp_v2enable), 0, "Enable backwards compatibility with IGMPv2"); SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW, &VNET_NAME(igmp_legacysupp), 0, "Allow v1/v2 reports to suppress v3 group responses"); SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, default_version, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I", "Default version of IGMP to run on each interface"); SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I", "Rate limit for IGMPv3 Group-and-Source queries in seconds"); /* * Non-virtualized sysctls. 
*/ SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo, "Per-interface IGMPv3 state"); static __inline void igmp_save_context(struct mbuf *m, struct ifnet *ifp) { #ifdef VIMAGE m->m_pkthdr.header = ifp->if_vnet; #endif /* VIMAGE */ m->m_pkthdr.flowid = ifp->if_index; } static __inline void igmp_scrub_context(struct mbuf *m) { m->m_pkthdr.header = NULL; m->m_pkthdr.flowid = 0; } #ifdef KTR static __inline char * inet_ntoa_haddr(in_addr_t haddr) { struct in_addr ia; ia.s_addr = htonl(haddr); return (inet_ntoa(ia)); } #endif /* * Restore context from a queued IGMP output chain. * Return saved ifindex. * * VIMAGE: The assertion is there to make sure that we * actually called CURVNET_SET() with what's in the mbuf chain. */ static __inline uint32_t igmp_restore_context(struct mbuf *m) { #ifdef notyet #if defined(VIMAGE) && defined(INVARIANTS) KASSERT(curvnet == (m->m_pkthdr.header), ("%s: called when curvnet was not restored", __func__)); #endif #endif return (m->m_pkthdr.flowid); } /* * Retrieve or set default IGMP version. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by IGMP lock. */ static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS) { int error; int new; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); IGMP_LOCK(); new = V_igmp_default_version; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) goto out_locked; if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { error = EINVAL; goto out_locked; } CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", V_igmp_default_version, new); V_igmp_default_version = new; out_locked: IGMP_UNLOCK(); return (error); } /* * Retrieve or set threshold between group-source queries in seconds. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by IGMP lock. 
*/ static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS) { int error; int i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); IGMP_LOCK(); i = V_igmp_gsrdelay.tv_sec; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) goto out_locked; if (i < -1 || i >= 60) { error = EINVAL; goto out_locked; } CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d", V_igmp_gsrdelay.tv_sec, i); V_igmp_gsrdelay.tv_sec = i; out_locked: IGMP_UNLOCK(); return (error); } /* * Expose struct igmp_ifinfo to userland, keyed by ifindex. * For use by ifmcstat(8). * * SMPng: NOTE: Does an unlocked ifindex space read. * VIMAGE: Assume curvnet set by caller. The node handler itself * is not directly virtualized. */ static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS) { int *name; int error; u_int namelen; struct ifnet *ifp; struct igmp_ifinfo *igi; name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo)); if (error) return (error); IN_MULTI_LOCK(); IGMP_LOCK(); if (name[0] <= 0 || name[0] > V_if_index) { error = ENOENT; goto out_locked; } error = ENOENT; ifp = ifnet_byindex(name[0]); if (ifp == NULL) goto out_locked; LIST_FOREACH(igi, &V_igi_head, igi_link) { if (ifp == igi->igi_ifp) { error = SYSCTL_OUT(req, igi, sizeof(struct igmp_ifinfo)); break; } } out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (error); } /* * Dispatch an entire queue of pending packet chains * using the netisr. * VIMAGE: Assumes the vnet pointer has been set. */ static void igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop) { struct mbuf *m; for (;;) { _IF_DEQUEUE(ifq, m); if (m == NULL) break; CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m); if (loop) m->m_flags |= M_IGMP_LOOP; netisr_dispatch(NETISR_IGMP, m); if (--limit == 0) break; } } /* * Filter outgoing IGMP report state by group. 
* * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1). * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are * disabled for all groups in the 224.0.0.0/24 link-local scope. However, * this may break certain IGMP snooping switches which rely on the old * report behaviour. * * Return zero if the given group is one for which IGMP reports * should be suppressed, or non-zero if reports should be issued. */ static __inline int igmp_isgroupreported(const struct in_addr addr) { if (in_allhosts(addr) || ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) return (0); return (1); } /* * Construct a Router Alert option to use in outgoing packets. */ static struct mbuf * igmp_ra_alloc(void) { struct mbuf *m; struct ipoption *p; MGET(m, M_DONTWAIT, MT_DATA); p = mtod(m, struct ipoption *); p->ipopt_dst.s_addr = INADDR_ANY; p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */ p->ipopt_list[1] = 0x04; /* 4 bytes long */ p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */ p->ipopt_list[3] = 0x00; /* pad byte */ m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1]; return (m); } /* * Attach IGMP when PF_INET is attached to an interface. */ struct igmp_ifinfo * igmp_domifattach(struct ifnet *ifp) { struct igmp_ifinfo *igi; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = igi_alloc_locked(ifp); if (!(ifp->if_flags & IFF_MULTICAST)) igi->igi_flags |= IGIF_SILENT; IGMP_UNLOCK(); return (igi); } /* * VIMAGE: assume curvnet set by caller. 
*/ static struct igmp_ifinfo * igi_alloc_locked(/*const*/ struct ifnet *ifp) { struct igmp_ifinfo *igi; IGMP_LOCK_ASSERT(); igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO); if (igi == NULL) goto out; igi->igi_ifp = ifp; igi->igi_version = V_igmp_default_version; igi->igi_flags = 0; igi->igi_rv = IGMP_RV_INIT; igi->igi_qi = IGMP_QI_INIT; igi->igi_qri = IGMP_QRI_INIT; igi->igi_uri = IGMP_URI_INIT; SLIST_INIT(&igi->igi_relinmhead); /* * Responses to general queries are subject to bounds. */ IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS); LIST_INSERT_HEAD(&V_igi_head, igi, igi_link); CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)", ifp, ifp->if_xname); out: return (igi); } /* * Hook for ifdetach. * * NOTE: Some finalization tasks need to run before the protocol domain * is detached, but also before the link layer does its cleanup. * * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK(). * XXX This is also bitten by unlocked ifma_protospec access. */ void igmp_ifdetach(struct ifnet *ifp) { struct igmp_ifinfo *igi; struct ifmultiaddr *ifma; struct in_multi *inm, *tinm; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; if (igi->igi_version == IGMP_VERSION_3) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; if (inm->inm_state == IGMP_LEAVING_MEMBER) { SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele); } inm_clear_recorded(inm); } IF_ADDR_RUNLOCK(ifp); /* * Free the in_multi reference(s) for this IGMP lifecycle. 
*/ SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) { SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); inm_release_locked(inm); } } IGMP_UNLOCK(); } /* * Hook for domifdetach. */ void igmp_domifdetach(struct ifnet *ifp) { struct igmp_ifinfo *igi; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; igi_delete_locked(ifp); IGMP_UNLOCK(); } static void igi_delete_locked(const struct ifnet *ifp) { struct igmp_ifinfo *igi, *tigi; CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK_ASSERT(); LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) { if (igi->igi_ifp == ifp) { /* * Free deferred General Query responses. */ _IF_DRAIN(&igi->igi_gq); LIST_REMOVE(igi, igi_link); KASSERT(SLIST_EMPTY(&igi->igi_relinmhead), ("%s: there are dangling in_multi references", __func__)); free(igi, M_IGMP); return; } } #ifdef INVARIANTS panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp); #endif } /* * Process a received IGMPv1 query. * Return non-zero if the message should be dropped. * * VIMAGE: The curvnet pointer is derived from the input ifp. */ static int igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, const struct igmp *igmp) { struct ifmultiaddr *ifma; struct igmp_ifinfo *igi; struct in_multi *inm; /* * IGMPv1 Host Mmembership Queries SHOULD always be addressed to * 224.0.0.1. They are always treated as General Queries. * igmp_group is always ignored. Do not drop it as a userland * daemon may wish to see it. * XXX SMPng: unlocked increments in igmpstat assumed atomic. 
*/ if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) { IGMPSTAT_INC(igps_rcv_badqueries); return (0); } IGMPSTAT_INC(igps_rcv_gen_queries); IN_MULTI_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Switch to IGMPv1 host compatibility mode. */ igmp_set_version(igi, IGMP_VERSION_1); CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname); /* * Start the timers in all of our group records * for the interface on which the query arrived, * except those which are already running. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; if (inm->inm_timer != 0) continue; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_V1V2_MAX_RI * PR_FASTHZ); V_current_state_timers_running = 1; break; case IGMP_LEAVING_MEMBER: break; } } IF_ADDR_RUNLOCK(ifp); out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (0); } /* * Process a received IGMPv2 general or group-specific query. */ static int igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, const struct igmp *igmp) { struct ifmultiaddr *ifma; struct igmp_ifinfo *igi; struct in_multi *inm; int is_general_query; uint16_t timer; is_general_query = 0; /* * Validate address fields upfront. * XXX SMPng: unlocked increments in igmpstat assumed atomic. 
*/ if (in_nullhost(igmp->igmp_group)) { /* * IGMPv2 General Query. * If this was not sent to the all-hosts group, ignore it. */ if (!in_allhosts(ip->ip_dst)) return (0); IGMPSTAT_INC(igps_rcv_gen_queries); is_general_query = 1; } else { /* IGMPv2 Group-Specific Query. */ IGMPSTAT_INC(igps_rcv_group_queries); } IN_MULTI_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Ignore v2 query if in v1 Compatibility Mode. */ if (igi->igi_version == IGMP_VERSION_1) goto out_locked; igmp_set_version(igi, IGMP_VERSION_2); timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; if (is_general_query) { /* * For each reporting group joined on this * interface, kick the report timer. */ CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)", ifp, ifp->if_xname); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; igmp_v2_update_group(inm, timer); } IF_ADDR_RUNLOCK(ifp); } else { /* * Group-specific IGMPv2 query, we need only * look up the single group to process it. */ inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); igmp_v2_update_group(inm, timer); } } out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (0); } /* * Update the report timer on a group in response to an IGMPv2 query. * * If we are becoming the reporting member for this group, start the timer. * If we already are the reporting member for this group, and timer is * below the threshold, reset it. * * We may be updating the group for the first time since we switched * to IGMPv3. 
If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike IGMPv3, the delay per group should be jittered
 * to avoid bursts of IGMPv2 reports.
 */
static void
igmp_v2_update_group(struct in_multi *inm, const int timer)
{

	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);

	IN_MULTI_LOCK_ASSERT();

	/*
	 * First pass: dispose of every state that does NOT end up
	 * (re)starting the report timer.  Whatever breaks out of the
	 * switch falls through to the common ->REPORTING tail below.
	 */
	switch (inm->inm_state) {
	case IGMP_SLEEPING_MEMBER:
		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		return;
	case IGMP_REPORTING_MEMBER:
		/* A timer already due no later than the new one is kept. */
		if (inm->inm_timer != 0 && inm->inm_timer <= timer) {
			CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
			    "skipping.", __func__);
			return;
		}
		break;
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		break;
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
	default:
		/* Nothing to do for these states. */
		return;
	}

	/* Become the reporting member with a jittered delay. */
	CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
	inm->inm_state = IGMP_REPORTING_MEMBER;
	inm->inm_timer = IGMP_RANDOM_DELAY(timer);
	V_current_state_timers_running = 1;
}

/*
 * Process a received IGMPv3 general, group-specific or
 * group-and-source-specific query.
 * Assumes m has already been pulled up to the full IGMP message length.
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
/*
 * Summary of the visible behaviour: decodes the response/robustness/interval
 * fields of the query, classifies it as general or group(-and-source)
 * specific, then (with the IN_MULTI and IGMP locks held) records the
 * querier's parameters on the interface and schedules a response timer.
 * Every return path in this function returns 0.
 */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo	*igi;
	struct in_multi		*inm;
	int			 is_general_query;
	uint32_t		 maxresp, nsrc, qqi;
	uint16_t		 timer;
	uint8_t			 qrv;

	is_general_query = 0;

	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);

	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
	if (maxresp >= 128) {
		/* Codes >= 128 are expanded from mantissa/exponent form. */
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
			  (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
		    qrv, IGMP_RV_INIT);
		qrv = IGMP_RV_INIT;
	}

	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert the response time to fast-timeout ticks; never zero. */
	timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 * XXX SMPng: Unlocked access to igmpstat counters here.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			return (0);
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0)
			IGMPSTAT_INC(igps_rcv_group_queries);
		else
			IGMPSTAT_INC(igps_rcv_gsr_queries);
	}

	IN_MULTI_LOCK();
	IGMP_LOCK();

	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));

	if (igi->igi_flags & IGIF_LOOPBACK) {
		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
		    ifp, ifp->if_xname);
		goto out_locked;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)",
		    igi->igi_version, ifp, ifp->if_xname);
		goto out_locked;
	}

	igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = maxresp;

	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
	    maxresp);

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
		    ifp, ifp->if_xname);
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
			V_interface_timers_running = 1;
		}
	} else {
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		inm = inm_lookup(ifp, igmpv3->igmp_group);
		if (inm == NULL)
			goto out_locked;
		if (nsrc > 0) {
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &V_igmp_gsrdelay)) {
				CTR1(KTR_IGMPV3, "%s: GS query throttled.",
				    __func__);
				IGMPSTAT_INC(igps_drop_gsr_queries);
				goto out_locked;
			}
		}
		CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)",
		    inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname);
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
			igmp_input_v3_group_query(inm, igi, timer, igmpv3);
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();

	return (0);
}

/*
 * Process a received IGMPv3 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 *
 * Asserts that both the IN_MULTI and IGMP locks are held. 'timer' is the
 * response delay chosen by the caller; it is passed through
 * IGMP_RANDOM_DELAY() before being stored in inm->inm_timer.
 */
static int
igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int			 retval;
	uint16_t		 nsrc;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	retval = 0;

	/* Only reporting / query-pending memberships respond to the query. */
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		return (retval);
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		V_current_state_timers_running = 1;
		return (retval);
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		V_current_state_timers_running = 1;
		return (retval);
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr	*ap;
		int			 i, nrecorded;

		/* The source list is read from directly after the header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0)
				break;
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			CTR1(KTR_IGMPV3,
			    "%s: schedule response to SG query", __func__);
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
			V_current_state_timers_running = 1;
		}
	}

	return (retval);
}

/*
 * Process a received IGMPv1 host membership report.
 *
 * NOTE: 0.0.0.0 workaround breaks const correctness.
*/ static int igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; struct in_multi *inm; IGMPSTAT_INC(igps_rcv_reports); if (ifp->if_flags & IFF_LOOPBACK) return (0); if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || !in_hosteq(igmp->igmp_group, ip->ip_dst)) { IGMPSTAT_INC(igps_rcv_badreports); return (EINVAL); } /* * RFC 3376, Section 4.2.13, 9.2, 9.3: * Booting clients may use the source address 0.0.0.0. Some * IGMP daemons may not know how to use IP_RECVIF to determine * the interface upon which this message was received. * Replace 0.0.0.0 with the subnet address if told to do so. */ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { IFP_TO_IA(ifp, ia); if (ia != NULL) { ip->ip_src.s_addr = htonl(ia->ia_subnet); ifa_free(&ia->ia_ifa); } } CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); /* * IGMPv1 report suppression. * If we are a member of this group, and our membership should be * reported, stop our group timer and transition to the 'lazy' state. */ IN_MULTI_LOCK(); inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { struct igmp_ifinfo *igi; igi = inm->inm_igi; if (igi == NULL) { KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); goto out_locked; } IGMPSTAT_INC(igps_rcv_ourreports); /* * If we are in IGMPv3 host mode, do not allow the * other host's IGMPv1 report to suppress our reports * unless explicitly configured to do so. 
*/ if (igi->igi_version == IGMP_VERSION_3) { if (V_igmp_legacysupp) igmp_v3_suppress_group_record(inm); goto out_locked; } inm->inm_timer = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_REPORTING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); if (igi->igi_version == IGMP_VERSION_1) inm->inm_state = IGMP_LAZY_MEMBER; else if (igi->igi_version == IGMP_VERSION_2) inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } out_locked: IN_MULTI_UNLOCK(); return (0); } /* * Process a received IGMPv2 host membership report. * * NOTE: 0.0.0.0 workaround breaks const correctness. */ static int igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; struct in_multi *inm; /* * Make sure we don't hear our own membership report. Fast * leave requires knowing that we are the only member of a * group. */ IFP_TO_IA(ifp, ia); if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) { ifa_free(&ia->ia_ifa); return (0); } IGMPSTAT_INC(igps_rcv_reports); if (ifp->if_flags & IFF_LOOPBACK) { if (ia != NULL) ifa_free(&ia->ia_ifa); return (0); } if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || !in_hosteq(igmp->igmp_group, ip->ip_dst)) { if (ia != NULL) ifa_free(&ia->ia_ifa); IGMPSTAT_INC(igps_rcv_badreports); return (EINVAL); } /* * RFC 3376, Section 4.2.13, 9.2, 9.3: * Booting clients may use the source address 0.0.0.0. Some * IGMP daemons may not know how to use IP_RECVIF to determine * the interface upon which this message was received. 
* Replace 0.0.0.0 with the subnet address if told to do so. */ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { if (ia != NULL) ip->ip_src.s_addr = htonl(ia->ia_subnet); } if (ia != NULL) ifa_free(&ia->ia_ifa); CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); /* * IGMPv2 report suppression. * If we are a member of this group, and our membership should be * reported, and our group timer is pending or about to be reset, * stop our group timer by transitioning to the 'lazy' state. */ IN_MULTI_LOCK(); inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { struct igmp_ifinfo *igi; igi = inm->inm_igi; KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); IGMPSTAT_INC(igps_rcv_ourreports); /* * If we are in IGMPv3 host mode, do not allow the * other host's IGMPv1 report to suppress our reports * unless explicitly configured to do so. */ if (igi->igi_version == IGMP_VERSION_3) { if (V_igmp_legacysupp) igmp_v3_suppress_group_record(inm); goto out_locked; } inm->inm_timer = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_SLEEPING_MEMBER: break; case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); case IGMP_LAZY_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } out_locked: IN_MULTI_UNLOCK(); return (0); } void igmp_input(struct mbuf *m, int off) { int iphlen; struct ifnet *ifp; struct igmp *igmp; struct ip *ip; int igmplen; int minlen; int queryver; CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off); ifp = m->m_pkthdr.rcvif; IGMPSTAT_INC(igps_rcv_total); ip = mtod(m, struct ip *); iphlen = off; igmplen = ip->ip_len; /* * Validate lengths. 
*/ if (igmplen < IGMP_MINLEN) { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return; } /* * Always pullup to the minimum size for v1/v2 or v3 * to amortize calls to m_pullup(). */ minlen = iphlen; if (igmplen >= IGMP_V3_QUERY_MINLEN) minlen += IGMP_V3_QUERY_MINLEN; else minlen += IGMP_MINLEN; if ((m->m_flags & M_EXT || m->m_len < minlen) && (m = m_pullup(m, minlen)) == 0) { IGMPSTAT_INC(igps_rcv_tooshort); return; } ip = mtod(m, struct ip *); /* * Validate checksum. */ m->m_data += iphlen; m->m_len -= iphlen; igmp = mtod(m, struct igmp *); if (in_cksum(m, igmplen)) { IGMPSTAT_INC(igps_rcv_badsum); m_freem(m); return; } m->m_data -= iphlen; m->m_len += iphlen; /* * IGMP control traffic is link-scope, and must have a TTL of 1. * DVMRP traffic (e.g. mrinfo, mtrace) is an exception; * probe packets may come from beyond the LAN. */ if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) { IGMPSTAT_INC(igps_rcv_badttl); m_freem(m); return; } switch (igmp->igmp_type) { case IGMP_HOST_MEMBERSHIP_QUERY: if (igmplen == IGMP_MINLEN) { if (igmp->igmp_code == 0) queryver = IGMP_VERSION_1; else queryver = IGMP_VERSION_2; } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { queryver = IGMP_VERSION_3; } else { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return; } switch (queryver) { case IGMP_VERSION_1: IGMPSTAT_INC(igps_rcv_v1v2_queries); if (!V_igmp_v1enable) break; if (igmp_input_v1_query(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_VERSION_2: IGMPSTAT_INC(igps_rcv_v1v2_queries); if (!V_igmp_v2enable) break; if (igmp_input_v2_query(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_VERSION_3: { struct igmpv3 *igmpv3; uint16_t igmpv3len; uint16_t nsrc; - int srclen; IGMPSTAT_INC(igps_rcv_v3_queries); igmpv3 = (struct igmpv3 *)igmp; /* * Validate length based on source count. 
*/ nsrc = ntohs(igmpv3->igmp_numsrc); - srclen = sizeof(struct in_addr) * nsrc; - if (nsrc * sizeof(in_addr_t) > srclen) { + if (nsrc * sizeof(in_addr_t) > + UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) { IGMPSTAT_INC(igps_rcv_tooshort); return; } /* * m_pullup() may modify m, so pullup in * this scope. */ igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN + - srclen; + sizeof(struct in_addr) * nsrc; if ((m->m_flags & M_EXT || m->m_len < igmpv3len) && (m = m_pullup(m, igmpv3len)) == NULL) { IGMPSTAT_INC(igps_rcv_tooshort); return; } igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *) + iphlen); if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) { m_freem(m); return; } } break; } break; case IGMP_v1_HOST_MEMBERSHIP_REPORT: if (!V_igmp_v1enable) break; if (igmp_input_v1_report(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_v2_HOST_MEMBERSHIP_REPORT: if (!V_igmp_v2enable) break; if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); if (igmp_input_v2_report(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_v3_HOST_MEMBERSHIP_REPORT: /* * Hosts do not need to process IGMPv3 membership reports, * as report suppression is no longer required. */ if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); break; default: break; } /* * Pass all valid IGMP packets up to any process(es) listening on a * raw IGMP socket. */ rip_input(m, off); } /* * Fast timeout handler (global). * VIMAGE: Timeout handlers are expected to service all vimages. */ void igmp_fasttimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_fasttimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Fast timeout handler (per-vnet). * Sends are shuffled off to a netisr to deal with Giant. * * VIMAGE: Assume caller has set up our curvnet. 
 */
static void
igmp_fasttimo_vnet(void)
{
	struct ifqueue		 scq;	/* State-change packets */
	struct ifqueue		 qrq;	/* Query response packets */
	struct ifnet		*ifp;
	struct igmp_ifinfo	*igi;
	struct ifmultiaddr	*ifma;
	struct in_multi		*inm;
	int			 loop, uri_fasthz;

	loop = 0;
	uri_fasthz = 0;

	/*
	 * Quick check to see if any work needs to be done, in order to
	 * minimize the overhead of fasttimo processing.
	 * SMPng: XXX Unlocked reads.
	 */
	if (!V_current_state_timers_running &&
	    !V_interface_timers_running &&
	    !V_state_change_timers_running)
		return;

	IN_MULTI_LOCK();
	IGMP_LOCK();

	/*
	 * IGMPv3 General Query response timer processing.
	 */
	if (V_interface_timers_running) {
		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);

		/* Re-set below only if some link timer is still pending. */
		V_interface_timers_running = 0;
		LIST_FOREACH(igi, &V_igi_head, igi_link) {
			if (igi->igi_v3_timer == 0) {
				/* Do nothing. */
			} else if (--igi->igi_v3_timer == 0) {
				igmp_v3_dispatch_general_query(igi);
			} else {
				V_interface_timers_running = 1;
			}
		}
	}

	if (!V_current_state_timers_running &&
	    !V_state_change_timers_running)
		goto out_locked;

	/* The per-group handlers below re-set these flags as needed. */
	V_current_state_timers_running = 0;
	V_state_change_timers_running = 0;

	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);

	/*
	 * IGMPv1/v2/v3 host report and state-change timer processing.
	 * Note: Processing a v3 group timer may remove a node.
	 */
	LIST_FOREACH(igi, &V_igi_head, igi_link) {
		ifp = igi->igi_ifp;

		/* qrq/scq are only initialised, and only used, in v3 mode. */
		if (igi->igi_version == IGMP_VERSION_3) {
			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
			    PR_FASTHZ);

			memset(&qrq, 0, sizeof(struct ifqueue));
			IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);

			memset(&scq, 0, sizeof(struct ifqueue));
			IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
		}

		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_INET ||
			    ifma->ifma_protospec == NULL)
				continue;
			inm = (struct in_multi *)ifma->ifma_protospec;
			switch (igi->igi_version) {
			case IGMP_VERSION_1:
			case IGMP_VERSION_2:
				igmp_v1v2_process_group_timer(inm,
				    igi->igi_version);
				break;
			case IGMP_VERSION_3:
				igmp_v3_process_group_timers(igi, &qrq,
				    &scq, inm, uri_fasthz);
				break;
			}
		}
		IF_ADDR_RUNLOCK(ifp);

		if (igi->igi_version == IGMP_VERSION_3) {
			struct in_multi		*tinm;

			igmp_dispatch_queue(&qrq, 0, loop);
			igmp_dispatch_queue(&scq, 0, loop);

			/*
			 * Free the in_multi reference(s) for this
			 * IGMP lifecycle.
			 */
			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
			    inm_nrele, tinm) {
				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
				    inm_nrele);
				inm_release_locked(inm);
			}
		}
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();
}

/*
 * Update host report group timer for IGMPv1/v2.
 * Will update the global pending timer flags.
 *
 * 'version' selects which report type (v1 or v2) is queued when the timer
 * of a REPORTING member expires.
 */
static void
igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
{
	int report_timer_expired;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	if (inm->inm_timer == 0) {
		report_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		report_timer_expired = 1;
	} else {
		/* Still counting down; keep the fast timeout active. */
		V_current_state_timers_running = 1;
		return;
	}

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		break;
	case IGMP_REPORTING_MEMBER:
		if (report_timer_expired) {
			inm->inm_state = IGMP_IDLE_MEMBER;
			(void)igmp_v1v2_queue_report(inm,
			    (version == IGMP_VERSION_2) ?
			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
		}
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		break;
	}
}

/*
 * Update a group's timers for IGMPv3.
 * Will update the global pending timer flags.
 * Note: Unlocked read from igi.
 *
 * Query responses are enqueued on 'qrq' and state-change retransmissions
 * are merged into 'scq'; 'uri_fasthz' is the value the state-change timer
 * is re-armed with while retransmissions remain.
 */
static void
igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in_multi *inm, const int uri_fasthz)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from v1/v2 compatibility mode back to v3,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the slow-timeout path.
	 */
	if (inm->inm_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		V_current_state_timers_running = 1;
	}

	if (inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		V_state_change_timers_running = 1;
	}

	/* We are in fasttimo, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired)
		return;

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/* XXX Clear recorded sources for next time. */
			inm_clear_recorded(inm);
		}
		/* FALLTHROUGH */
	case IGMP_REPORTING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->inm_scrv > 0) {
				inm->inm_sctimer = uri_fasthz;
				V_state_change_timers_running = 1;
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void)igmp_v3_merge_state_changes(inm, scq);

			inm_commit(inm);
			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);

			/*
			 * If we are leaving the group for good, make sure
			 * we release IGMP's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_ifmultiaddr TAILQ.
			 */
			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
			    inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
				    inm, inm_nrele);
			}
		}
		break;
	}
}

/*
 * Suppress a group's pending response to a group or source/group query.
 *
 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
 * Do NOT update ST1/ST0 as this operation merely suppresses
 * the currently pending group record.
 * Do NOT suppress the response to a general query. It is possible but
 * it would require adding another state or flag.
*/ static void igmp_v3_suppress_group_record(struct in_multi *inm) { IN_MULTI_LOCK_ASSERT(); KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3, ("%s: not IGMPv3 mode on link", __func__)); if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER || inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) return; if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) inm_clear_recorded(inm); inm->inm_timer = 0; inm->inm_state = IGMP_REPORTING_MEMBER; } /* * Switch to a different IGMP version on the given interface, * as per Section 7.2.1. */ static void igmp_set_version(struct igmp_ifinfo *igi, const int version) { int old_version_timer; IGMP_LOCK_ASSERT(); CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__, version, igi->igi_ifp, igi->igi_ifp->if_xname); if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) { /* * Compute the "Older Version Querier Present" timer as per * Section 8.12. */ old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri; old_version_timer *= PR_SLOWHZ; if (version == IGMP_VERSION_1) { igi->igi_v1_timer = old_version_timer; igi->igi_v2_timer = 0; } else if (version == IGMP_VERSION_2) { igi->igi_v1_timer = 0; igi->igi_v2_timer = old_version_timer; } } if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { if (igi->igi_version != IGMP_VERSION_2) { igi->igi_version = IGMP_VERSION_2; igmp_v3_cancel_link_timers(igi); } } else if (igi->igi_v1_timer > 0) { if (igi->igi_version != IGMP_VERSION_1) { igi->igi_version = IGMP_VERSION_1; igmp_v3_cancel_link_timers(igi); } } } /* * Cancel pending IGMPv3 timers for the given link and all groups * joined on it; state-change, general-query, and group-query timers. * * Only ever called on a transition from v3 to Compatibility mode. Kill * the timers stone dead (this may be expensive for large N groups), they * will be restarted if Compatibility Mode deems that they must be due to * query processing. 
 */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	struct ifmultiaddr	*ifma;
	struct ifnet		*ifp;
	struct in_multi		*inm, *tinm;

	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
	    igi->igi_ifp, igi->igi_ifp->if_xname);

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/*
	 * Stop the v3 General Query Response on this link stone dead.
	 * If fasttimo is woken up due to V_interface_timers_running,
	 * the flag will be cleared if there are no pending link timers.
	 */
	igi->igi_v3_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
	 */
	ifp = igi->igi_ifp;
	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_INET ||
		    ifma->ifma_protospec == NULL)
			continue;
		inm = (struct in_multi *)ifma->ifma_protospec;
		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/*
			 * These states are either not relevant in v3 mode,
			 * or are unreported. Do nothing.
			 */
			break;
		case IGMP_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching to
			 * compatibility mode, we need to release the final
			 * reference held for issuing the INCLUDE {}, and
			 * transition to REPORTING to ensure the host leave
			 * message is sent upstream to the old querier --
			 * transition to NOT would lose the leave and race.
			 */
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
			/* FALLTHROUGH */
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
			inm_clear_recorded(inm);
			/* FALLTHROUGH */
		case IGMP_REPORTING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			break;
		}
		/*
		 * Always clear state-change and group report timers.
		 * Free any pending IGMPv3 state-change records.
		 */
		inm->inm_sctimer = 0;
		inm->inm_timer = 0;
		_IF_DRAIN(&inm->inm_scq);
	}
	IF_ADDR_RUNLOCK(ifp);
	/* Release the deferred references after the address list walk. */
	SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) {
		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
		inm_release_locked(inm);
	}
}

/*
 * Update the Older Version Querier Present timers for a link.
 * See Section 7.2.1 of RFC 3376.
 *
 * Called from the slow timeout with the IGMP lock held. A running v1 timer
 * takes precedence over a running v2 timer.
 */
static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
{

	IGMP_LOCK_ASSERT();

	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
		/*
		 * IGMPv1 and IGMPv2 Querier Present timers expired.
		 *
		 * Revert to IGMPv3.
		 */
		if (igi->igi_version != IGMP_VERSION_3) {
			CTR5(KTR_IGMPV3,
			    "%s: transition from v%d -> v%d on %p(%s)",
			    __func__, igi->igi_version, IGMP_VERSION_3,
			    igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_version = IGMP_VERSION_3;
		}
	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer expired,
		 * IGMPv2 Querier Present timer running.
		 * If IGMPv2 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv2 is enabled, revert to IGMPv2.
		 */
		if (!V_igmp_v2enable) {
			CTR5(KTR_IGMPV3,
			    "%s: transition from v%d -> v%d on %p(%s)",
			    __func__, igi->igi_version, IGMP_VERSION_3,
			    igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_v2_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
		} else {
			--igi->igi_v2_timer;
			if (igi->igi_version != IGMP_VERSION_2) {
				CTR5(KTR_IGMPV3,
				    "%s: transition from v%d -> v%d on %p(%s)",
				    __func__, igi->igi_version, IGMP_VERSION_2,
				    igi->igi_ifp, igi->igi_ifp->if_xname);
				igi->igi_version = IGMP_VERSION_2;
			}
		}
	} else if (igi->igi_v1_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer running.
		 * Stop IGMPv2 timer if running.
		 *
		 * If IGMPv1 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
		 */
		if (!V_igmp_v1enable) {
			CTR5(KTR_IGMPV3,
			    "%s: transition from v%d -> v%d on %p(%s)",
			    __func__, igi->igi_version, IGMP_VERSION_3,
			    igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_v1_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
		} else {
			--igi->igi_v1_timer;
		}
		if (igi->igi_v2_timer > 0) {
			CTR3(KTR_IGMPV3,
			    "%s: cancel v2 timer on %p(%s)",
			    __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_v2_timer = 0;
		}
	}
}

/*
 * Global slowtimo handler.
 * VIMAGE: Timeout handlers are expected to service all vimages.
 */
void
igmp_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		igmp_slowtimo_vnet();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Per-vnet slowtimo handler.
 * Runs the querier-present timer update for every interface on
 * V_igi_head while holding the IGMP lock.
 */
static void
igmp_slowtimo_vnet(void)
{
	struct igmp_ifinfo *igi;

	IGMP_LOCK();

	LIST_FOREACH(igi, &V_igi_head, igi_link) {
		igmp_v1v2_process_querier_timers(igi);
	}

	IGMP_UNLOCK();
}

/*
 * Dispatch an IGMPv1/v2 host report or leave message.
 * These are always small enough to fit inside a single mbuf.
 */
/*
 * 'type' is the IGMP message type to place in the header. Returns ENOMEM
 * if no mbuf header could be allocated, otherwise 0 after handing the
 * packet to netisr_dispatch().
 */
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	struct ifnet		*ifp;
	struct igmp		*igmp;
	struct ip		*ip;
	struct mbuf		*m;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	ifp = inm->inm_ifp;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOMEM);
	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));

	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

	/* Step past the IP header so the checksum covers only IGMP. */
	m->m_data += sizeof(struct ip);
	m->m_len = sizeof(struct igmp);

	igmp = mtod(m, struct igmp *);
	igmp->igmp_type = type;
	igmp->igmp_code = 0;
	igmp->igmp_group = inm->inm_addr;
	igmp->igmp_cksum = 0;
	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));

	/* Step back so the mbuf again starts at the IP header. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
	ip->ip_off = 0;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to the all-routers group, reports to the group. */
	if (type == IGMP_HOST_LEAVE_MESSAGE)
		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
	else
		ip->ip_dst = inm->inm_addr;

	igmp_save_context(m, ifp);

	m->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
		m->m_flags |= M_IGMP_LOOP;

	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
	netisr_dispatch(NETISR_IGMP, m);

	return (0);
}

/*
 * Process a state change from the upper layer for the given IPv4 group.
 *
 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state, it is now up to IGMP to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the IGMPv3 state machine at group level. The IGMP module
 * however makes the decision as to which IGMP protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
* * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can * save ourselves a bunch of work; any exclusive mode groups need not * compute source filter lists. * * VIMAGE: curvnet should have been set by caller, as this routine * is called from the socket option handlers. */ int igmp_change_state(struct in_multi *inm) { struct igmp_ifinfo *igi; struct ifnet *ifp; int error; IN_MULTI_LOCK_ASSERT(); error = 0; /* * Try to detect if the upper layer just asked us to change state * for an interface which has now gone away. */ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); ifp = inm->inm_ifma->ifma_ifp; /* * Sanity check that netinet's notion of ifp is the * same as net's. */ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); /* * If we detect a state transition to or from MCAST_UNDEFINED * for this group, then we are starting or finishing an IGMP * life cycle for this group. */ if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) { CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__, inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode); if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_IGMPV3, "%s: initial join", __func__); error = igmp_initial_join(inm, igi); goto out_locked; } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_IGMPV3, "%s: final leave", __func__); igmp_final_leave(inm, igi); goto out_locked; } } else { CTR1(KTR_IGMPV3, "%s: filter set change", __func__); } error = igmp_handle_state_change(inm, igi); out_locked: IGMP_UNLOCK(); return (error); } /* * Perform the initial join for an IGMP group. * * When joining a group: * If the group should have its IGMP traffic suppressed, do nothing. * IGMPv1 starts sending IGMPv1 host membership reports. * IGMPv2 starts sending IGMPv2 host membership reports. 
 *  IGMPv3 will schedule an IGMPv3 state-change report containing the
 *  initial state of the membership.
 *
 * Returns 0 on success, or the error from queueing the v1/v2 report or
 * (sign-flipped) from enqueueing the v3 group record.
 */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
{
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	int			 error, retval, syncstates;

	CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
	    inm->inm_ifp->if_xname);

	error = 0;
	syncstates = 1;

	ifp = inm->inm_ifp;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));

	/*
	 * Groups joined on loopback or marked as 'not reported',
	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
	 * are never reported in any IGMP protocol exchanges.
	 * All other groups enter the appropriate IGMP state machine
	 * for the version in use on this link.
	 * A link marked as IGIF_SILENT causes IGMP to be completely
	 * disabled for the link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		CTR1(KTR_IGMPV3,
"%s: not kicking state machine for silent group", __func__);
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Deal with overlapping in_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER)
			inm_release_locked(inm);

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			inm->inm_state = IGMP_IDLE_MEMBER;
			error = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
			if (error == 0) {
				inm->inm_timer = IGMP_RANDOM_DELAY(
				    IGMP_V1V2_MAX_RI * PR_FASTHZ);
				V_current_state_timers_running = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->inm_scq;
			_IF_DRAIN(ifq);
			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
			    0, 0);
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			if (retval <= 0) {
				/* Convert the non-positive result to an errno-style value. */
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next igmp_fasttimo (~200ms),
			 * giving us an opportunity to merge the reports.
			 */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				KASSERT(igi->igi_rv > 1,
				   ("%s: invalid robustness %d", __func__,
				    igi->igi_rv));
				inm->inm_scrv = igi->igi_rv;
			}
			inm->inm_sctimer = 1;
			V_state_change_timers_running = 1;

			error = 0;
			break;
		}
	}

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		inm_commit(inm);
		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
	}

	return (error);
}

/*
 * Issue an intermediate state change during the IGMP life-cycle.
*/
/*
 * On loopback or silent links, for unreported groups, and on links not
 * running IGMPv3, the pending state is committed immediately (T1 -> T0)
 * and 0 is returned.  Otherwise the group's state-change queue is
 * drained and a fresh state-change record is enqueued; a return of
 * <= 0 from the enqueue is passed back negated, and on success the
 * state-change timer is armed and 0 is returned.
 */
static int
igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
{
	struct ifnet		*ifp;
	int			 retval;

	CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)",
	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
	    inm->inm_ifp->if_xname);

	ifp = inm->inm_ifp;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));

	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr) ||
	    (igi->igi_version != IGMP_VERSION_3)) {
		if (!igmp_isgroupreported(inm->inm_addr)) {
			CTR1(KTR_IGMPV3,
"%s: not kicking state machine for silent group", __func__);
		}
		CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
		inm_commit(inm);
		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
		return (0);
	}

	/* Discard any not-yet-sent state changes; the new record replaces them. */
	_IF_DRAIN(&inm->inm_scq);

	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
	CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
	if (retval <= 0)
		return (-retval);

	/*
	 * If record(s) were enqueued, start the state-change
	 * report timer for this group.
	 */
	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
	inm->inm_sctimer = 1;
	V_state_change_timers_running = 1;

	return (0);
}

/*
 * Perform the final leave for an IGMP group.
 *
 * When leaving a group:
 *  IGMPv1 does nothing.
 *  IGMPv2 sends a host leave message, if and only if we are the reporter.
 *  IGMPv3 enqueues a state-change report containing a transition
 *  to INCLUDE {} for immediate transmission.
 *
 * Unless the IGMPv3 path scheduled a report (syncstates cleared), the
 * function finishes by committing T1 -> T0 and then marking the T1
 * filter mode MCAST_UNDEFINED.
 */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
{
	int syncstates;

	syncstates = 1;

	CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)",
	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
	    inm->inm_ifp->if_xname);

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		CTR1(KTR_IGMPV3,
"%s: not kicking state machine for silent group", __func__);
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		if (igi->igi_version == IGMP_VERSION_2) {
#ifdef INVARIANTS
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
			panic("%s: IGMPv3 state reached, not IGMPv3 mode",
			     __func__);
#endif
			/*
			 * NOTE(review): the result of the leave-message
			 * enqueue is not checked; the group becomes
			 * NOT_MEMBER either way.
			 */
			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next fast timeout,
			 * giving us an opportunity to merge reports.
			 */
			_IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = igi->igi_rv;
			}
			CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d "
			    "pending retransmissions.", __func__,
			    inet_ntoa(inm->inm_addr),
			    inm->inm_ifp->if_xname, inm->inm_scrv);
			if (inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;

				/*
				 * Take a reference before queueing the
				 * final record; igmp_initial_join() drops
				 * one when it finds a LEAVING group.
				 */
				inm_acquire_locked(inm);

				retval = igmp_v3_enqueue_group_record(
				    &inm->inm_scq, inm, 1, 0, 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d", __func__,
				     retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				V_state_change_timers_running = 1;
				/* The commit is deferred while the report is pending. */
				syncstates = 0;
			}
			break;
		}
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		inm_commit(inm);
		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s",
		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
	}
}

/*
 * Enqueue an IGMPv3 group record to the given output queue.
*
 * XXX This function could do with having the allocation code
 * split out, and the multiple-tree-walks coalesced into a single
 * routine as has been done in igmp_v3_enqueue_filter_change().
 *
 * If is_state_change is zero, a current-state record is appended.
 * If is_state_change is non-zero, a state-change report is appended.
 *
 * If is_group_query is non-zero, an mbuf packet chain is allocated.
 * If is_group_query is zero, and if there is a packet with free space
 * at the tail of the queue, it will be appended to providing there
 * is enough free space.
 * Otherwise a new mbuf packet chain is allocated.
 *
 * If is_source_query is non-zero, each source is checked to see if
 * it was recorded for a Group-Source query, and will be omitted if
 * it is not both in-mode and recorded.
 *
 * The function will attempt to allocate leading space in the packet
 * for the IP/IGMP header to be prepended without fragmenting the chain.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.  The size counts each group record
 * header once and each appended source address once.
 */
static int
igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
    const int is_state_change, const int is_group_query,
    const int is_source_query)
{
	struct igmp_grouprec	 ig;
	struct igmp_grouprec	*pig;
	struct ifnet		*ifp;
	struct ip_msource	*ims, *nims;
	struct mbuf		*m0, *m, *md;
	int			 is_filter_list_change;
	int			 minrec0len, m0srcs, msrcs, nbytes, off;
	int			 record_has_sources;
	int			 now;
	int			 type;
	in_addr_t		 naddr;
	uint8_t			 mode;

	IN_MULTI_LOCK_ASSERT();

	ifp = inm->inm_ifp;
	is_filter_list_change = 0;
	m = NULL;
	m0 = NULL;
	m0srcs = 0;
	msrcs = 0;
	nbytes = 0;
	nims = NULL;
	record_has_sources = 1;
	pig = NULL;
	type = IGMP_DO_NOTHING;
	mode = inm->inm_st[1].iss_fmode;

	/*
	 * If we did not transition out of ASM mode during t0->t1,
	 * and there are no source nodes to process, we can skip
	 * the generation of source records.
	 */
	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
	    inm->inm_nsrc == 0)
		record_has_sources = 0;

	if (is_state_change) {
		/*
		 * Queue a state change record.
		 * If the mode did not change, and there are non-ASM
		 * listeners or source filters present,
		 * we potentially need to issue two records for the group.
		 * If we are transitioning to MCAST_UNDEFINED, we need
		 * not send any sources.
		 * If there are ASM listeners, and there was no filter
		 * mode transition of any kind, do nothing.
		 */
		if (mode != inm->inm_st[0].iss_fmode) {
			if (mode == MCAST_EXCLUDE) {
				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
				    __func__);
				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
			} else {
				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
				    __func__);
				type = IGMP_CHANGE_TO_INCLUDE_MODE;
				if (mode == MCAST_UNDEFINED)
					record_has_sources = 0;
			}
		} else {
			if (record_has_sources) {
				is_filter_list_change = 1;
			} else {
				type = IGMP_DO_NOTHING;
			}
		}
	} else {
		/*
		 * Queue a current state record.
		 */
		if (mode == MCAST_EXCLUDE) {
			type = IGMP_MODE_IS_EXCLUDE;
		} else if (mode == MCAST_INCLUDE) {
			type = IGMP_MODE_IS_INCLUDE;
			KASSERT(inm->inm_st[1].iss_asm == 0,
			    ("%s: inm %p is INCLUDE but ASM count is %d",
			     __func__, inm, inm->inm_st[1].iss_asm));
		}
	}

	/*
	 * Generate the filter list changes using a separate function.
	 */
	if (is_filter_list_change)
		return (igmp_v3_enqueue_filter_change(ifq, inm));

	if (type == IGMP_DO_NOTHING) {
		CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
		    __func__, inet_ntoa(inm->inm_addr),
		    inm->inm_ifp->if_xname);
		return (0);
	}

	/*
	 * If any sources are present, we must be able to fit at least
	 * one in the trailing space of the tail packet's mbuf,
	 * ideally more.
	 */
	minrec0len = sizeof(struct igmp_grouprec);
	if (record_has_sources)
		minrec0len += sizeof(in_addr_t);

	CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
	    inm->inm_ifp->if_xname);

	/*
	 * Check if we have a packet in the tail of the queue for this
	 * group into which the first group record for this group will fit.
	 * Otherwise allocate a new packet.
	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
	 * Note: Group records for G/GSR query responses MUST be sent
	 * in their own packet.
	 */
	m0 = ifq->ifq_tail;
	if (!is_group_query &&
	    m0 != NULL &&
	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
	    (m0->m_pkthdr.len + minrec0len) <
	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		m = m0;
		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
	} else {
		if (_IF_QFULL(ifq)) {
			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
			return (-ENOMEM);
		}
		m = NULL;
		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		/* Only current-state, non-query records try a cluster first. */
		if (!is_state_change && !is_group_query) {
			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
			if (m)
				m->m_data += IGMP_LEADINGSPACE;
		}
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m)
				MH_ALIGN(m, IGMP_LEADINGSPACE);
		}
		if (m == NULL)
			return (-ENOMEM);

		igmp_save_context(m, ifp);

		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
	}

	/*
	 * Append group record.
	 * If we have sources, we don't know how many yet.
	 */
	ig.ig_type = type;
	ig.ig_datalen = 0;
	ig.ig_numsrc = 0;
	ig.ig_group = inm->inm_addr;
	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
		if (m != m0)
			m_freem(m);
		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
		return (-ENOMEM);
	}
	nbytes += sizeof(struct igmp_grouprec);

	/*
	 * Append as many sources as will fit in the first packet.
	 * If we are appending to a new packet, the chain allocation
	 * may potentially use clusters; use m_getptr() in this case.
	 * If we are appending to an existing packet, we need to obtain
	 * a pointer to the group record after m_append(), in case a new
	 * mbuf was allocated.
	 * Only append sources which are in-mode at t1. If we are
	 * transitioning to MCAST_UNDEFINED state on the group, do not
	 * include source entries.
	 * Only report recorded sources in our filter set when responding
	 * to a group-source query.
	 */
	if (record_has_sources) {
		if (m == m0) {
			/* nbytes is exactly one record header here. */
			md = m_last(m);
			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
			    md->m_len - nbytes);
		} else {
			md = m_getptr(m, 0, &off);
			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
			    off);
		}
		msrcs = 0;
		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
			    inet_ntoa_haddr(ims->ims_haddr));
			now = ims_get_mode(inm, ims, 1);
			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
				    __func__);
				continue;
			}
			CTR1(KTR_IGMPV3, "%s: append node", __func__);
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
				    __func__);
				return (-ENOMEM);
			}
			/*
			 * Source bytes are accounted once, after the walk,
			 * exactly as the follow-on packet loop below does;
			 * counting them here as well would double the
			 * reported size.
			 */
			++msrcs;
			if (msrcs == m0srcs)
				break;
		}
		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
		    msrcs);
		pig->ig_numsrc = htons(msrcs);
		nbytes += (msrcs * sizeof(in_addr_t));
	}

	if (is_source_query && msrcs == 0) {
		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
		if (m != m0)
			m_freem(m);
		return (0);
	}

	/*
	 * We are good to go with first packet.
	 */
	if (m != m0) {
		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
		m->m_pkthdr.PH_vt.vt_nrecs = 1;
		_IF_ENQUEUE(ifq, m);
	} else
		m->m_pkthdr.PH_vt.vt_nrecs++;

	/*
	 * No further work needed if no source list in packet(s).
	 */
	if (!record_has_sources)
		return (nbytes);

	/*
	 * Whilst sources remain to be announced, we need to allocate
	 * a new packet and fill out as many sources as will fit.
	 * Always try for a cluster first.
	 */
	while (nims != NULL) {
		if (_IF_QFULL(ifq)) {
			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
			return (-ENOMEM);
		}
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (m)
			m->m_data += IGMP_LEADINGSPACE;
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m)
				MH_ALIGN(m, IGMP_LEADINGSPACE);
		}
		if (m == NULL)
			return (-ENOMEM);
		igmp_save_context(m, ifp);
		/* The packet is still empty, so this is where the record lands. */
		md = m_getptr(m, 0, &off);
		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);

		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
			if (m != m0)
				m_freem(m);
			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
			return (-ENOMEM);
		}
		m->m_pkthdr.PH_vt.vt_nrecs = 1;
		nbytes += sizeof(struct igmp_grouprec);

		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);

		msrcs = 0;
		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
			    inet_ntoa_haddr(ims->ims_haddr));
			now = ims_get_mode(inm, ims, 1);
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
				    __func__);
				continue;
			}
			CTR1(KTR_IGMPV3, "%s: append node", __func__);
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
				    __func__);
				return (-ENOMEM);
			}
			++msrcs;
			if (msrcs == m0srcs)
				break;
		}
		pig->ig_numsrc = htons(msrcs);
		nbytes += (msrcs * sizeof(in_addr_t));

		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
		_IF_ENQUEUE(ifq, m);
	}

	return (nbytes);
}

/*
 * Type used to mark record pass completion.
* We exploit the fact we can cast to this easily from the
 * current filter modes on each ip_msource node.
 */
typedef enum {
	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
	/* Both bits set; also used as the mask when inverting a type. */
	REC_FULL = REC_ALLOW | REC_BLOCK
} rectype_t;

/*
 * Enqueue an IGMPv3 filter list change to the given output queue.
 *
 * Source list filter state is held in an RB-tree. When the filter list
 * for a group is changed without changing its mode, we need to compute
 * the deltas between T0 and T1 for each source in the filter set,
 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 *
 * As we may potentially queue two record types, and the entire R-B tree
 * needs to be walked at once, we break this out into its own function
 * so we can generate a tightly packed queue of packets.
 *
 * XXX This could be written to only use one tree walk, although that makes
 * serializing into the mbuf chains a bit harder. For now we do two walks
 * which makes things easier on us, and it may or may not be harder on
 * the L2 cache.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
*/
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	/* Smallest useful record: one header plus one source address. */
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet		*ifp;
	struct igmp_grouprec	 ig;
	struct igmp_grouprec	*pig;
	struct ip_msource	*ims, *nims;
	struct mbuf		*m, *m0, *md;
	in_addr_t		 naddr;
	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
	int			 nallow, nblock;
	uint8_t			 mode, now, then;
	rectype_t		 crt, drt, nrt;

	IN_MULTI_LOCK_ASSERT();

	/* Nothing to report without sources, or when ASM at both t0 and t1. */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
		return (0);

	ifp = inm->inm_ifp;			/* interface */
	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
	crt = REC_NONE;	/* current group record type */
	drt = REC_NONE;	/* mask of completed group record types */
	nrt = REC_NONE;	/* record type for current node */
	m0srcs = 0;	/* # source which will fit in current mbuf chain */
	nbytes = 0;	/* # of bytes appended to group's state-change queue */
	npbytes = 0;	/* # of bytes appended this packet */
	rsrcs = 0;	/* # sources encoded in current record */
	schanged = 0;	/* # nodes encoded in overall filter change */
	nallow = 0;	/* # of source entries in ALLOW_NEW */
	nblock = 0;	/* # of source entries in BLOCK_OLD */
	nims = NULL;	/* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
			     IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
					    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				CTR1(KTR_IGMPV3,
				    "%s: use previous packet", __func__);
			} else {
				/*
				 * NOTE(review): unlike
				 * igmp_v3_enqueue_group_record(), no
				 * _IF_QFULL() check precedes this
				 * allocation - confirm that is intended.
				 */
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m)
					m->m_data += IGMP_LEADINGSPACE;
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m)
						MH_ALIGN(m, IGMP_LEADINGSPACE);
				}
				if (m == NULL) {
					CTR1(KTR_IGMPV3,
					    "%s: m_get*() failed", __func__);
					return (-ENOMEM);
				}
				m->m_pkthdr.PH_vt.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				CTR1(KTR_IGMPV3,
				    "%s: allocated new packet", __func__);
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3,
				    "%s: m_append() failed", __func__);
				return (-ENOMEM);
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL)
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
				CTR2(KTR_IGMPV3, "%s: visit node %s",
				    __func__, inet_ntoa_haddr(ims->ims_haddr));
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
				    __func__, then, now);
				if (now == then) {
					CTR1(KTR_IGMPV3,
					    "%s: skip unchanged", __func__);
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					CTR1(KTR_IGMPV3,
					    "%s: skip IN src on EX group",
					    __func__);
					continue;
				}
				/* Map the node's t1 mode onto a record type. */
				nrt = (rectype_t)now;
				if (nrt == REC_NONE)
					nrt = (rectype_t)(~mode & REC_FULL);
				/*
				 * The very first changed node fixes the
				 * record type of this pass; nodes of the
				 * other type wait for the next pass.
				 */
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt)
					continue;
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0)
						m_freem(m);
					CTR1(KTR_IGMPV3,
					    "%s: m_append() failed", __func__);
					return (-ENOMEM);
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs)
					break;
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					CTR1(KTR_IGMPV3,
					    "%s: m_free(m)", __func__);
					m_freem(m);
				} else {
					CTR1(KTR_IGMPV3,
					    "%s: m_adj(m, -ig)", __func__);
					m_adj(m, -((int)sizeof(
					    struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW)
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			else if (crt == REC_BLOCK)
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			pig->ig_numsrc = htons(rsrcs);
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.PH_vt.vt_nrecs++;
			if (m != m0)
				_IF_ENQUEUE(ifq, m);
			nbytes += npbytes;
		} while (nims != NULL);
		/* This record type is done; flip to the other one. */
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
	    nallow, nblock);

	return (nbytes);
}

/*
 * Move or copy the state-change packets queued on inm->inm_scq onto the
 * interface state-change queue ifscq, concatenating a packet onto the
 * tail of ifscq when the record count and MTU limits allow.
 *
 * While retransmissions remain (inm_scrv > 0) each packet is duplicated
 * with m_dup() and the original stays queued; otherwise it is dequeued.
 *
 * Returns 0, or ENOMEM when a duplicate could not be allocated.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue	*gq;
	struct mbuf	*m;		/* pending state-change */
	struct mbuf	*m0;		/* copy of pending state-change */
	struct mbuf	*mt;		/* last state-change in packet */
	int		 docopy, domerge;
	u_int		 recslen;

	docopy = 0;
	domerge = 0;
	recslen = 0;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0)
		docopy = 1;

	gq = &inm->inm_scq;
#ifdef KTR
	if (gq->ifq_head == NULL) {
		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
		    __func__, inm);
	}
#endif

	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m, NULL);

			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
			    m->m_pkthdr.PH_vt.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
				domerge = 1;
		}

		/*
		 * NOTE(review): the fullness test is made on the source
		 * queue gq although the packet would be enqueued on ifscq,
		 * and when !docopy the packet is freed here without being
		 * unlinked from gq - confirm both are intended.
		 */
		if (!domerge && _IF_QFULL(gq)) {
			CTR2(KTR_IGMPV3,
			    "%s: outbound queue full, skipping whole packet %p",
			    __func__, m);
			mt = m->m_nextpkt;
			if (!docopy)
				m_freem(m);
			m = mt;
			continue;
		}

		if (!docopy) {
			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
			/*
			 * NOTE(review): presumably _IF_DEQUEUE() clears
			 * m0->m_nextpkt, in which case the walk stops
			 * after this packet - verify against the macro.
			 */
			_IF_DEQUEUE(gq, m0);
			m = m0->m_nextpkt;
		} else {
			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL)
				return (ENOMEM);
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
			    __func__, m0, ifscq);
			_IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;	/* last mbuf of packet mt */

			CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
			    __func__, m0, mt);

			/*
			 * Chain m0 behind mt: m0 stops being a packet
			 * head, and its length and record count are
			 * folded into mt's packet header.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.PH_vt.vt_nrecs +=
			    m0->m_pkthdr.PH_vt.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return (0);
}

/*
 * Respond to a pending IGMPv3 General Query.
*/
/*
 * Every IPv4 membership on the link that is in a reporting-capable
 * state is moved to IGMP_REPORTING_MEMBER and gets a current-state
 * record appended to the interface's general-query queue.  One burst
 * of that queue is then dispatched; if packets remain, the interface
 * timer is re-armed so the rest follows later.
 */
static void
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifmultiaddr	*maddr;
	struct ifnet		*ifp;
	struct in_multi		*inm;
	int			 retval, to_loopback;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	KASSERT(igi->igi_version == IGMP_VERSION_3,
	    ("%s: called when version %d", __func__, igi->igi_version));

	ifp = igi->igi_ifp;

	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(maddr, &ifp->if_multiaddrs, ifma_link) {
		/* Only IPv4 entries that carry an in_multi are of interest. */
		if (maddr->ifma_addr->sa_family != AF_INET)
			continue;
		if (maddr->ifma_protospec == NULL)
			continue;

		inm = (struct in_multi *)maddr->ifma_protospec;
		KASSERT(ifp == inm->inm_ifp,
		    ("%s: inconsistent ifp", __func__));

		switch (inm->inm_state) {
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			break;
		default:
			/*
			 * Not a member, silent, leaving, or a group /
			 * group-source query already pending: no record.
			 */
			break;
		}
	}
	IF_ADDR_RUNLOCK(ifp);

	to_loopback = 0;
	if (igi->igi_flags & IGIF_LOOPBACK)
		to_loopback = 1;
	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    to_loopback);

	/* Queue fully drained: no further burst needs scheduling. */
	if (igi->igi_gq.ifq_head == NULL)
		return;

	/*
	 * Slew transmission of bursts over 500ms intervals.
	 */
	igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
	    IGMP_RESPONSE_BURST_INTERVAL);
	V_interface_timers_running = 1;
}

/*
 * Transmit the next pending IGMP message in the output queue.
 *
 * We get called from netisr_processqueue(). A mutex private to igmpoq
 * will be acquired and released around this routine.
 *
 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
 * MRT: Nothing needs to be done, as IGMP traffic is always local to
 * a link and uses a link-scope multicast address.
*/ static void igmp_intr(struct mbuf *m) { struct ip_moptions imo; struct ifnet *ifp; struct mbuf *ipopts, *m0; int error; uint32_t ifindex; CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m); /* * Set VNET image pointer from enqueued mbuf chain * before doing anything else. Whilst we use interface * indexes to guard against interface detach, they are * unique to each VIMAGE and must be retrieved. */ CURVNET_SET((struct vnet *)(m->m_pkthdr.header)); ifindex = igmp_restore_context(m); /* * Check if the ifnet still exists. This limits the scope of * any race in the absence of a global ifp lock for low cost * (an array lookup). */ ifp = ifnet_byindex(ifindex); if (ifp == NULL) { CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.", __func__, m, ifindex); m_freem(m); IPSTAT_INC(ips_noroute); goto out; } ipopts = V_igmp_sendra ? m_raopt : NULL; imo.imo_multicast_ttl = 1; imo.imo_multicast_vif = -1; imo.imo_multicast_loop = (V_ip_mrouter != NULL); /* * If the user requested that IGMP traffic be explicitly * redirected to the loopback interface (e.g. they are running a * MANET interface and the routing protocol needs to see the * updates), handle this now. */ if (m->m_flags & M_IGMP_LOOP) imo.imo_multicast_ifp = V_loif; else imo.imo_multicast_ifp = ifp; if (m->m_flags & M_IGMPV2) { m0 = m; } else { m0 = igmp_v3_encap_report(ifp, m); if (m0 == NULL) { CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m); m_freem(m); IPSTAT_INC(ips_odropped); goto out; } } igmp_scrub_context(m0); m->m_flags &= ~(M_PROTOFLAGS); m0->m_pkthdr.rcvif = V_loif; #ifdef MAC mac_netinet_igmp_send(ifp, m0); #endif error = ip_output(m0, ipopts, NULL, 0, &imo, NULL); if (error) { CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error); goto out; } IGMPSTAT_INC(igps_snd_reports); out: /* * We must restore the existing vnet pointer before * continuing as we are run from netisr context. */ CURVNET_RESTORE(); } /* * Encapsulate an IGMPv3 report. 
*
 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
 * chain has already had its IP/IGMPv3 header prepended. In this case
 * the function will not attempt to prepend; the lengths and checksums
 * will however be re-computed.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed.
 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report	*igmp;
	struct ip		*ip;
	int			 hdrlen, igmpreclen;

	KASSERT((m->m_flags & M_PKTHDR),
	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));

	/* Length of the group records only, excluding any existing header. */
	igmpreclen = m_length(m, NULL);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		igmpreclen -= hdrlen;
	} else {
		M_PREPEND(m, hdrlen, M_DONTWAIT);
		if (m == NULL)
			return (NULL);
		m->m_flags |= M_IGMPV3_HDR;
	}

	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);

	/* Step past the IP header so the checksum covers only IGMP data. */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
	igmp->ir_cksum = 0;
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.PH_vt.vt_nrecs = 0;

	/* Undo the adjustment and fill in the IP header. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = hdrlen + igmpreclen;
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	/* Looped-back reports take the interface's own address as source. */
	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			ip->ip_src = ia->ia_addr.sin_addr;
			ifa_free(&ia->ia_ifa);
		}
	}

	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return (m);
}

#ifdef KTR
/*
 * Map an IGMPv3 group record type to a short name for trace output.
 * Types not listed below yield "unknown".
 */
static char *
igmp_rec_type_to_str(const int type)
{

	switch (type) {
		case IGMP_CHANGE_TO_EXCLUDE_MODE:
			return "TO_EX";
			break;
		case IGMP_CHANGE_TO_INCLUDE_MODE:
			return "TO_IN";
			break;
		case IGMP_MODE_IS_EXCLUDE:
			return "MODE_EX";
			break;
		case IGMP_MODE_IS_INCLUDE:
			return "MODE_IN";
			break;
		case IGMP_ALLOW_NEW_SOURCES:
			return "ALLOW_NEW";
			break;
		case IGMP_BLOCK_OLD_SOURCES:
			return "BLOCK_OLD";
			break;
		default:
			break;
	}
	return "unknown";
}
#endif

/*
 * Global set-up: initialise the IGMP lock, allocate the shared
 * Router Alert option mbuf and register the netisr handler.
 */
static void
igmp_init(void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: initializing", __func__);

	IGMP_LOCK_INIT();

	m_raopt = igmp_ra_alloc();

	netisr_register(&igmp_nh);
}
SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL);

/*
 * Global tear-down; undoes igmp_init() in reverse order.
 */
static void
igmp_uninit(void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	netisr_unregister(&igmp_nh);

	m_free(m_raopt);
	m_raopt = NULL;

	IGMP_LOCK_DESTROY();
}
SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL);

/*
 * Per-vnet set-up: start with an empty list of per-interface IGMP state.
 */
static void
vnet_igmp_init(const void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: initializing", __func__);

	LIST_INIT(&V_igi_head);
}
VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init,
    NULL);

/*
 * Per-vnet tear-down: only asserts that the list is already empty.
 */
static void
vnet_igmp_uninit(const void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	KASSERT(LIST_EMPTY(&V_igi_head),
	    ("%s: igi list not empty; ifnets not detached?", __func__));
}
VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_igmp_uninit, NULL);

/*
 * Module event handler: load and unload are accepted without doing any
 * work here; every other event type is rejected with EOPNOTSUPP.
 */
static int
igmp_modevent(module_t mod, int type, void *unused __unused)
{

    switch (type) {
    case MOD_LOAD:
    case MOD_UNLOAD:
	break;
    default:
	return (EOPNOTSUPP);
    }
    return (0);
}

static moduledata_t igmp_mod = {
    "igmp",
    igmp_modevent,
    0
};
DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: stable/8/sys/netinet6/nd6_rtr.c
===================================================================
--- stable/8/sys/netinet6/nd6_rtr.c (revision 281230)
+++ stable/8/sys/netinet6/nd6_rtr.c (revision 281231)
@@ -1,2200 +1,2208 @@
/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int rtpref(struct nd_defrouter *); static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *); static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *, struct mbuf *, int)); static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int); static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *, struct nd_defrouter *)); static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *); static void pfxrtr_del(struct nd_pfxrouter *); static struct nd_pfxrouter *find_pfxlist_reachable_router (struct nd_prefix *); static void defrouter_delreq(struct nd_defrouter *); static void nd6_rtmsg(int, struct rtentry *); static int in6_init_prefix_ltimes(struct nd_prefix *); static void in6_init_address_ltimes __P((struct nd_prefix *, struct in6_addrlifetime *)); static int nd6_prefix_onlink(struct nd_prefix *); static int nd6_prefix_offlink(struct nd_prefix *); static int rt6_deleteroute(struct radix_node *, void *); VNET_DECLARE(int, nd6_recalc_reachtm_interval); #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) static VNET_DEFINE(struct ifnet *, nd6_defifp); VNET_DEFINE(int, nd6_defifindex); #define V_nd6_defifp VNET(nd6_defifp) VNET_DEFINE(int, ip6_use_tempaddr) = 0; VNET_DEFINE(int, ip6_desync_factor); VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME; VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME; VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE; /* RTPREF_MEDIUM has to be 0! 
*/ #define RTPREF_HIGH 1 #define RTPREF_MEDIUM 0 #define RTPREF_LOW (-1) #define RTPREF_RESERVED (-2) #define RTPREF_INVALID (-3) /* internal */ /* * Receive Router Solicitation Message - just for routers. * Router solicitation/advertisement is mostly managed by userland program * (rtadvd) so here we have no function like nd6_ra_output(). * * Based on RFC 2461 */ void nd6_rs_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_solicit *nd_rs; struct in6_addr saddr6 = ip6->ip6_src; char *lladdr = NULL; int lladdrlen = 0; union nd_opts ndopts; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* If I'm not a router, ignore it. */ if (V_ip6_accept_rtadv != 0 || V_ip6_forwarding != 1) goto freeit; /* Sanity checks */ if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } /* * Don't update the neighbor cache, if src = ::. * This indicates that the src has no IP address assigned yet. 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len); if (nd_rs == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif icmp6len -= sizeof(*nd_rs); nd6_option_init(nd_rs + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_rs_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_rs_input: lladdrlen mismatch for %s " "(if %d, RS packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0); freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badrs); m_freem(m); } /* * Receive Router Advertisement Message. * * Based on RFC 2461 * TODO: on-link bit on prefix information * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing */ void nd6_ra_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct nd_ifinfo *ndi = ND_IFINFO(ifp); struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_advert *nd_ra; struct in6_addr saddr6 = ip6->ip6_src; int mcast = 0; union nd_opts ndopts; struct nd_defrouter *dr; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * We only accept RAs only when * the system-wide variable allows the acceptance, and * per-interface variable allows RAs on the receiving interface. 
*/ if (V_ip6_accept_rtadv == 0) goto freeit; if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV)) goto freeit; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) { nd6log((LOG_ERR, "nd6_ra_input: src %s is not link-local\n", ip6_sprintf(ip6bufs, &saddr6))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len); if (nd_ra == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif icmp6len -= sizeof(*nd_ra); nd6_option_init(nd_ra + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ra_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } { struct nd_defrouter dr0; u_int32_t advreachable = nd_ra->nd_ra_reachable; /* remember if this is a multicasted advertisement */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) mcast = 1; bzero(&dr0, sizeof(dr0)); dr0.rtaddr = saddr6; dr0.flags = nd_ra->nd_ra_flags_reserved; dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); dr0.expire = time_second + dr0.rtlifetime; dr0.ifp = ifp; /* unspecified or not? 
(RFC 2461 6.3.4) */ if (advreachable) { advreachable = ntohl(advreachable); if (advreachable <= MAX_REACHABLE_TIME && ndi->basereachable != advreachable) { ndi->basereachable = advreachable; ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */ } } if (nd_ra->nd_ra_retransmit) ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); - if (nd_ra->nd_ra_curhoplimit) - ndi->chlim = nd_ra->nd_ra_curhoplimit; + if (nd_ra->nd_ra_curhoplimit) { + if (ndi->chlim < nd_ra->nd_ra_curhoplimit) + ndi->chlim = nd_ra->nd_ra_curhoplimit; + else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) { + log(LOG_ERR, "RA with a lower CurHopLimit sent from " + "%s on %s (current = %d, received = %d). " + "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), + if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit); + } + } dr = defrtrlist_update(&dr0); } /* * prefix */ if (ndopts.nd_opts_pi) { struct nd_opt_hdr *pt; struct nd_opt_prefix_info *pi = NULL; struct nd_prefixctl pr; for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi; pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; pt = (struct nd_opt_hdr *)((caddr_t)pt + (pt->nd_opt_len << 3))) { if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION) continue; pi = (struct nd_opt_prefix_info *)pt; if (pi->nd_opt_pi_len != 4) { nd6log((LOG_INFO, "nd6_ra_input: invalid option " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_len)); continue; } if (128 < pi->nd_opt_pi_prefix_len) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_prefix_len)); continue; } if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "%s, ignored\n", ip6_sprintf(ip6bufs, &pi->nd_opt_pi_prefix))); continue; } bzero(&pr, sizeof(pr)); pr.ndpr_prefix.sin6_family = AF_INET6; pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix); pr.ndpr_prefix.sin6_addr = 
pi->nd_opt_pi_prefix; pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif; pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_AUTO) ? 1 : 0; pr.ndpr_plen = pi->nd_opt_pi_prefix_len; pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time); pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time); (void)prelist_update(&pr, dr, m, mcast); } } /* * MTU */ if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) { u_long mtu; u_long maxmtu; mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); /* lower bound */ if (mtu < IPV6_MMTU) { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option " "mtu=%lu sent from %s, ignoring\n", mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src))); goto skip; } /* upper bound */ maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu) ? ndi->maxmtu : ifp->if_mtu; if (mtu <= maxmtu) { int change = (ndi->linkmtu != mtu); ndi->linkmtu = mtu; if (change) /* in6_maxmtu may change */ in6_setmaxmtu(); } else { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " "mtu=%lu sent from %s; " "exceeds maxmtu %lu, ignoring\n", mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu)); } } skip: /* * Source link layer address */ { char *lladdr = NULL; int lladdrlen = 0; if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ra_input: lladdrlen mismatch for %s " "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_ADVERT, 0); /* * Installing a link-layer address might change the state of the * router's neighbor cache, which might also affect our on-link * detection of adveritsed prefixes. 
*/ pfxlist_onlink_check(); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badra); m_freem(m); } /* * default router list proccessing sub routines */ /* tell the change to user processes watching the routing socket. */ static void nd6_rtmsg(int cmd, struct rtentry *rt) { struct rt_addrinfo info; struct ifnet *ifp; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); ifp = rt->rt_ifp; if (ifp != NULL) { IF_ADDR_RLOCK(ifp); ifa = TAILQ_FIRST(&ifp->if_addrhead); info.rti_info[RTAX_IFP] = ifa->ifa_addr; ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; } else ifa = NULL; rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum); if (ifa != NULL) ifa_free(ifa); } static void defrouter_addreq(struct nd_defrouter *new) { struct sockaddr_in6 def, mask, gate; struct rtentry *newrt = NULL; int s; int error; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; s = splnet(); error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RTFREE(newrt); } if (error == 0) new->installed = 1; splx(s); return; } struct nd_defrouter * defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) { struct nd_defrouter *dr; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) return (dr); } return (NULL); /* search failed */ } /* * Remove the default route for a given router. * This is just a subroutine function for defrouter_select(), and should * not be called from anywhere else. 
*/ static void defrouter_delreq(struct nd_defrouter *dr) { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); } dr->installed = 0; } /* * remove all default routes from default router list */ void defrouter_reset(void) { struct nd_defrouter *dr; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) defrouter_delreq(dr); /* * XXX should we also nuke any default routers in the kernel, by * going through them by rtalloc1()? */ } void defrtrlist_del(struct nd_defrouter *dr) { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; /* * Flush all the routing table entries that use the router * as a next hop. */ if (!V_ip6_forwarding && V_ip6_accept_rtadv) /* XXX: better condition? */ rt6_flush(&dr->rtaddr, dr->ifp); if (dr->installed) { deldr = dr; defrouter_delreq(dr); } TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); /* * Also delete all the pointers to the router in each prefix lists. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { struct nd_pfxrouter *pfxrtr; if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) pfxrtr_del(pfxrtr); } pfxlist_onlink_check(); /* * If the router is the primary one, choose a new one. * Note that defrouter_select() will remove the current gateway * from the routing table. */ if (deldr) defrouter_select(); free(dr, M_IP6NDP); } /* * Default Router Selection according to Section 6.3.6 of RFC 2461 and * draft-ietf-ipngwg-router-selection: * 1) Routers that are reachable or probably reachable should be preferred. * If we have more than one (probably) reachable router, prefer ones * with the highest router preference. 
* 2) When no routers on the list are known to be reachable or * probably reachable, routers SHOULD be selected in a round-robin * fashion, regardless of router preference values. * 3) If the Default Router List is empty, assume that all * destinations are on-link. * * We assume nd_defrouter is sorted by router preference value. * Since the code below covers both with and without router preference cases, * we do not need to classify the cases by ifdef. * * At this moment, we do not try to install more than one default router, * even when the multipath routing is available, because we're not sure about * the benefits for stub hosts comparing to the risk of making the code * complicated and the possibility of introducing bugs. */ void defrouter_select(void) { int s = splnet(); struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; struct llentry *ln = NULL; /* * This function should be called only when acting as an autoconfigured * host. Although the remaining part of this function is not effective * if the node is not an autoconfigured host, we explicitly exclude * such cases here for safety. */ if (V_ip6_forwarding || !V_ip6_accept_rtadv) { nd6log((LOG_WARNING, "defrouter_select: called unexpectedly (forwarding=%d, " "accept_rtadv=%d)\n", V_ip6_forwarding, V_ip6_accept_rtadv)); splx(s); return; } /* * Let's handle easy case (3) first: * If default router list is empty, there's nothing to be done. */ if (TAILQ_EMPTY(&V_nd_defrouter)) { splx(s); return; } /* * Search for a (probably) reachable router from the list. * We just pick up the first reachable one (if any), assuming that * the ordering rule of the list described in defrtrlist_update(). 
*/ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { IF_AFDATA_RLOCK(dr->ifp); if (selected_dr == NULL && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; } IF_AFDATA_RUNLOCK(dr->ifp); if (ln != NULL) { LLE_RUNLOCK(ln); ln = NULL; } if (dr->installed && installed_dr == NULL) installed_dr = dr; else if (dr->installed && installed_dr) { /* this should not happen. warn for diagnosis. */ log(LOG_ERR, "defrouter_select: more than one router" " is installed\n"); } } /* * If none of the default routers was found to be reachable, * round-robin the list regardless of preference. * Otherwise, if we have an installed router, check if the selected * (reachable) router should really be preferred to the installed one. * We only prefer the new router when the old one is not reachable * or when the new one has a really higher preference value. */ if (selected_dr == NULL) { if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry)) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); } else if (installed_dr) { IF_AFDATA_RLOCK(installed_dr->ifp); if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && rtpref(selected_dr) <= rtpref(installed_dr)) { selected_dr = installed_dr; } IF_AFDATA_RUNLOCK(installed_dr->ifp); if (ln != NULL) LLE_RUNLOCK(ln); } /* * If the selected router is different than the installed one, * remove the installed router and install the selected one. * Note that the selected router is never NULL here. 
*/ if (installed_dr != selected_dr) { if (installed_dr) defrouter_delreq(installed_dr); defrouter_addreq(selected_dr); } splx(s); return; } /* * for default router selection * regards router-preference field as a 2-bit signed integer */ static int rtpref(struct nd_defrouter *dr) { switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) { case ND_RA_FLAG_RTPREF_HIGH: return (RTPREF_HIGH); case ND_RA_FLAG_RTPREF_MEDIUM: case ND_RA_FLAG_RTPREF_RSV: return (RTPREF_MEDIUM); case ND_RA_FLAG_RTPREF_LOW: return (RTPREF_LOW); default: /* * This case should never happen. If it did, it would mean a * serious bug of kernel internal. We thus always bark here. * Or, can we even panic? */ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags); return (RTPREF_INVALID); } /* NOTREACHED */ } static struct nd_defrouter * defrtrlist_update(struct nd_defrouter *new) { struct nd_defrouter *dr, *n; int s = splnet(); if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { /* entry exists */ if (new->rtlifetime == 0) { defrtrlist_del(dr); dr = NULL; } else { int oldpref = rtpref(dr); /* override */ dr->flags = new->flags; /* xxx flag check */ dr->rtlifetime = new->rtlifetime; dr->expire = new->expire; /* * If the preference does not change, there's no need * to sort the entries. Also make sure the selected * router is still installed in the kernel. */ if (dr->installed && rtpref(new) == oldpref) { splx(s); return (dr); } /* * preferred router may be changed, so relocate * this router. * XXX: calling TAILQ_REMOVE directly is a bad manner. * However, since defrtrlist_del() has many side * effects, we intentionally do so here. * defrouter_select() below will handle routing * changes later. 
*/ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); n = dr; goto insert; } splx(s); return (dr); } /* entry does not exist */ if (new->rtlifetime == 0) { splx(s); return (NULL); } n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT); if (n == NULL) { splx(s); return (NULL); } bzero(n, sizeof(*n)); *n = *new; insert: /* * Insert the new router in the Default Router List; * The Default Router List should be in the descending order * of router-preferece. Routers with the same preference are * sorted in the arriving time order. */ /* insert at the end of the group */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { if (rtpref(n) > rtpref(dr)) break; } if (dr) TAILQ_INSERT_BEFORE(dr, n, dr_entry); else TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); defrouter_select(); splx(s); return (n); } static struct nd_pfxrouter * pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *search; LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) { if (search->router == dr) break; } return (search); } static void pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *new; new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return; bzero(new, sizeof(*new)); new->router = dr; LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); pfxlist_onlink_check(); } static void pfxrtr_del(struct nd_pfxrouter *pfr) { LIST_REMOVE(pfr, pfr_entry); free(pfr, M_IP6NDP); } struct nd_prefix * nd6_prefix_lookup(struct nd_prefixctl *key) { struct nd_prefix *search; LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) { if (key->ndpr_ifp == search->ndpr_ifp && key->ndpr_plen == search->ndpr_plen && in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr, &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) { break; } } return (search); } int nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, struct nd_prefix **newp) { struct nd_prefix *new = NULL; int error = 0; int i, s; char ip6buf[INET6_ADDRSTRLEN]; new = 
(struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return(ENOMEM); bzero(new, sizeof(*new)); new->ndpr_ifp = pr->ndpr_ifp; new->ndpr_prefix = pr->ndpr_prefix; new->ndpr_plen = pr->ndpr_plen; new->ndpr_vltime = pr->ndpr_vltime; new->ndpr_pltime = pr->ndpr_pltime; new->ndpr_flags = pr->ndpr_flags; if ((error = in6_init_prefix_ltimes(new)) != 0) { free(new, M_IP6NDP); return(error); } new->ndpr_lastupdate = time_second; if (newp != NULL) *newp = new; /* initialization */ LIST_INIT(&new->ndpr_advrtrs); in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen); /* make prefix in the canonical form */ for (i = 0; i < 4; i++) new->ndpr_prefix.sin6_addr.s6_addr32[i] &= new->ndpr_mask.s6_addr32[i]; s = splnet(); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); splx(s); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { int e; if ((e = nd6_prefix_onlink(new)) != 0) { nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? */ } } if (dr) pfxrtr_add(new, dr); return 0; } void prelist_remove(struct nd_prefix *pr) { struct nd_pfxrouter *pfr, *next; int e, s; char ip6buf[INET6_ADDRSTRLEN]; /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; pr->ndpr_pltime = 0; /* * Though these flags are now meaningless, we'd rather keep the value * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users * when executing "ndp -p". */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 && (e = nd6_prefix_offlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " "on %s, errno=%d\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* what should we do? */ } if (pr->ndpr_refcnt > 0) return; /* notice here? 
*/ s = splnet(); /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); /* free list of routers that adversed the prefix */ LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) { free(pfr, M_IP6NDP); } splx(s); free(pr, M_IP6NDP); pfxlist_onlink_check(); } /* * dr - may be NULL */ static int prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, struct mbuf *m, int mcast) { struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL; struct ifaddr *ifa; struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; int s = splnet(); int error = 0; int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; char ip6buf[INET6_ADDRSTRLEN]; auth = 0; if (m) { /* * Authenticity for NA consists authentication for * both IP header and IP datagrams, doesn't it ? */ #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM) auth = ((m->m_flags & M_AUTHIPHDR) && (m->m_flags & M_AUTHIPDGM)); #endif } if ((pr = nd6_prefix_lookup(new)) != NULL) { /* * nd6_prefix_lookup() ensures that pr and new have the same * prefix on a same interface. */ /* * Update prefix information. Note that the on-link (L) bit * and the autonomous (A) bit should NOT be changed from 1 * to 0. */ if (new->ndpr_raf_onlink == 1) pr->ndpr_raf_onlink = 1; if (new->ndpr_raf_auto == 1) pr->ndpr_raf_auto = 1; if (new->ndpr_raf_onlink) { pr->ndpr_vltime = new->ndpr_vltime; pr->ndpr_pltime = new->ndpr_pltime; (void)in6_init_prefix_ltimes(pr); /* XXX error case? */ pr->ndpr_lastupdate = time_second; } if (new->ndpr_raf_onlink && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { int e; if ((e = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_update: failed to make " "the prefix %s/%d on-link on %s " "(errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? 
*/ } } if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); } else { struct nd_prefix *newpr = NULL; newprefix = 1; if (new->ndpr_vltime == 0) goto end; if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; error = nd6_prelist_add(new, dr, &newpr); if (error != 0 || newpr == NULL) { nd6log((LOG_NOTICE, "prelist_update: " "nd6_prelist_add failed for %s/%d on %s " "errno=%d, returnpr=%p\n", ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr), new->ndpr_plen, if_name(new->ndpr_ifp), error, newpr)); goto end; /* we should just give up in this case. */ } /* * XXX: from the ND point of view, we can ignore a prefix * with the on-link bit being zero. However, we need a * prefix structure for references from autoconfigured * addresses. Thus, we explicitly make sure that the prefix * itself expires now. */ if (newpr->ndpr_raf_onlink == 0) { newpr->ndpr_vltime = 0; newpr->ndpr_pltime = 0; in6_init_prefix_ltimes(newpr); } pr = newpr; } /* * Address autoconfiguration based on Section 5.5.3 of RFC 2462. * Note that pr must be non NULL at this point. */ /* 5.5.3 (a). Ignore the prefix without the A bit set. */ if (!new->ndpr_raf_auto) goto end; /* * 5.5.3 (b). the link-local prefix should have been ignored in * nd6_ra_input. */ /* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */ if (new->ndpr_pltime > new->ndpr_vltime) { error = EINVAL; /* XXX: won't be used */ goto end; } /* * 5.5.3 (d). If the prefix advertised is not equal to the prefix of * an address configured by stateless autoconfiguration already in the * list of addresses associated with the interface, and the Valid * Lifetime is not 0, form an address. We first check if we have * a matching prefix. * Note: we apply a clarification in rfc2462bis-02 here. We only * consider autoconfigured addresses while RFC2462 simply said * "address". 
*/ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *ifa6; u_int32_t remaininglifetime; if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; /* * We only consider autoconfigured addresses as per rfc2462bis. */ if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF)) continue; /* * Spec is not clear here, but I believe we should concentrate * on unicast (i.e. not anycast) addresses. * XXX: other ia6_flags? detached or duplicated? */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0) continue; /* * Ignore the address if it is not associated with a prefix * or is associated with a prefix that is different from this * one. (pr is never NULL here) */ if (ifa6->ia6_ndpr != pr) continue; if (ia6_match == NULL) /* remember the first one */ ia6_match = ifa6; /* * An already autoconfigured address matched. Now that we * are sure there is at least one matched address, we can * proceed to 5.5.3. (e): update the lifetimes according to the * "two hours" rule and the privacy extension. * We apply some clarifications in rfc2462bis: * - use remaininglifetime instead of storedlifetime as a * variable name * - remove the dead code in the "two-hour" rule */ #define TWOHOUR (120*60) lt6_tmp = ifa6->ia6_lifetime; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME) remaininglifetime = ND6_INFINITE_LIFETIME; else if (time_second - ifa6->ia6_updatetime > lt6_tmp.ia6t_vltime) { /* * The case of "invalid" address. We should usually * not see this case. */ remaininglifetime = 0; } else remaininglifetime = lt6_tmp.ia6t_vltime - (time_second - ifa6->ia6_updatetime); /* when not updating, keep the current stored lifetime. 
*/ lt6_tmp.ia6t_vltime = remaininglifetime; if (TWOHOUR < new->ndpr_vltime || remaininglifetime < new->ndpr_vltime) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } else if (remaininglifetime <= TWOHOUR) { if (auth) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } } else { /* * new->ndpr_vltime <= TWOHOUR && * TWOHOUR < remaininglifetime */ lt6_tmp.ia6t_vltime = TWOHOUR; } /* The 2 hour rule is not imposed for preferred lifetime. */ lt6_tmp.ia6t_pltime = new->ndpr_pltime; in6_init_address_ltimes(pr, <6_tmp); /* * We need to treat lifetimes for temporary addresses * differently, according to * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1); * we only update the lifetimes when they are in the maximum * intervals. */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { u_int32_t maxvltime, maxpltime; if (V_ip6_temp_valid_lifetime > (u_int32_t)((time_second - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxvltime = V_ip6_temp_valid_lifetime - (time_second - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxvltime = 0; if (V_ip6_temp_preferred_lifetime > (u_int32_t)((time_second - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxpltime = V_ip6_temp_preferred_lifetime - (time_second - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxpltime = 0; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME || lt6_tmp.ia6t_vltime > maxvltime) { lt6_tmp.ia6t_vltime = maxvltime; } if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME || lt6_tmp.ia6t_pltime > maxpltime) { lt6_tmp.ia6t_pltime = maxpltime; } } ifa6->ia6_lifetime = lt6_tmp; ifa6->ia6_updatetime = time_second; } IF_ADDR_RUNLOCK(ifp); if (ia6_match == NULL && new->ndpr_vltime) { int ifidlen; /* * 5.5.3 (d) (continued) * No address matched and the valid lifetime is non-zero. * Create a new address. */ /* * Prefix Length check: * If the sum of the prefix length and interface identifier * length does not equal 128 bits, the Prefix Information * option MUST be ignored. 
The length of the interface * identifier is defined in a separate link-type specific * document. */ ifidlen = in6_if2idlen(ifp); if (ifidlen < 0) { /* this should not happen, so we always log it. */ log(LOG_ERR, "prelist_update: IFID undefined (%s)\n", if_name(ifp)); goto end; } if (ifidlen + pr->ndpr_plen != 128) { nd6log((LOG_INFO, "prelist_update: invalid prefixlen " "%d for %s, ignored\n", pr->ndpr_plen, if_name(ifp))); goto end; } if ((ia6 = in6_ifadd(new, mcast)) != NULL) { /* * note that we should use pr (not new) for reference. */ pr->ndpr_refcnt++; ia6->ia6_ndpr = pr; /* * RFC 3041 3.3 (2). * When a new public address is created as described * in RFC2462, also create a new temporary address. * * RFC 3041 3.5. * When an interface connects to a new link, a new * randomized interface identifier should be generated * immediately together with a new set of temporary * addresses. Thus, we specifiy 1 as the 2nd arg of * in6_tmpifadd(). */ if (V_ip6_use_tempaddr) { int e; if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) { nd6log((LOG_NOTICE, "prelist_update: " "failed to create a temporary " "address, errno=%d\n", e)); } } ifa_free(&ia6->ia_ifa); /* * A newly added address might affect the status * of other addresses, so we check and update it. * XXX: what if address duplication happens? */ pfxlist_onlink_check(); } else { /* just set an error. do not bark here. */ error = EADDRNOTAVAIL; /* XXX: might be unused. */ } } end: splx(s); return error; } /* * A supplement function used in the on-link detection below; * detect if a given prefix has a (probably) reachable advertising router. * XXX: lengthy function name... 
*/
/*
 * Walk the routers that advertised prefix `pr` (pr->ndpr_advrtrs) and stop
 * at the first one whose neighbor cache entry, as returned by nd6_lookup(),
 * satisfies ND6_IS_LLINFO_PROBREACH().  The loop cursor is returned: the
 * matching nd_pfxrouter on a hit.
 * NOTE(review): on a miss the result is whatever LIST_FOREACH leaves in the
 * cursor; callers in this file compare it against NULL -- confirm with
 * queue(3).
 */
static struct nd_pfxrouter *
find_pfxlist_reachable_router(struct nd_prefix *pr)
{
	struct nd_pfxrouter *pfxrtr;
	struct llentry *ln;
	int canreach;

	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
		/* The lookup itself is bracketed by the AFDATA read lock. */
		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
		IF_AFDATA_RUNLOCK(pfxrtr->router->ifp);
		if (ln == NULL)
			continue;
		/* Sample the state first, then drop the entry lock. */
		canreach = ND6_IS_LLINFO_PROBREACH(ln);
		LLE_RUNLOCK(ln);
		if (canreach)
			break;
	}
	return (pfxrtr);
}

/*
 * Check if each prefix in the prefix list has at least one available router
 * that advertised the prefix (a router is "available" if its neighbor cache
 * entry is reachable or probably reachable).
 * If the check fails, the prefix may be off-link, because, for example,
 * we have moved from the network but the lifetime of the prefix has not
 * expired yet.  So we should not use the prefix if there is another prefix
 * that has an available router.
 * But, if there is no prefix that has an available router, we still regards
 * all the prefixes as on-link.  This is because we can't tell if all the
 * routers are simply dead or if we really moved from the network and there
 * is no router around us.
 *
 * The function works in three passes: (1) set or clear NDPRF_DETACHED on
 * each prefix, (2) call nd6_prefix_offlink()/nd6_prefix_onlink() so the
 * NDPRF_ONLINK state follows the detached state, (3) set or clear
 * IN6_IFF_DETACHED on autoconfigured addresses and restart DAD on the ones
 * that become attached again.
 */
void
pfxlist_onlink_check()
{
	struct nd_prefix *pr;
	struct in6_ifaddr *ifa;
	struct nd_defrouter *dr;
	struct nd_pfxrouter *pfxrtr = NULL;

	/*
	 * Check if there is a prefix that has a reachable advertising
	 * router.
	 */
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
			break;
	}

	/*
	 * If we have no such prefix, check whether we still have a router
	 * that does not advertise any prefixes.
	 */
	if (pr == NULL) {
		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
			struct nd_prefix *pr0;

			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
					break;
			}
			if (pfxrtr != NULL)
				break;
		}
	}
	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
		/*
		 * There is at least one prefix that has a reachable router,
		 * or at least a router which probably does not advertise
		 * any prefixes.  The latter would be the case when we move
		 * to a new link where we have a router that does not provide
		 * prefixes and we configure an address by hand.
		 * Detach prefixes which have no reachable advertising
		 * router, and attach other prefixes.
		 */
		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
			/* XXX: a link-local prefix should never be detached */
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			/*
			 * we aren't interested in prefixes without the L bit
			 * set.
			 */
			if (pr->ndpr_raf_onlink == 0)
				continue;

			if (pr->ndpr_raf_auto == 0)
				continue;

			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
			    find_pfxlist_reachable_router(pr) == NULL)
				pr->ndpr_stateflags |= NDPRF_DETACHED;
			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
			    find_pfxlist_reachable_router(pr) != 0)
				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
		}
	} else {
		/* there is no prefix that has a reachable router */
		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			if (pr->ndpr_raf_onlink == 0)
				continue;

			if (pr->ndpr_raf_auto == 0)
				continue;

			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
		}
	}

	/*
	 * Remove each interface route associated with a (just) detached
	 * prefix, and reinstall the interface route for a (just) attached
	 * prefix.  Note that all attempt of reinstallation does not
	 * necessarily success, when a same prefix is shared among multiple
	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
	 * so we don't have to care about them.
	 */
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		int e;
		char ip6buf[INET6_ADDRSTRLEN];

		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
			continue;

		if (pr->ndpr_raf_onlink == 0)
			continue;

		if (pr->ndpr_raf_auto == 0)
			continue;

		/* Detached but still on-link: take the route out. */
		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
			if ((e = nd6_prefix_offlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d offlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
					    &pr->ndpr_prefix.sin6_addr),
					    pr->ndpr_plen, e));
			}
		}
		/* Attached but not on-link: put the route back. */
		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
		    pr->ndpr_raf_onlink) {
			if ((e = nd6_prefix_onlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d onlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
					    &pr->ndpr_prefix.sin6_addr),
					    pr->ndpr_plen, e));
			}
		}
	}

	/*
	 * Changes on the prefix status might affect address status as well.
	 * Make sure that all addresses derived from an attached prefix are
	 * attached, and that all addresses derived from a detached prefix are
	 * detached.  Note, however, that a manually configured address should
	 * always be attached.
	 * The precise detection logic is same as the one for prefixes.
	 *
	 * XXXRW: in6_ifaddrhead locking.
	 */
	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
			continue;

		if (ifa->ia6_ndpr == NULL) {
			/*
			 * This can happen when we first configure the address
			 * (i.e. the address exists, but the prefix does not).
			 * XXX: complicated relationships...
			 */
			continue;
		}

		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
			break;
	}
	/* ifa is non-NULL only if the scan above stopped early on a hit. */
	if (ifa) {
		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
				continue;

			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
					nd6_dad_start((struct ifaddr *)ifa, 0);
				}
			} else {
				ifa->ia6_flags |= IN6_IFF_DETACHED;
			}
		}
	} else {
		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
				/* Do we need a delay in this case? */
				nd6_dad_start((struct ifaddr *)ifa, 0);
			}
		}
	}
}

/*
 * Install the interface route for prefix `pr` through address `ifa` in
 * every FIB (0 .. rt_numfibs - 1).  For each FIB where in6_rtrequest()
 * succeeds, the gateway is rewritten to a link-level sockaddr carrying the
 * route interface's type and index, an RTM_ADD message is emitted, and
 * NDPRF_ONLINK is set on the prefix.  Failures are logged.
 *
 * Returns 0 if every FIB succeeded, otherwise the error of the last FIB
 * that failed.
 */
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
	struct radix_node_head *rnh;
	struct rtentry *rt;
	struct sockaddr_in6 mask6;
	u_long rtflags;
	int error, a_failure, fibnum;

	/*
	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
	 * ifa->ifa_rtrequest = nd6_rtrequest;
	 */
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_len = sizeof(mask6);
	mask6.sin6_addr = pr->ndpr_mask;
	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;

	a_failure = 0;
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {

		rt = NULL;
		error = in6_rtrequest(RTM_ADD,
		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
		if (error == 0) {
			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
			    error, pr, ifa));

			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
			/* XXX what if rhn == NULL? */
			RADIX_NODE_HEAD_LOCK(rnh);
			RT_LOCK(rt);
			if (rt_setgate(rt, rt_key(rt),
			    (struct sockaddr *)&null_sdl) == 0) {
				struct sockaddr_dl *dl;

				dl = (struct sockaddr_dl *)rt->rt_gateway;
				dl->sdl_type = rt->rt_ifp->if_type;
				dl->sdl_index = rt->rt_ifp->if_index;
			}
			RADIX_NODE_HEAD_UNLOCK(rnh);
			nd6_rtmsg(RTM_ADD, rt);
			RT_UNLOCK(rt);
			pr->ndpr_stateflags |= NDPRF_ONLINK;
		} else {
			char ip6buf[INET6_ADDRSTRLEN];
			char ip6bufg[INET6_ADDRSTRLEN];
			char ip6bufm[INET6_ADDRSTRLEN];
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
			    "flags=%lx errno = %d\n",
			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
			    rtflags, error));

			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}

		/* Drop the reference handed back through &rt, if any. */
		if (rt != NULL) {
			RT_LOCK(rt);
			RT_REMREF(rt);
			RT_UNLOCK(rt);
		}
	}

	/* Return the last error we got. */
	return (a_failure);
}

/*
 * Make prefix `pr` on-link by installing its interface route.
 *
 * Returns EEXIST if NDPRF_ONLINK is already set; 0 without installing
 * anything when another on-link prefix with the same prefix/length exists
 * or when no usable interface address is found; otherwise the result of
 * nd6_prefix_onlink_rtrequest().
 */
static int
nd6_prefix_onlink(struct nd_prefix *pr)
{
	struct ifaddr *ifa;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	int error = 0;
	char ip6buf[INET6_ADDRSTRLEN];

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
		nd6log((LOG_ERR,
		    "nd6_prefix_onlink: %s/%d is already on-link\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen));
		return (EEXIST);
	}

	/*
	 * Add the interface route associated with the prefix.  Before
	 * installing the route, check if there's the same prefix on another
	 * interface, and the prefix has already installed the interface route.
	 * Although such a configuration is expected to be rare, we explicitly
	 * allow it.
	 */
	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
		if (opr == pr)
			continue;

		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
			continue;

		if (opr->ndpr_plen == pr->ndpr_plen &&
		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
			return (0);
	}

	/*
	 * We prefer link-local addresses as the associated interface address.
	 */
	/* search for a link-local addr */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
	if (ifa == NULL) {
		/* XXX: freebsd does not have ifa_ifwithaf */
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family == AF_INET6)
				break;
		}
		/* Take a reference before the address list lock is dropped. */
		if (ifa != NULL)
			ifa_ref(ifa);
		IF_ADDR_RUNLOCK(ifp);
		/* should we care about ia6_flags? */
	}
	if (ifa == NULL) {
		/*
		 * This can still happen, when, for example, we receive an RA
		 * containing a prefix with the L bit set and the A bit clear,
		 * after removing all IPv6 addresses on the receiving
		 * interface.  This should, of course, be rare though.
		 */
		nd6log((LOG_NOTICE,
		    "nd6_prefix_onlink: failed to find any ifaddr"
		    " to add route for a prefix(%s/%d) on %s\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen, if_name(ifp)));
		return (0);
	}

	error = nd6_prefix_onlink_rtrequest(pr, ifa);

	if (ifa != NULL)
		ifa_free(ifa);

	return (error);
}

/*
 * Make prefix `pr` off-link by deleting its interface route from every FIB.
 *
 * Returns EEXIST if NDPRF_ONLINK is not set.  Otherwise returns 0 when all
 * deletions succeeded, or the error of the last FIB that failed (in which
 * case NDPRF_ONLINK is left set).  On success NDPRF_ONLINK is cleared and
 * nd6_prefix_onlink() is attempted on every other prefix that is equal,
 * not on-link and not detached.
 */
static int
nd6_prefix_offlink(struct nd_prefix *pr)
{
	int error = 0;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	struct sockaddr_in6 sa6, mask6;
	struct rtentry *rt;
	char ip6buf[INET6_ADDRSTRLEN];
	int fibnum, a_failure;

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
		nd6log((LOG_ERR,
		    "nd6_prefix_offlink: %s/%d is already off-link\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen));
		return (EEXIST);
	}

	bzero(&sa6, sizeof(sa6));
	sa6.sin6_family = AF_INET6;
	sa6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
	    sizeof(struct in6_addr));
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_family = AF_INET6;
	/* sa6 and mask6 are declared with the same type, so the sizes agree. */
	mask6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));

	a_failure = 0;
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		rt = NULL;
		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
		if (error == 0) {
			/* report the route deletion to the routing socket. */
			if (rt != NULL)
				nd6_rtmsg(RTM_DELETE, rt);
		} else {
			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}
		if (rt != NULL) {
			RTFREE(rt);
		}
	}
	error = a_failure;
	if (error == 0) {
		pr->ndpr_stateflags &= ~NDPRF_ONLINK;

		/*
		 * There might be the same prefix on another interface,
		 * the prefix which could not be on-link just because we have
		 * the interface route (see comments in nd6_prefix_onlink).
		 * If there's one, try to make the prefix on-link on the
		 * interface.
		 */
		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
			if (opr == pr)
				continue;

			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
				continue;

			/*
			 * KAME specific: detached prefixes should not be
			 * on-link.
			 */
			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
				continue;

			if (opr->ndpr_plen == pr->ndpr_plen &&
			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
				int e;

				if ((e = nd6_prefix_onlink(opr)) != 0) {
					nd6log((LOG_ERR,
					    "nd6_prefix_offlink: failed to "
					    "recover a prefix %s/%d from %s "
					    "to %s (errno = %d)\n",
					    ip6_sprintf(ip6buf,
						&opr->ndpr_prefix.sin6_addr),
					    opr->ndpr_plen, if_name(ifp),
					    if_name(opr->ndpr_ifp), e));
				}
			}
		}
	} else {
		/* XXX: can we still set the NDPRF_ONLINK flag? */
		nd6log((LOG_ERR,
		    "nd6_prefix_offlink: failed to delete route: "
		    "%s/%d on %s (errno = %d)\n",
		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
		    if_name(ifp), error));
	}

	return (error);
}

/*
 * Build and install an autoconfigured address on pr->ndpr_ifp: the prefix
 * bits come from `pr`, the interface-ID bits from the interface's
 * link-local address.  A non-zero `mcast` requests IN6_IFAUPDATE_DADDELAY.
 *
 * Returns the address found by in6ifa_ifpwithaddr() after a successful
 * in6_update_ifa(), or NULL when there is no link-local address, the
 * prefix length does not match the link-local mask length, the address is
 * already configured, or in6_update_ifa() fails.
 */
static struct in6_ifaddr *
in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
	struct ifnet *ifp = pr->ndpr_ifp;
	struct ifaddr *ifa;
	struct in6_aliasreq ifra;
	struct in6_ifaddr *ia, *ib;
	int error, plen0;
	struct in6_addr mask;
	int prefixlen = pr->ndpr_plen;
	int updateflags;
	char ip6buf[INET6_ADDRSTRLEN];

	in6_prefixlen2mask(&mask, prefixlen);

	/*
	 * find a link-local address (will be interface ID).
	 * Is it really mandatory? Theoretically, a global or a site-local
	 * address can be configured without a link-local address, if we
	 * have a unique interface identifier...
	 *
	 * it is not mandatory to have a link-local address, we can generate
	 * interface identifier on the fly.  we do this because:
	 * (1) it should be the easiest way to find interface identifier.
	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
	 * for multiple addresses on a single interface, and possible shortcut
	 * of DAD.  we omitted DAD for this reason in the past.
	 * (3) a user can prevent autoconfiguration of global address
	 * by removing link-local address by hand (this is partly because we
	 * don't have other way to control the use of IPv6 on an interface.
	 * this has been our design choice - cf. NRL's "ifconfig auto").
	 * (4) it is easier to manage when an interface has addresses
	 * with the same interface identifier, than to have multiple addresses
	 * with different interface identifiers.
	 */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
	if (ifa)
		ib = (struct in6_ifaddr *)ifa;
	else
		return NULL;

	/* prefixlen + ifidlen must be equal to 128 */
	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
	if (prefixlen != plen0) {
		ifa_free(ifa);
		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
		    "(prefix=%d ifid=%d)\n",
		    if_name(ifp), prefixlen, 128 - plen0));
		return NULL;
	}

	/* make ifaddr */

	bzero(&ifra, sizeof(ifra));
	/*
	 * in6_update_ifa() does not use ifra_name, but we accurately set it
	 * for safety.
	 */
	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
	ifra.ifra_addr.sin6_family = AF_INET6;
	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
	/* prefix */
	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];

	/* interface ID */
	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
	/* ib aliases ifa; it is not used past this point. */
	ifa_free(ifa);

	/* new prefix mask. */
	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
	ifra.ifra_prefixmask.sin6_family = AF_INET6;
	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
	    sizeof(ifra.ifra_prefixmask.sin6_addr));

	/* lifetimes. */
	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;

	/* XXX: scope zone ID? */

	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */

	/*
	 * Make sure that we do not have this address already.  This should
	 * usually not happen, but we can still see this case, e.g., if we
	 * have manually configured the exact address to be configured.
	 */
	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
	    &ifra.ifra_addr.sin6_addr);
	if (ifa != NULL) {
		ifa_free(ifa);
		/* this should be rare enough to make an explicit log */
		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
		return (NULL);
	}

	/*
	 * Allocate ifaddr structure, link into chain, etc.
	 * If we are going to create a new address upon receiving a multicasted
	 * RA, we need to impose a random delay before starting DAD.
	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
	 */
	updateflags = 0;
	if (mcast)
		updateflags |= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
		nd6log((LOG_ERR,
		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
		    if_name(ifp), error));
		return (NULL);	/* ifaddr must not have been allocated. */
	}

	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
	/*
	 * XXXRW: Assumption of non-NULLness here might not be true with
	 * fine-grained locking -- should we validate it?  Or just return
	 * earlier ifa rather than looking it up again?
	 */
	return (ia);		/* this is always non-NULL and referenced. */
}

/*
 * ia0 - corresponding public address
 *
 * Create a temporary (IN6_IFF_TEMPORARY) address that shares the prefix of
 * `ia0` and uses an interface ID obtained from in6_get_tmpifid().
 * `forcegen` is passed through to in6_get_tmpifid(); a non-zero `delay`
 * requests IN6_IFAUPDATE_DADDELAY.
 *
 * Returns 0 on success and also when the computed preferred lifetime is
 * not above V_ip6_temp_regen_advance (no address is created then);
 * EINVAL when no interface ID could be generated or the new address cannot
 * be found after the update; EEXIST when the retry limit is exhausted on
 * address collisions; otherwise the error from in6_update_ifa().
 */
int
in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
	struct in6_ifaddr *newia, *ia;
	struct in6_aliasreq ifra;
	int i, error;
	int trylimit = 3;	/* XXX: adhoc value */
	int updateflags;
	u_int32_t randid[2];
	time_t vltime0, pltime0;

	bzero(&ifra, sizeof(ifra));
	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
	ifra.ifra_addr = ia0->ia_addr;
	/* copy prefix mask */
	ifra.ifra_prefixmask = ia0->ia_prefixmask;
	/* clear the old IFID */
	for (i = 0; i < 4; i++) {
		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
	}

  again:
	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
		    "random IFID\n"));
		return (EINVAL);
	}
	/*
	 * NOTE(review): the IFID bits are OR-ed in and are not cleared again
	 * before a retry through "again", so bits from an earlier attempt
	 * appear to persist -- confirm whether that is intended.
	 */
	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));

	/*
	 * in6_get_tmpifid() quite likely provided a unique interface ID.
	 * However, we may still have a chance to see collision, because
	 * there may be a time lag between generation of the ID and generation
	 * of the address.  So, we'll do one more sanity check.
	 */
	IN6_IFADDR_RLOCK();
	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
		    &ifra.ifra_addr.sin6_addr)) {
			if (trylimit-- == 0) {
				IN6_IFADDR_RUNLOCK();
				/*
				 * Give up.  Something strange should have
				 * happened.
				 */
				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
				    "find a unique random IFID\n"));
				return (EEXIST);
			}
			IN6_IFADDR_RUNLOCK();
			forcegen = 1;
			goto again;
		}
	}
	IN6_IFADDR_RUNLOCK();

	/*
	 * The Valid Lifetime is the lower of the Valid Lifetime of the
	 * public address or TEMP_VALID_LIFETIME.
	 * The Preferred Lifetime is the lower of the Preferred Lifetime
	 * of the public address or TEMP_PREFERRED_LIFETIME -
	 * DESYNC_FACTOR.
	 */
	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
		    (ia0->ia6_lifetime.ia6t_vltime -
		    (time_second - ia0->ia6_updatetime));
		if (vltime0 > V_ip6_temp_valid_lifetime)
			vltime0 = V_ip6_temp_valid_lifetime;
	} else
		vltime0 = V_ip6_temp_valid_lifetime;
	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
		    (ia0->ia6_lifetime.ia6t_pltime -
		    (time_second - ia0->ia6_updatetime));
		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
			pltime0 = V_ip6_temp_preferred_lifetime -
			    V_ip6_desync_factor;
		}
	} else
		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
	ifra.ifra_lifetime.ia6t_vltime = vltime0;
	ifra.ifra_lifetime.ia6t_pltime = pltime0;

	/*
	 * A temporary address is created only if this calculated Preferred
	 * Lifetime is greater than REGEN_ADVANCE time units.
	 */
	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
		return (0);

	/* XXX: scope zone ID? */

	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);

	/* allocate ifaddr structure, link into chain, etc. */
	updateflags = 0;
	if (delay)
		updateflags |= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
		return (error);

	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
	if (newia == NULL) {	/* XXX: can it happen? */
		nd6log((LOG_ERR,
		    "in6_tmpifadd: ifa update succeeded, but we got "
		    "no ifaddr\n"));
		return (EINVAL); /* XXX */
	}
	/*
	 * Share the public address's prefix and count the new user.
	 * NOTE(review): ia0->ia6_ndpr is dereferenced without a NULL check;
	 * presumably callers guarantee it is set -- confirm.
	 */
	newia->ia6_ndpr = ia0->ia6_ndpr;
	newia->ia6_ndpr->ndpr_refcnt++;
	ifa_free(&newia->ia_ifa);

	/*
	 * A newly added address might affect the status of other addresses.
	 * XXX: when the temporary address is generated with a new public
	 * address, the onlink check is redundant.  However, it would be safe
	 * to do the check explicitly everywhere a new address is generated,
	 * and, in fact, we surely need the check when we create a new
	 * temporary address due to deprecation of an old temporary address.
	 */
	pfxlist_onlink_check();

	return (0);
}

/*
 * Derive the absolute preferred/expire times of a prefix from its relative
 * lifetimes: 0 for an infinite lifetime, otherwise time_second plus the
 * lifetime.  Always returns 0.
 */
static int
in6_init_prefix_ltimes(struct nd_prefix *ndpr)
{
	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
		ndpr->ndpr_preferred = 0;
	else
		ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
		ndpr->ndpr_expire = 0;
	else
		ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;

	return 0;
}

/*
 * Same derivation for an address lifetime structure: fill ia6t_expire and
 * ia6t_preferred of `lt6` from ia6t_vltime and ia6t_pltime.  The `new`
 * argument is not referenced in the body.
 */
static void
in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
{
	/* init ia6t_expire */
	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
		lt6->ia6t_expire = 0;
	else {
		lt6->ia6t_expire = time_second;
		lt6->ia6t_expire += lt6->ia6t_vltime;
	}

	/* init ia6t_preferred */
	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
		lt6->ia6t_preferred = 0;
	else {
		lt6->ia6t_preferred = time_second;
		lt6->ia6t_preferred += lt6->ia6t_pltime;
	}
}

/*
 * Delete all the routing table entries that use the specified gateway.
 * XXX: this function causes search through all entries of routing table, so
 * it shouldn't be called when acting as a router.
 *
 * Only link-local gateways are processed; for those, every AF_INET6 table
 * (one per FIB) is walked with rt6_deleteroute() as the callback.  The
 * `ifp` argument is not referenced in the body.
 */
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
	struct radix_node_head *rnh;
	u_int fibnum;
	int s = splnet();

	/* We'll care only link-local addresses */
	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
		splx(s);
		return;
	}

	/* XXX Do we really need to walk any but the default FIB? */
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		rnh = rt_tables_get_rnh(fibnum, AF_INET6);
		if (rnh == NULL)
			continue;

		RADIX_NODE_HEAD_LOCK(rnh);
		rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
		RADIX_NODE_HEAD_UNLOCK(rnh);
	}
	splx(s);
}

/*
 * Tree-walk callback used by rt6_flush().  `arg` is the gateway address.
 * Returns 0 (entry left alone) unless the entry's gateway is an AF_INET6
 * address equal to `arg` and the entry is a non-static host route; in that
 * case the result of the RTM_DELETE request is returned.
 */
static int
rt6_deleteroute(struct radix_node *rn, void *arg)
{
#define SIN6(s)	((struct sockaddr_in6 *)s)
	struct rtentry *rt = (struct rtentry *)rn;
	struct in6_addr *gate = (struct in6_addr *)arg;

	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
		return (0);

	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
		return (0);
	}

	/*
	 * Do not delete a static route.
	 * XXX: this seems to be a bit ad-hoc.  Should we consider the
	 * 'cloned' bit instead?
	 */
	if ((rt->rt_flags & RTF_STATIC) != 0)
		return (0);

	/*
	 * We delete only host route.  This means, in particular, we don't
	 * delete default route.
	 */
	if ((rt->rt_flags & RTF_HOST) == 0)
		return (0);

	return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
	    rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum));
#undef SIN6
}

/*
 * Select the default interface by index.  Index 0 clears the default
 * interface pointer.  Returns EINVAL when the index is negative, larger
 * than V_if_index, or non-zero without a matching interface; otherwise 0
 * (`error` is never assigned after its initialization).
 */
int
nd6_setdefaultiface(int ifindex)
{
	int error = 0;

	if (ifindex < 0 || V_if_index < ifindex)
		return (EINVAL);
	if (ifindex != 0 && !ifnet_byindex(ifindex))
		return (EINVAL);

	if (V_nd6_defifindex != ifindex) {
		V_nd6_defifindex = ifindex;
		if (V_nd6_defifindex > 0)
			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
		else
			V_nd6_defifp = NULL;

		/*
		 * Our current implementation assumes one-to-one maping between
		 * interfaces and links, so it would be natural to use the
		 * default interface as the default link.
		 */
		scope6_setdefault(V_nd6_defifp);
	}

	return (error);
}
Index: stable/9/contrib/ntp/ntpd/ntp_crypto.c =================================================================== --- stable/9/contrib/ntp/ntpd/ntp_crypto.c (revision 281230) +++ stable/9/contrib/ntp/ntpd/ntp_crypto.c (revision 281231) @@ -1,4201 +1,4235 @@ /* * ntp_crypto.c - NTP version 4 public key routines */ #ifdef HAVE_CONFIG_H #include #endif #ifdef OPENSSL #include #include #include #include #include #include "ntpd.h" #include "ntp_stdlib.h" #include "ntp_unixtime.h" #include "ntp_string.h" #include #include "openssl/asn1_mac.h" #include "openssl/bn.h" #include "openssl/err.h" #include "openssl/evp.h" #include "openssl/pem.h" #include "openssl/rand.h" #include "openssl/x509v3.h" #ifdef KERNEL_PLL #include "ntp_syscall.h" #endif /* KERNEL_PLL */ /* * Extension field message format * * These are always signed and saved before sending in network byte * order. They must be converted to and from host byte order for * processing. * * +-------+-------+ * | op | len | <- extension pointer * +-------+-------+ * | assocID | * +---------------+ * | timestamp | <- value pointer * +---------------+ * | filestamp | * +---------------+ * | value len | * +---------------+ * | | * = value = * | | * +---------------+ * | signature len | * +---------------+ * | | * = signature = * | | * +---------------+ * * The CRYPTO_RESP bit is set to 0 for requests, 1 for responses. * Requests carry the association ID of the receiver; responses carry * the association ID of the sender. Some messages include only the * operation/length and association ID words and so have length 8 * octets. Ohers include the value structure and associated value and * signature fields. These messages include the timestamp, filestamp, * value and signature words and so have length at least 24 octets. The * signature and/or value fields can be empty, in which case the * respective length words are zero.
An empty value with nonempty * signature is syntactically valid, but semantically questionable. * * The filestamp represents the time when a cryptographic data file such * as a public/private key pair is created. It follows every reference * depending on that file and serves as a means to obsolete earlier data * of the same type. The timestamp represents the time when the * cryptographic data of the message were last signed. Creation of a * cryptographic data file or signing a message can occur only when the * creator or signor is synchronized to an authoritative source and * proventicated to a trusted authority. * * Note there are four conditions required for server trust. First, the * public key on the certificate must be verified, which involves a * number of format, content and consistency checks. Next, the server * identity must be confirmed by one of four schemes: private * certificate, IFF scheme, GQ scheme or certificate trail hike to a * self signed trusted certificate. Finally, the server signature must * be verified. 
*/ /* * Cryptodefines */ #define TAI_1972 10 /* initial TAI offset (s) */ #define MAX_LEAP 100 /* max UTC leapseconds (s) */ #define VALUE_LEN (6 * 4) /* min response field length */ +#define MAX_VALLEN (65535 - VALUE_LEN) #define YEAR (60 * 60 * 24 * 365) /* seconds in year */ /* * Global cryptodata in host byte order */ u_int32 crypto_flags = 0x0; /* status word */ /* * Global cryptodata in network byte order */ struct cert_info *cinfo = NULL; /* certificate info/value */ struct value hostval; /* host value */ struct value pubkey; /* public key */ struct value tai_leap; /* leapseconds table */ EVP_PKEY *iffpar_pkey = NULL; /* IFF parameters */ EVP_PKEY *gqpar_pkey = NULL; /* GQ parameters */ EVP_PKEY *mvpar_pkey = NULL; /* MV parameters */ char *iffpar_file = NULL; /* IFF parameters file */ char *gqpar_file = NULL; /* GQ parameters file */ char *mvpar_file = NULL; /* MV parameters file */ /* * Private cryptodata in host byte order */ static char *passwd = NULL; /* private key password */ static EVP_PKEY *host_pkey = NULL; /* host key */ static EVP_PKEY *sign_pkey = NULL; /* sign key */ static const EVP_MD *sign_digest = NULL; /* sign digest */ static u_int sign_siglen; /* sign key length */ static char *rand_file = NULL; /* random seed file */ static char *host_file = NULL; /* host key file */ static char *sign_file = NULL; /* sign key file */ static char *cert_file = NULL; /* certificate file */ static char *leap_file = NULL; /* leapseconds file */ static tstamp_t if_fstamp = 0; /* IFF filestamp */ static tstamp_t gq_fstamp = 0; /* GQ file stamp */ static tstamp_t mv_fstamp = 0; /* MV filestamp */ static u_int ident_scheme = 0; /* server identity scheme */ /* * Cryptotypes */ static int crypto_verify P((struct exten *, struct value *, struct peer *)); -static int crypto_encrypt P((struct exten *, struct value *, - keyid_t *)); +static int crypto_encrypt P((const u_char *, u_int, keyid_t *, + struct value *)); static int crypto_alice P((struct peer *, struct 
value *)); static int crypto_alice2 P((struct peer *, struct value *)); static int crypto_alice3 P((struct peer *, struct value *)); static int crypto_bob P((struct exten *, struct value *)); static int crypto_bob2 P((struct exten *, struct value *)); static int crypto_bob3 P((struct exten *, struct value *)); static int crypto_iff P((struct exten *, struct peer *)); static int crypto_gq P((struct exten *, struct peer *)); static int crypto_mv P((struct exten *, struct peer *)); static u_int crypto_send P((struct exten *, struct value *)); static tstamp_t crypto_time P((void)); static u_long asn2ntp P((ASN1_TIME *)); static struct cert_info *cert_parse P((u_char *, u_int, tstamp_t)); static int cert_sign P((struct exten *, struct value *)); static int cert_valid P((struct cert_info *, EVP_PKEY *)); static int cert_install P((struct exten *, struct peer *)); static void cert_free P((struct cert_info *)); static EVP_PKEY *crypto_key P((char *, tstamp_t *)); static int bighash P((BIGNUM *, BIGNUM *)); static struct cert_info *crypto_cert P((char *)); static void crypto_tai P((char *)); #ifdef SYS_WINNT int readlink(char * link, char * file, int len) { return (-1); } #endif /* * session_key - generate session key * * This routine generates a session key from the source address, * destination address, key ID and private value. The value of the * session key is the MD5 hash of these values, while the next key ID is * the first four octets of the hash. 
* * Returns the next key ID */ keyid_t session_key( struct sockaddr_storage *srcadr, /* source address */ struct sockaddr_storage *dstadr, /* destination address */ keyid_t keyno, /* key ID */ keyid_t private, /* private value */ u_long lifetime /* key lifetime */ ) { EVP_MD_CTX ctx; /* message digest context */ u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */ keyid_t keyid; /* key identifer */ u_int32 header[10]; /* data in network byte order */ u_int hdlen, len; if (!dstadr) return 0; /* * Generate the session key and key ID. If the lifetime is * greater than zero, install the key and call it trusted. */ hdlen = 0; switch(srcadr->ss_family) { case AF_INET: header[0] = ((struct sockaddr_in *)srcadr)->sin_addr.s_addr; header[1] = ((struct sockaddr_in *)dstadr)->sin_addr.s_addr; header[2] = htonl(keyno); header[3] = htonl(private); hdlen = 4 * sizeof(u_int32); break; case AF_INET6: memcpy(&header[0], &GET_INADDR6(*srcadr), sizeof(struct in6_addr)); memcpy(&header[4], &GET_INADDR6(*dstadr), sizeof(struct in6_addr)); header[8] = htonl(keyno); header[9] = htonl(private); hdlen = 10 * sizeof(u_int32); break; } EVP_DigestInit(&ctx, EVP_md5()); EVP_DigestUpdate(&ctx, (u_char *)header, hdlen); EVP_DigestFinal(&ctx, dgst, &len); memcpy(&keyid, dgst, 4); keyid = ntohl(keyid); if (lifetime != 0) { MD5auth_setkey(keyno, dgst, len); authtrust(keyno, lifetime); } #ifdef DEBUG if (debug > 1) printf( "session_key: %s > %s %08x %08x hash %08x life %lu\n", stoa(srcadr), stoa(dstadr), keyno, private, keyid, lifetime); #endif return (keyid); } /* * make_keylist - generate key list * * Returns * XEVNT_OK success * XEVNT_PER host certificate expired * * This routine constructs a pseudo-random sequence by repeatedly * hashing the session key starting from a given source address, * destination address, private value and the next key ID of the * preceeding session key. The last entry on the list is saved along * with its sequence number and public signature. 
*/
/*
 * peer	  association whose key list (peer->keylist, peer->keynumber) and
 *	  signed autokey value (peer->sndval) are rebuilt
 * dstadr local interface; when NULL nothing is done and XEVNT_OK is
 *	  returned
 *
 * The key list array is allocated on first use with NTP_MAXSESSION
 * entries.  The autokey value and, when a timestamp is available, its
 * signature are stored in network byte order.
 */
int
make_keylist(
	struct peer *peer,	/* peer structure pointer */
	struct interface *dstadr /* interface */
	)
{
	EVP_MD_CTX ctx;		/* signature context */
	tstamp_t tstamp;	/* NTP timestamp */
	struct autokey *ap;	/* autokey pointer */
	struct value *vp;	/* value pointer */
	keyid_t	keyid = 0;	/* next key ID */
	keyid_t	cookie;		/* private value */
	u_long	lifetime;
	u_int	len, mpoll;
	int	i;

	if (!dstadr)
		return XEVNT_OK;

	/*
	 * Allocate the key list if necessary.
	 */
	tstamp = crypto_time();
	if (peer->keylist == NULL)
		peer->keylist = emalloc(sizeof(keyid_t) *
		    NTP_MAXSESSION);

	/*
	 * Generate an initial key ID which is unique and greater than
	 * NTP_MAXKEY.
	 *
	 * The previous expression masked the random value with
	 * (1 << sizeof(keyid_t)) - 1.  sizeof yields a byte count, so the
	 * mask was 0xf and the seed always fell in 0..15, never above
	 * NTP_MAXKEY.  Take the low 32 bits of the random value and reject
	 * candidates that are not above NTP_MAXKEY or are already in use.
	 */
	while (1) {
		keyid = ntp_random() & 0xffffffff;
		if (keyid <= NTP_MAXKEY)
			continue;

		if (authhavekey(keyid))
			continue;
		break;
	}

	/*
	 * Generate up to NTP_MAXSESSION session keys. Stop if the
	 * next one would not be unique or not a session key ID or if
	 * it would expire before the next poll. The private value
	 * included in the hash is zero if broadcast mode, the peer
	 * cookie if client mode or the host cookie if symmetric modes.
	 */
	mpoll = 1 << min(peer->ppoll, peer->hpoll);
	lifetime = min(sys_automax, NTP_MAXSESSION * mpoll);
	if (peer->hmode == MODE_BROADCAST)
		cookie = 0;
	else
		cookie = peer->pcookie;
	for (i = 0; i < NTP_MAXSESSION; i++) {
		peer->keylist[i] = keyid;
		peer->keynumber = i;
		keyid = session_key(&dstadr->sin, &peer->srcadr, keyid,
		    cookie, lifetime);

		/*
		 * lifetime is unsigned: saturate at zero so that a
		 * lifetime shorter than one poll interval terminates the
		 * list instead of wrapping around to a huge value.
		 */
		if (lifetime > mpoll)
			lifetime -= mpoll;
		else
			lifetime = 0;
		if (auth_havekey(keyid) || keyid <= NTP_MAXKEY ||
		    lifetime <= mpoll)
			break;
	}

	/*
	 * Save the last session key ID, sequence number and timestamp,
	 * then sign these values for later retrieval by the clients. Be
	 * careful not to use invalid key media. Use the public values
	 * timestamp as filestamp.
	 */
	vp = &peer->sndval;
	if (vp->ptr == NULL)
		vp->ptr = emalloc(sizeof(struct autokey));
	ap = (struct autokey *)vp->ptr;
	ap->seq = htonl(peer->keynumber);
	ap->key = htonl(keyid);
	vp->tstamp = htonl(tstamp);
	vp->fstamp = hostval.tstamp;
	vp->vallen = htonl(sizeof(struct autokey));
	vp->siglen = 0;
	if (tstamp != 0) {
		/* Refuse to sign outside the certificate validity window. */
		if (tstamp < cinfo->first || tstamp > cinfo->last)
			return (XEVNT_PER);

		if (vp->sig == NULL)
			vp->sig = emalloc(sign_siglen);
		/* The first 12 octets of *vp and the autokey are signed. */
		EVP_SignInit(&ctx, sign_digest);
		EVP_SignUpdate(&ctx, (u_char *)vp, 12);
		EVP_SignUpdate(&ctx, vp->ptr, sizeof(struct autokey));
		if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey))
			vp->siglen = htonl(len);
		else
			msyslog(LOG_ERR, "make_keys %s\n",
			    ERR_error_string(ERR_get_error(), NULL));
		peer->flags |= FLAG_ASSOC;
	}
#ifdef DEBUG
	if (debug)
		printf("make_keys: %d %08x %08x ts %u fs %u poll %d\n",
		    ntohl(ap->seq), ntohl(ap->key), cookie,
		    ntohl(vp->tstamp), ntohl(vp->fstamp), peer->hpoll);
#endif
	return (XEVNT_OK);
}

/*
 * crypto_recv - parse extension fields
 *
 * This routine is called when the packet has been matched to an
 * association and passed sanity, format and MAC checks. We believe the
 * extension field values only if the field has proper format and
 * length, the timestamp and filestamp are valid and the signature has
 * valid length and is verified. There are a few cases where some values
 * are believed even if the signature fails, but only if the proventic
 * bit is not set.
*/ int crypto_recv( struct peer *peer, /* peer structure pointer */ struct recvbuf *rbufp /* packet buffer pointer */ ) { const EVP_MD *dp; /* message digest algorithm */ u_int32 *pkt; /* receive packet pointer */ struct autokey *ap, *bp; /* autokey pointer */ struct exten *ep, *fp; /* extension pointers */ int has_mac; /* length of MAC field */ int authlen; /* offset of MAC field */ associd_t associd; /* association ID */ tstamp_t tstamp = 0; /* timestamp */ tstamp_t fstamp = 0; /* filestamp */ u_int len; /* extension field length */ u_int code; /* extension field opcode */ u_int vallen = 0; /* value length */ X509 *cert; /* X509 certificate */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ keyid_t cookie; /* crumbles */ int hismode; /* packet mode */ int rval = XEVNT_OK; u_char *ptr; u_int32 temp32; /* * Initialize. Note that the packet has already been checked for * valid format and extension field lengths. First extract the * field length, command code and association ID in host byte * order. These are used with all commands and modes. Then check * the version number, which must be 2, and length, which must * be at least 8 for requests and VALUE_LEN (24) for responses. * Packets that fail either test sink without a trace. The * association ID is saved only if nonzero. */ authlen = LEN_PKT_NOMAC; hismode = (int)PKT_MODE((&rbufp->recv_pkt)->li_vn_mode); while ((has_mac = rbufp->recv_length - authlen) > MAX_MAC_LEN) { pkt = (u_int32 *)&rbufp->recv_pkt + authlen / 4; ep = (struct exten *)pkt; code = ntohl(ep->opcode) & 0xffff0000; len = ntohl(ep->opcode) & 0x0000ffff; associd = (associd_t) ntohl(pkt[1]); rval = XEVNT_OK; #ifdef DEBUG if (debug) printf( "crypto_recv: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n", peer->crypto, authlen, len, code >> 16, associd); #endif /* * Check version number and field length. If bad, * quietly ignore the packet. 
*/ if (((code >> 24) & 0x3f) != CRYPTO_VN || len < 8) { sys_unknownversion++; code |= CRYPTO_ERROR; } /* * Little vulnerability bandage here. If a perp tosses a * fake association ID over the fence, we better toss it * out. Only the first one counts. */ if (code & CRYPTO_RESP) { if (peer->assoc == 0) peer->assoc = associd; else if (peer->assoc != associd) code |= CRYPTO_ERROR; } if (len >= VALUE_LEN) { tstamp = ntohl(ep->tstamp); fstamp = ntohl(ep->fstamp); vallen = ntohl(ep->vallen); + /* + * Bug 2761: I hope this isn't too early... + */ + if ( vallen == 0 + || len - VALUE_LEN < vallen) + return XEVNT_LEN; } switch (code) { /* * Install status word, host name, signature scheme and * association ID. In OpenSSL the signature algorithm is * bound to the digest algorithm, so the NID completely * defines the signature scheme. Note the request and * response are identical, but neither is validated by * signature. The request is processed here only in * symmetric modes. The server name field might be * useful to implement access controls in future. */ case CRYPTO_ASSOC: /* * If the machine is running when this message * arrives, the other fellow has reset and so * must we. Otherwise, pass the extension field * to the transmit side. */ if (peer->crypto) { rval = XEVNT_ERR; break; } fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; /* fall through */ case CRYPTO_ASSOC | CRYPTO_RESP: /* * Discard the message if it has already been * stored or the message has been amputated. */ if (peer->crypto) break; if (vallen == 0 || vallen > MAXHOSTNAME || - len < VALUE_LEN + vallen) { + len - VALUE_LEN < vallen) { rval = XEVNT_LEN; break; } /* * Check the identity schemes are compatible. 
If * the client has PC, the server must have PC, * in which case the server public key and * identity are presumed valid, so we skip the * certificate and identity exchanges and move * immediately to the cookie exchange which * confirms the server signature. */ #ifdef DEBUG if (debug) printf( "crypto_recv: ident host 0x%x server 0x%x\n", crypto_flags, fstamp); #endif temp32 = (crypto_flags | ident_scheme) & fstamp & CRYPTO_FLAG_MASK; if (crypto_flags & CRYPTO_FLAG_PRIV) { if (!(fstamp & CRYPTO_FLAG_PRIV)) { rval = XEVNT_KEY; break; } else { fstamp |= CRYPTO_FLAG_VALID | CRYPTO_FLAG_VRFY | CRYPTO_FLAG_SIGN; } /* * In symmetric modes it is an error if either * peer requests identity and the other peer * does not support it. */ } else if ((hismode == MODE_ACTIVE || hismode == MODE_PASSIVE) && ((crypto_flags | fstamp) & CRYPTO_FLAG_MASK) && !temp32) { rval = XEVNT_KEY; break; /* * It is an error if the client requests * identity and the server does not support it. */ } else if (hismode == MODE_CLIENT && (fstamp & CRYPTO_FLAG_MASK) && !temp32) { rval = XEVNT_KEY; break; } /* * Otherwise, the identity scheme(s) are those * that both client and server support. */ fstamp = temp32 | (fstamp & ~CRYPTO_FLAG_MASK); /* * Discard the message if the signature digest * NID is not supported. */ temp32 = (fstamp >> 16) & 0xffff; dp = (const EVP_MD *)EVP_get_digestbynid(temp32); if (dp == NULL) { rval = XEVNT_MD; break; } /* * Save status word, host name and message * digest/signature type. 
*/ peer->crypto = fstamp; peer->digest = dp; peer->subject = emalloc(vallen + 1); memcpy(peer->subject, ep->pkt, vallen); peer->subject[vallen] = '\0'; peer->issuer = emalloc(vallen + 1); strcpy(peer->issuer, peer->subject); temp32 = (fstamp >> 16) & 0xffff; snprintf(statstr, NTP_MAXSTRLEN, "flags 0x%x host %s signature %s", fstamp, peer->subject, OBJ_nid2ln(temp32)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Decode X509 certificate in ASN.1 format and extract * the data containing, among other things, subject * name and public key. In the default identification * scheme, the certificate trail is followed to a self * signed trusted certificate. */ case CRYPTO_CERT | CRYPTO_RESP: /* * Discard the message if invalid. */ if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Scan the certificate list to delete old * versions and link the newest version first on * the list. */ if ((rval = cert_install(ep, peer)) != XEVNT_OK) break; /* * If we snatch the certificate before the * server certificate has been signed by its * server, it will be self signed. When it is, * we chase the certificate issuer, which the * server has, and keep going until a self * signed trusted certificate is found. Be sure * to update the issuer field, since it may * change. */ if (peer->issuer != NULL) free(peer->issuer); peer->issuer = emalloc(strlen(cinfo->issuer) + 1); strcpy(peer->issuer, cinfo->issuer); /* * We plug in the public key and lifetime from * the first certificate received. However, note * that this certificate might not be signed by * the server, so we can't check the * signature/digest NID. 
*/ if (peer->pkey == NULL) { ptr = (u_char *)cinfo->cert.ptr; cert = d2i_X509(NULL, &ptr, ntohl(cinfo->cert.vallen)); peer->pkey = X509_get_pubkey(cert); X509_free(cert); } peer->flash &= ~TEST8; temp32 = cinfo->nid; snprintf(statstr, NTP_MAXSTRLEN, "cert %s 0x%x %s (%u) fs %u", cinfo->subject, cinfo->flags, OBJ_nid2ln(temp32), temp32, ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Schnorr (IFF)identity scheme. This scheme is designed * for use with shared secret group keys and where the * certificate may be generated by a third party. The * client sends a challenge to the server, which * performs a calculation and returns the result. A * positive result is possible only if both client and * server contain the same secret group key. */ case CRYPTO_IFF | CRYPTO_RESP: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * If the the challenge matches the response, * the certificate public key, as well as the * server public key, signatyre and identity are * all verified at the same time. The server is * declared trusted, so we skip further * certificate stages and move immediately to * the cookie stage. */ if ((rval = crypto_iff(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_VRFY | CRYPTO_FLAG_PROV; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "iff fs %u", ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Guillou-Quisquater (GQ) identity scheme. This scheme * is designed for use with public certificates carrying * the GQ public key in an extension field. The client * sends a challenge to the server, which performs a * calculation and returns the result. 
A positive result * is possible only if both client and server contain * the same group key and the server has the matching GQ * private key. */ case CRYPTO_GQ | CRYPTO_RESP: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * If the the challenge matches the response, * the certificate public key, as well as the * server public key, signatyre and identity are * all verified at the same time. The server is * declared trusted, so we skip further * certificate stages and move immediately to * the cookie stage. */ if ((rval = crypto_gq(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_VRFY | CRYPTO_FLAG_PROV; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "gq fs %u", ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * MV */ case CRYPTO_MV | CRYPTO_RESP: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * If the the challenge matches the response, * the certificate public key, as well as the * server public key, signatyre and identity are * all verified at the same time. The server is * declared trusted, so we skip further * certificate stages and move immediately to * the cookie stage. */ if ((rval = crypto_mv(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_VRFY | CRYPTO_FLAG_PROV; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "mv fs %u", ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Cookie request in symmetric modes. Roll a random * cookie and install in symmetric mode. 
Encrypt for the * response, which is transmitted later. */ case CRYPTO_COOK: /* * Discard the message if invalid or certificate * trail not trusted. */ if (!(peer->crypto & CRYPTO_FLAG_VALID)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Pass the extension field to the transmit * side. If already agreed, walk away. */ fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; if (peer->crypto & CRYPTO_FLAG_AGREE) { peer->flash &= ~TEST8; break; } /* * Install cookie values and light the cookie * bit. The transmit side will pick up and * encrypt it for the response. */ key_expire(peer); peer->cookval.tstamp = ep->tstamp; peer->cookval.fstamp = ep->fstamp; RAND_bytes((u_char *)&peer->pcookie, 4); peer->crypto &= ~CRYPTO_FLAG_AUTO; peer->crypto |= CRYPTO_FLAG_AGREE; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u", peer->pcookie, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Cookie response in client and symmetric modes. If the * cookie bit is set, the working cookie is the EXOR of * the current and new values. */ case CRYPTO_COOK | CRYPTO_RESP: /* * Discard the message if invalid or identity * not confirmed or signature not verified with * respect to the cookie values. */ if (!(peer->crypto & CRYPTO_FLAG_VRFY)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, &peer->cookval, peer)) != XEVNT_OK) break; /* * Decrypt the cookie, hunting all the time for * errors. 
*/ if (vallen == (u_int) EVP_PKEY_size(host_pkey)) { u_int32 *cookiebuf = malloc( RSA_size(host_pkey->pkey.rsa)); if (cookiebuf == NULL) { rval = XEVNT_CKY; break; } if (RSA_private_decrypt(vallen, (u_char *)ep->pkt, (u_char *)cookiebuf, host_pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING) != 4) { rval = XEVNT_CKY; free(cookiebuf); break; } else { cookie = ntohl(*cookiebuf); free(cookiebuf); } } else { rval = XEVNT_CKY; break; } /* * Install cookie values and light the cookie * bit. If this is not broadcast client mode, we * are done here. */ key_expire(peer); peer->cookval.tstamp = ep->tstamp; peer->cookval.fstamp = ep->fstamp; if (peer->crypto & CRYPTO_FLAG_AGREE) peer->pcookie ^= cookie; else peer->pcookie = cookie; if (peer->hmode == MODE_CLIENT && !(peer->cast_flags & MDF_BCLNT)) peer->crypto |= CRYPTO_FLAG_AUTO; else peer->crypto &= ~CRYPTO_FLAG_AUTO; peer->crypto |= CRYPTO_FLAG_AGREE; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "cook %x ts %u fs %u", peer->pcookie, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Install autokey values in broadcast client and * symmetric modes. We have to do this every time the * sever/peer cookie changes or a new keylist is * rolled. Ordinarily, this is automatic as this message * is piggybacked on the first NTP packet sent upon * either of these events. Note that a broadcast client * or symmetric peer can receive this response without a * matching request. */ case CRYPTO_AUTO | CRYPTO_RESP: /* * Discard the message if invalid or identity * not confirmed or signature not verified with * respect to the receive autokey values. */ if (!(peer->crypto & CRYPTO_FLAG_VRFY)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, &peer->recval, peer)) != XEVNT_OK) break; /* * Install autokey values and light the * autokey bit. This is not hard. 
*/ if (peer->recval.ptr == NULL) peer->recval.ptr = emalloc(sizeof(struct autokey)); bp = (struct autokey *)peer->recval.ptr; peer->recval.tstamp = ep->tstamp; peer->recval.fstamp = ep->fstamp; ap = (struct autokey *)ep->pkt; bp->seq = ntohl(ap->seq); bp->key = ntohl(ap->key); peer->pkeyid = bp->key; peer->crypto |= CRYPTO_FLAG_AUTO; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "auto seq %d key %x ts %u fs %u", bp->seq, bp->key, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * X509 certificate sign response. Validate the * certificate signed by the server and install. Later * this can be provided to clients of this server in * lieu of the self signed certificate in order to * validate the public key. */ case CRYPTO_SIGN | CRYPTO_RESP: /* * Discard the message if invalid or not * proventic. */ if (!(peer->crypto & CRYPTO_FLAG_PROV)) { rval = XEVNT_ERR; break; } if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Scan the certificate list to delete old * versions and link the newest version first on * the list. */ if ((rval = cert_install(ep, peer)) != XEVNT_OK) break; peer->crypto |= CRYPTO_FLAG_SIGN; peer->flash &= ~TEST8; temp32 = cinfo->nid; snprintf(statstr, NTP_MAXSTRLEN, "sign %s 0x%x %s (%u) fs %u", cinfo->issuer, cinfo->flags, OBJ_nid2ln(temp32), temp32, ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * Install leapseconds table in symmetric modes. This * table is proventicated to the NIST primary servers, * either by copying the file containing the table from * a NIST server to a trusted server or directly using * this protocol. While the entire table is installed at * the server, presently only the current TAI offset is * provided via the kernel to other applications. 
*/ case CRYPTO_TAI: /* * Discard the message if invalid. */ if ((rval = crypto_verify(ep, NULL, peer)) != XEVNT_OK) break; /* * Pass the extension field to the transmit * side. Continue below if a leapseconds table * accompanies the message. */ fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; if (len <= VALUE_LEN) { peer->flash &= ~TEST8; break; } /* fall through */ case CRYPTO_TAI | CRYPTO_RESP: /* * If this is a response, discard the message if * signature not verified with respect to the * leapsecond table values. */ if (peer->cmmd == NULL) { if ((rval = crypto_verify(ep, &peer->tai_leap, peer)) != XEVNT_OK) break; } /* * Initialize peer variables with latest update. */ peer->tai_leap.tstamp = ep->tstamp; peer->tai_leap.fstamp = ep->fstamp; peer->tai_leap.vallen = ep->vallen; /* * Install the new table if there is no stored * table or the new table is more recent than * the stored table. Since a filestamp may have * changed, recompute the signatures. */ if (ntohl(peer->tai_leap.fstamp) > ntohl(tai_leap.fstamp)) { tai_leap.fstamp = ep->fstamp; tai_leap.vallen = ep->vallen; if (tai_leap.ptr != NULL) free(tai_leap.ptr); tai_leap.ptr = emalloc(vallen); memcpy(tai_leap.ptr, ep->pkt, vallen); crypto_update(); } crypto_flags |= CRYPTO_FLAG_TAI; peer->crypto |= CRYPTO_FLAG_LEAP; peer->flash &= ~TEST8; snprintf(statstr, NTP_MAXSTRLEN, "leap %u ts %u fs %u", vallen, ntohl(ep->tstamp), ntohl(ep->fstamp)); record_crypto_stats(&peer->srcadr, statstr); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; /* * We come here in symmetric modes for miscellaneous * commands that have value fields but are processed on * the transmit side. All we need do here is check for * valid field length. Remaining checks are below and on * the transmit side. 
*/ case CRYPTO_CERT: case CRYPTO_IFF: case CRYPTO_GQ: case CRYPTO_MV: case CRYPTO_SIGN: if (len < VALUE_LEN) { rval = XEVNT_LEN; break; } /* fall through */ /* * We come here for miscellaneous requests and unknown * requests and responses. If an unknown response or * error, forget it. If a request, save the extension * field for later. Unknown requests will be caught on * the transmit side. */ default: if (code & (CRYPTO_RESP | CRYPTO_ERROR)) { rval = XEVNT_ERR; } else if ((rval = crypto_verify(ep, NULL, peer)) == XEVNT_OK) { fp = emalloc(len); memcpy(fp, ep, len); temp32 = CRYPTO_RESP; fp->opcode |= htonl(temp32); peer->cmmd = fp; } } /* * We don't log length/format/timestamp errors and * duplicates, which are log clogging vulnerabilities. * The first error found terminates the extension field * scan and we return the laundry to the caller. A * length/format/timestamp error on transmit is * cheerfully ignored, as the message is not sent. */ if (rval > XEVNT_TSP) { snprintf(statstr, NTP_MAXSTRLEN, "error %x opcode %x ts %u fs %u", rval, code, tstamp, fstamp); record_crypto_stats(&peer->srcadr, statstr); report_event(rval, peer); #ifdef DEBUG if (debug) printf("crypto_recv: %s\n", statstr); #endif break; } else if (rval > XEVNT_OK && (code & CRYPTO_RESP)) { rval = XEVNT_OK; } authlen += len; } return (rval); } /* * crypto_xmit - construct extension fields * * This routine is called both when an association is configured and * when one is not. The only case where this matters is to retrieve the * autokey information, in which case the caller has to provide the * association ID to match the association. * * Returns length of extension field. 
*/ int crypto_xmit( struct pkt *xpkt, /* transmit packet pointer */ struct sockaddr_storage *srcadr_sin, /* active runway */ int start, /* offset to extension field */ struct exten *ep, /* extension pointer */ keyid_t cookie /* session cookie */ ) { u_int32 *pkt; /* packet pointer */ struct peer *peer; /* peer structure pointer */ u_int opcode; /* extension field opcode */ struct exten *fp; /* extension pointers */ struct cert_info *cp, *xp; /* certificate info/value pointer */ char certname[MAXHOSTNAME + 1]; /* subject name buffer */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t tstamp; u_int vallen; u_int len; struct value vtemp; associd_t associd; int rval; keyid_t tcookie; /* * Generate the requested extension field request code, length * and association ID. If this is a response and the host is not * synchronized, light the error bit and go home. */ pkt = (u_int32 *)xpkt + start / 4; fp = (struct exten *)pkt; opcode = ntohl(ep->opcode); associd = (associd_t) ntohl(ep->associd); fp->associd = htonl(associd); len = 8; rval = XEVNT_OK; tstamp = crypto_time(); switch (opcode & 0xffff0000) { /* * Send association request and response with status word and * host name. Note, this message is not signed and the filestamp * contains only the status word. */ case CRYPTO_ASSOC | CRYPTO_RESP: len += crypto_send(fp, &hostval); fp->fstamp = htonl(crypto_flags); break; case CRYPTO_ASSOC: len += crypto_send(fp, &hostval); fp->fstamp = htonl(crypto_flags | ident_scheme); break; /* * Send certificate request. Use the values from the extension * field. */ case CRYPTO_CERT: memset(&vtemp, 0, sizeof(vtemp)); vtemp.tstamp = ep->tstamp; vtemp.fstamp = ep->fstamp; vtemp.vallen = ep->vallen; vtemp.ptr = (u_char *)ep->pkt; len += crypto_send(fp, &vtemp); break; /* * Send certificate response or sign request. Use the values * from the certificate cache. If the request contains no * subject name, assume the name of this host. This is for * backwards compatibility. 
Private certificates are never sent. */ case CRYPTO_SIGN: case CRYPTO_CERT | CRYPTO_RESP: vallen = ntohl(ep->vallen); if (vallen == 8) { strcpy(certname, sys_hostname); - } else if (vallen == 0 || vallen > MAXHOSTNAME) { + } else if (vallen == 0 || vallen > MAXHOSTNAME || + len - VALUE_LEN < vallen) { rval = XEVNT_LEN; break; } else { memcpy(certname, ep->pkt, vallen); certname[vallen] = '\0'; } /* * Find all certificates with matching subject. If a * self-signed, trusted certificate is found, use that. * If not, use the first one with matching subject. A * private certificate is never divulged or signed. */ xp = NULL; for (cp = cinfo; cp != NULL; cp = cp->link) { if (cp->flags & CERT_PRIV) continue; if (strcmp(certname, cp->subject) == 0) { if (xp == NULL) xp = cp; if (strcmp(certname, cp->issuer) == 0 && cp->flags & CERT_TRUST) { xp = cp; break; } } } /* * Be careful who you trust. If not yet synchronized, * give back an empty response. If certificate not found * or beyond the lifetime, return an error. This is to * avoid a bad dude trying to get an expired certificate * re-signed. Otherwise, send it. * * Note the timestamp and filestamp are taken from the * certificate value structure. For all certificates the * timestamp is the latest signature update time. For * host and imported certificates the filestamp is the * creation epoch. For signed certificates the filestamp * is the creation epoch of the trusted certificate at * the base of the certificate trail. In principle, this * allows strong checking for signature masquerade. */ if (tstamp == 0) break; if (xp == NULL) rval = XEVNT_CRT; else if (tstamp < xp->first || tstamp > xp->last) rval = XEVNT_SRV; else len += crypto_send(fp, &xp->cert); break; /* * Send challenge in Schnorr (IFF) identity scheme. 
*/ case CRYPTO_IFF: if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) { rval = XEVNT_ERR; break; } if ((rval = crypto_alice(peer, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send response in Schnorr (IFF) identity scheme. */ case CRYPTO_IFF | CRYPTO_RESP: if ((rval = crypto_bob(ep, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send challenge in Guillou-Quisquater (GQ) identity scheme. */ case CRYPTO_GQ: if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) { rval = XEVNT_ERR; break; } if ((rval = crypto_alice2(peer, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send response in Guillou-Quisquater (GQ) identity scheme. */ case CRYPTO_GQ | CRYPTO_RESP: if ((rval = crypto_bob2(ep, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send challenge in MV identity scheme. */ case CRYPTO_MV: if ((peer = findpeerbyassoc(ep->pkt[0])) == NULL) { rval = XEVNT_ERR; break; } if ((rval = crypto_alice3(peer, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send response in MV identity scheme. */ case CRYPTO_MV | CRYPTO_RESP: if ((rval = crypto_bob3(ep, &vtemp)) == XEVNT_OK) { len += crypto_send(fp, &vtemp); value_free(&vtemp); } break; /* * Send certificate sign response. The integrity of the request * certificate has already been verified on the receive side. * Sign the response using the local server key. Use the * filestamp from the request and use the timestamp as the * current time. Light the error bit if the certificate is * invalid or contains an unverified signature. */ case CRYPTO_SIGN | CRYPTO_RESP: if ((rval = cert_sign(ep, &vtemp)) == XEVNT_OK) len += crypto_send(fp, &vtemp); value_free(&vtemp); break; /* * Send public key and signature. Use the values from the public * key. 
*/ case CRYPTO_COOK: len += crypto_send(fp, &pubkey); break; /* * Encrypt and send cookie and signature. Light the error bit if * anything goes wrong. */ case CRYPTO_COOK | CRYPTO_RESP: - if ((opcode & 0xffff) < VALUE_LEN) { + vallen = ntohl(ep->vallen); /* Must be <64k */ + if ( vallen == 0 + || (vallen >= MAX_VALLEN) + || (opcode & 0x0000ffff) < VALUE_LEN + vallen) { rval = XEVNT_LEN; break; } if (PKT_MODE(xpkt->li_vn_mode) == MODE_SERVER) { tcookie = cookie; } else { if ((peer = findpeerbyassoc(associd)) == NULL) { rval = XEVNT_ERR; break; } tcookie = peer->pcookie; } - if ((rval = crypto_encrypt(ep, &vtemp, &tcookie)) == - XEVNT_OK) + if ((rval = crypto_encrypt((const u_char *)ep->pkt, vallen, &tcookie, &vtemp)) + == XEVNT_OK) { len += crypto_send(fp, &vtemp); - value_free(&vtemp); + value_free(&vtemp); + } break; /* * Find peer and send autokey data and signature in broadcast * server and symmetric modes. Use the values in the autokey * structure. If no association is found, either the server has * restarted with new associations or some perp has replayed an * old message, in which case light the error bit. */ case CRYPTO_AUTO | CRYPTO_RESP: if ((peer = findpeerbyassoc(associd)) == NULL) { rval = XEVNT_ERR; break; } peer->flags &= ~FLAG_ASSOC; len += crypto_send(fp, &peer->sndval); break; /* * Send leapseconds table and signature. Use the values from the * tai structure. If no table has been loaded, just send an * empty request. */ case CRYPTO_TAI: case CRYPTO_TAI | CRYPTO_RESP: if (crypto_flags & CRYPTO_FLAG_TAI) len += crypto_send(fp, &tai_leap); break; /* * Default - Fall through for requests; for unknown responses, * flag as error. */ default: if (opcode & CRYPTO_RESP) rval = XEVNT_ERR; } /* * In case of error, flame the log. If a request, toss the * puppy; if a response, return so the sender can flame, too. 
*/ if (rval != XEVNT_OK) { opcode |= CRYPTO_ERROR; snprintf(statstr, NTP_MAXSTRLEN, "error %x opcode %x", rval, opcode); record_crypto_stats(srcadr_sin, statstr); report_event(rval, NULL); #ifdef DEBUG if (debug) printf("crypto_xmit: %s\n", statstr); #endif if (!(opcode & CRYPTO_RESP)) return (0); } /* * Round up the field length to a multiple of 8 bytes and save * the request code and length. */ len = ((len + 7) / 8) * 8; fp->opcode = htonl((opcode & 0xffff0000) | len); #ifdef DEBUG if (debug) printf( "crypto_xmit: flags 0x%x ext offset %d len %u code 0x%x assocID %d\n", crypto_flags, start, len, opcode >> 16, associd); #endif return (len); } /* * crypto_verify - parse and verify the extension field and value * * Returns * XEVNT_OK success * XEVNT_LEN bad field format or length * XEVNT_TSP bad timestamp * XEVNT_FSP bad filestamp * XEVNT_PUB bad or missing public key * XEVNT_SGL bad signature length * XEVNT_SIG signature not verified * XEVNT_ERR protocol error */ static int crypto_verify( struct exten *ep, /* extension pointer */ struct value *vp, /* value pointer */ struct peer *peer /* peer structure pointer */ ) { EVP_PKEY *pkey; /* server public key */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp, tstamp1 = 0; /* timestamp */ tstamp_t fstamp, fstamp1 = 0; /* filestamp */ u_int vallen; /* value length */ u_int siglen; /* signature length */ u_int opcode, len; int i; /* * We require valid opcode and field lengths, timestamp, * filestamp, public key, digest, signature length and * signature, where relevant. Note that preliminary length * checks are done in the main loop. */ len = ntohl(ep->opcode) & 0x0000ffff; opcode = ntohl(ep->opcode) & 0xffff0000; /* * Check for valid operation code and protocol. The opcode must * not have the error bit set. If a response, it must have a * value header. If a request and does not contain a value * header, no need for further checking. 
*/ if (opcode & CRYPTO_ERROR) return (XEVNT_ERR); if (opcode & CRYPTO_RESP) { if (len < VALUE_LEN) return (XEVNT_LEN); } else { if (len < VALUE_LEN) return (XEVNT_OK); } /* * We have a value header. Check for valid field lengths. The * field length must be long enough to contain the value header, * value and signature. Note both the value and signature fields * are rounded up to the next word. */ vallen = ntohl(ep->vallen); + if ( vallen == 0 + || vallen > MAX_VALLEN) + return (XEVNT_LEN); i = (vallen + 3) / 4; siglen = ntohl(ep->pkt[i++]); - if (len < VALUE_LEN + ((vallen + 3) / 4) * 4 + ((siglen + 3) / - 4) * 4) + if ( siglen > MAX_VALLEN + || len - VALUE_LEN < ((vallen + 3) / 4) * 4 + || len - VALUE_LEN - ((vallen + 3) / 4) * 4 + < ((siglen + 3) / 4) * 4) return (XEVNT_LEN); /* * Punt if this is a response with no data. Punt if this is a * request and a previous response is pending. */ if (opcode & CRYPTO_RESP) { if (vallen == 0) return (XEVNT_LEN); } else { if (peer->cmmd != NULL) return (XEVNT_LEN); } /* * Check for valid timestamp and filestamp. If the timestamp is * zero, the sender is not synchronized and signatures are * disregarded. If not, the timestamp must not precede the * filestamp. The timestamp and filestamp must not precede the * corresponding values in the value structure, if present. Once * the autokey values have been installed, the timestamp must * always be later than the corresponding value in the value * structure. Duplicate timestamps are illegal once the cookie * has been validated. */ tstamp = ntohl(ep->tstamp); fstamp = ntohl(ep->fstamp); if (tstamp == 0) return (XEVNT_OK); if (tstamp < fstamp) return (XEVNT_TSP); if (vp != NULL) { tstamp1 = ntohl(vp->tstamp); fstamp1 = ntohl(vp->fstamp); if ((tstamp < tstamp1 || (tstamp == tstamp1 && (peer->crypto & CRYPTO_FLAG_AUTO)))) return (XEVNT_TSP); if ((tstamp < fstamp1 || fstamp < fstamp1)) return (XEVNT_FSP); } /* * Check for valid signature length, public key and digest * algorithm. 
*/ if (crypto_flags & peer->crypto & CRYPTO_FLAG_PRIV) pkey = sign_pkey; else pkey = peer->pkey; if (siglen == 0 || pkey == NULL || peer->digest == NULL) return (XEVNT_OK); if (siglen != (u_int)EVP_PKEY_size(pkey)) return (XEVNT_SGL); /* * Darn, I thought we would never get here. Verify the * signature. If the identity exchange is verified, light the * proventic bit. If no client identity scheme is specified, * avoid doing the sign exchange. */ EVP_VerifyInit(&ctx, peer->digest); + /* XXX: the "+ 12" needs to be at least documented... */ EVP_VerifyUpdate(&ctx, (u_char *)&ep->tstamp, vallen + 12); if (EVP_VerifyFinal(&ctx, (u_char *)&ep->pkt[i], siglen, pkey) <= 0) return (XEVNT_SIG); if (peer->crypto & CRYPTO_FLAG_VRFY) { peer->crypto |= CRYPTO_FLAG_PROV; if (!(crypto_flags & CRYPTO_FLAG_MASK)) peer->crypto |= CRYPTO_FLAG_SIGN; } return (XEVNT_OK); } /* - * crypto_encrypt - construct encrypted cookie and signature from - * extension field and cookie + * crypto_encrypt - construct vp (encrypted cookie and signature) from + * the public key and cookie. * - * Returns + * Returns: * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_CKY bad or missing cookie * XEVNT_PER host certificate expired */ static int crypto_encrypt( - struct exten *ep, /* extension pointer */ - struct value *vp, /* value pointer */ - keyid_t *cookie /* server cookie */ + const u_char *ptr, /* Public Key */ + u_int vallen, /* Length of Public Key */ + keyid_t *cookie, /* server cookie */ + struct value *vp /* value pointer */ ) { EVP_PKEY *pkey; /* public key */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ u_int32 temp32; - u_int len; - u_char *ptr; /* * Extract the public key from the request. 
*/ - len = ntohl(ep->vallen); - ptr = (u_char *)ep->pkt; - pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, len); + pkey = d2i_PublicKey(EVP_PKEY_RSA, NULL, &ptr, vallen); if (pkey == NULL) { msyslog(LOG_ERR, "crypto_encrypt %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_PUB); } /* * Encrypt the cookie, encode in ASN.1 and sign. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = hostval.tstamp; - len = EVP_PKEY_size(pkey); - vp->vallen = htonl(len); - vp->ptr = emalloc(len); + vallen = EVP_PKEY_size(pkey); + vp->vallen = htonl(vallen); + vp->ptr = emalloc(vallen); temp32 = htonl(*cookie); if (!RSA_public_encrypt(4, (u_char *)&temp32, vp->ptr, pkey->pkey.rsa, RSA_PKCS1_OAEP_PADDING)) { msyslog(LOG_ERR, "crypto_encrypt %s\n", ERR_error_string(ERR_get_error(), NULL)); EVP_PKEY_free(pkey); return (XEVNT_CKY); } EVP_PKEY_free(pkey); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); - EVP_SignUpdate(&ctx, vp->ptr, len); - if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) - vp->siglen = htonl(len); + EVP_SignUpdate(&ctx, vp->ptr, vallen); + if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey)) + vp->siglen = htonl(sign_siglen); return (XEVNT_OK); } /* * crypto_ident - construct extension field for identity scheme * * This routine determines which identity scheme is in use and * constructs an extension field for that scheme. */ u_int crypto_ident( struct peer *peer /* peer structure pointer */ ) { char filename[MAXFILENAME + 1]; /* * If the server identity has already been verified, no further * action is necessary. Otherwise, try to load the identity file * of the certificate issuer. If the issuer file is not found, * try the host file. If nothing found, declare a cryptobust. 
* Note we can't get here unless the trusted certificate has * been found and the CRYPTO_FLAG_VALID bit is set, so the * certificate issuer is valid. */ if (peer->ident_pkey != NULL) EVP_PKEY_free(peer->ident_pkey); if (peer->crypto & CRYPTO_FLAG_GQ) { snprintf(filename, MAXFILENAME, "ntpkey_gq_%s", peer->issuer); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_GQ); snprintf(filename, MAXFILENAME, "ntpkey_gq_%s", sys_hostname); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_GQ); } if (peer->crypto & CRYPTO_FLAG_IFF) { snprintf(filename, MAXFILENAME, "ntpkey_iff_%s", peer->issuer); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_IFF); snprintf(filename, MAXFILENAME, "ntpkey_iff_%s", sys_hostname); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_IFF); } if (peer->crypto & CRYPTO_FLAG_MV) { snprintf(filename, MAXFILENAME, "ntpkey_mv_%s", peer->issuer); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_MV); snprintf(filename, MAXFILENAME, "ntpkey_mv_%s", sys_hostname); peer->ident_pkey = crypto_key(filename, &peer->fstamp); if (peer->ident_pkey != NULL) return (CRYPTO_MV); } /* * No compatible identity scheme is available. Life is hard. */ msyslog(LOG_INFO, "crypto_ident: no compatible identity scheme found"); return (0); } /* * crypto_args - construct extension field from arguments * * This routine creates an extension field with current timestamps and * specified opcode, association ID and optional string. Note that the * extension field is created here, but freed after the crypto_xmit() * call in the protocol module. * * Returns extension field pointer (no errors). + * + * XXX: opcode and len should really be 32-bit quantities and + * we should make sure that str is not too big. 
*/ struct exten * crypto_args( struct peer *peer, /* peer structure pointer */ u_int opcode, /* operation code */ char *str /* argument string */ ) { tstamp_t tstamp; /* NTP timestamp */ struct exten *ep; /* extension field pointer */ u_int len; /* extension field length */ + size_t slen; tstamp = crypto_time(); len = sizeof(struct exten); - if (str != NULL) - len += strlen(str); + if (str != NULL) { + slen = strlen(str); + len += slen; + } ep = emalloc(len); memset(ep, 0, len); if (opcode == 0) return (ep); ep->opcode = htonl(opcode + len); /* * If a response, send our ID; if a request, send the * responder's ID. */ if (opcode & CRYPTO_RESP) ep->associd = htonl(peer->associd); else ep->associd = htonl(peer->assoc); ep->tstamp = htonl(tstamp); ep->fstamp = hostval.tstamp; ep->vallen = 0; if (str != NULL) { - ep->vallen = htonl(strlen(str)); - memcpy((char *)ep->pkt, str, strlen(str)); + ep->vallen = htonl(slen); + memcpy((char *)ep->pkt, str, slen); } else { ep->pkt[0] = peer->associd; } return (ep); } /* * crypto_send - construct extension field from value components * * Returns extension field length. Note: it is not polite to send a * nonempty signature with zero timestamp or a nonzero timestamp with * empty signature, but these rules are not enforced here. + * + * XXX This code won't work on a box with 16-bit ints. */ u_int crypto_send( struct exten *ep, /* extension field pointer */ struct value *vp /* value pointer */ ) { u_int len, temp32; int i; /* * Copy data. If the data field is empty or zero length, encode * an empty value with length zero. */ ep->tstamp = vp->tstamp; ep->fstamp = vp->fstamp; ep->vallen = vp->vallen; len = 12; temp32 = ntohl(vp->vallen); if (temp32 > 0 && vp->ptr != NULL) memcpy(ep->pkt, vp->ptr, temp32); /* * Copy signature. If the signature field is empty or zero * length, encode an empty signature with length zero. 
*/ i = (temp32 + 3) / 4; len += i * 4 + 4; ep->pkt[i++] = vp->siglen; temp32 = ntohl(vp->siglen); if (temp32 > 0 && vp->sig != NULL) memcpy(&ep->pkt[i], vp->sig, temp32); len += temp32; return (len); } /* * crypto_update - compute new public value and sign extension fields * * This routine runs periodically, like once a day, and when something * changes. It updates the timestamps on three value structures and one * value structure list, then signs all the structures: * * hostval host name (not signed) * pubkey public key * cinfo certificate info/value list * tai_leap leapseconds file * * Filestamps are proventicated data, so this routine is run only when * the host has been synchronized to a proventicated source. Thus, the * timestamp is proventicated, too, and can be used to deflect * clogging attacks and even cook breakfast. * * Returns void (no errors) */ void crypto_update(void) { EVP_MD_CTX ctx; /* message digest context */ struct cert_info *cp, *cpn; /* certificate info/value */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t tstamp; /* NTP timestamp */ u_int len; if ((tstamp = crypto_time()) == 0) return; hostval.tstamp = htonl(tstamp); /* * Sign public key and timestamps. The filestamp is derived from * the host key file extension from wherever the file was * generated. */ if (pubkey.vallen != 0) { pubkey.tstamp = hostval.tstamp; pubkey.siglen = 0; if (pubkey.sig == NULL) pubkey.sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&pubkey, 12); EVP_SignUpdate(&ctx, pubkey.ptr, ntohl(pubkey.vallen)); if (EVP_SignFinal(&ctx, pubkey.sig, &len, sign_pkey)) pubkey.siglen = htonl(len); } /* * Sign certificates and timestamps. The filestamp is derived * from the certificate file extension from wherever the file * was generated. Note we do not throw expired certificates * away; they may have signed younger ones. 
*/ for (cp = cinfo; cp != NULL; cp = cpn) { cpn = cp->link; cp->cert.tstamp = hostval.tstamp; cp->cert.siglen = 0; if (cp->cert.sig == NULL) cp->cert.sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&cp->cert, 12); EVP_SignUpdate(&ctx, cp->cert.ptr, ntohl(cp->cert.vallen)); if (EVP_SignFinal(&ctx, cp->cert.sig, &len, sign_pkey)) cp->cert.siglen = htonl(len); } /* * Sign leapseconds table and timestamps. The filestamp is * derived from the leapsecond file extension from wherever the * file was generated. */ if (tai_leap.vallen != 0) { tai_leap.tstamp = hostval.tstamp; tai_leap.siglen = 0; if (tai_leap.sig == NULL) tai_leap.sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&tai_leap, 12); EVP_SignUpdate(&ctx, tai_leap.ptr, ntohl(tai_leap.vallen)); if (EVP_SignFinal(&ctx, tai_leap.sig, &len, sign_pkey)) tai_leap.siglen = htonl(len); } snprintf(statstr, NTP_MAXSTRLEN, "update ts %u", ntohl(hostval.tstamp)); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_update: %s\n", statstr); #endif } /* * value_free - free value structure components. * * Returns void (no errors) */ void value_free( struct value *vp /* value structure */ ) { if (vp->ptr != NULL) free(vp->ptr); if (vp->sig != NULL) free(vp->sig); memset(vp, 0, sizeof(struct value)); } /* * crypto_time - returns current NTP time in seconds. */ tstamp_t crypto_time() { l_fp tstamp; /* NTP time */ L_CLR(&tstamp); L_CLR(&tstamp); if (sys_leap != LEAP_NOTINSYNC) get_systime(&tstamp); return (tstamp.l_ui); } /* * asn2ntp - convert ASN1_TIME time structure to NTP time in seconds. */ u_long asn2ntp ( ASN1_TIME *asn1time /* pointer to ASN1_TIME structure */ ) { char *v; /* pointer to ASN1_TIME string */ struct tm tm; /* used to convert to NTP time */ /* * Extract time string YYMMDDHHMMSSZ from ASN1 time structure. 
* Note that the YY, MM, DD fields start with one, the HH, MM, * SS fields start with zero and the Z character should be 'Z' * for UTC. Also note that years less than 50 map to years * greater than 100. Dontcha love ASN.1? Better than MIL-188. */ if (asn1time->length > 13) return ((u_long)(~0)); /* We can't use -1 here. It's invalid */ v = (char *)asn1time->data; tm.tm_year = (v[0] - '0') * 10 + v[1] - '0'; if (tm.tm_year < 50) tm.tm_year += 100; tm.tm_mon = (v[2] - '0') * 10 + v[3] - '0' - 1; tm.tm_mday = (v[4] - '0') * 10 + v[5] - '0'; tm.tm_hour = (v[6] - '0') * 10 + v[7] - '0'; tm.tm_min = (v[8] - '0') * 10 + v[9] - '0'; tm.tm_sec = (v[10] - '0') * 10 + v[11] - '0'; tm.tm_wday = 0; tm.tm_yday = 0; tm.tm_isdst = 0; return (timegm(&tm) + JAN_1970); } /* * bighash() - compute a BIGNUM MD5 hash of a BIGNUM number. * * The binary form of bn is hashed with MD5 and the digest is stored in * bk as a BIGNUM. bn is fully read before bk is written, so callers may * pass the same object for both. Always returns 1. */ static int bighash( BIGNUM *bn, /* BIGNUM * from */ BIGNUM *bk /* BIGNUM * to */ ) { EVP_MD_CTX ctx; /* message digest context */ u_char dgst[EVP_MAX_MD_SIZE]; /* message digest */ u_char *ptr; /* a BIGNUM as binary string */ u_int len; len = BN_num_bytes(bn); ptr = emalloc(len); BN_bn2bin(bn, ptr); EVP_DigestInit(&ctx, EVP_md5()); EVP_DigestUpdate(&ctx, ptr, len); EVP_DigestFinal(&ctx, dgst, &len); BN_bin2bn(dgst, len, bk); + free(ptr); /* the binary scratch copy of bn is no longer needed */ return (1); } /* *********************************************************************** * * * The following routines implement the Schnorr (IFF) identity scheme * * * *********************************************************************** * * The Schnorr (IFF) identity scheme is intended for use when * the ntp-genkeys program does not generate the certificates used in * the protocol and the group key cannot be conveyed in the certificate * itself. For this purpose, new generations of IFF values must be * securely transmitted to all members of the group before use. The * scheme is self contained and independent of new generations of host * keys, sign keys and certificates.
* * The IFF identity scheme is based on DSA cryptography and algorithms * described in Stinson p. 285. The IFF values hide in a DSA cuckoo * structure, but only the primes and generator are used. The p is a * 512-bit prime, q a 160-bit prime that divides p - 1 and g is a qth root * of 1 mod p; that is, g^q = 1 mod p. The TA rolls private random * group key b disguised as a DSA structure member, then computes public * key g^(q - b). These values are shared only among group members and * never revealed in messages. Alice challenges Bob to confirm identity * using the protocol described below. * * How it works * * The scheme goes like this. Both Alice and Bob have the public primes * p, q and generator g. The TA gives private key b to Bob and public * key v = g^(q - b) mod p to Alice. * * Alice rolls new random challenge r and sends to Bob in the IFF * request message. Bob rolls new random k, then computes y = k + b r * mod q and x = g^k mod p and sends (y, hash(x)) to Alice in the * response message. Besides making the response shorter, the hash makes * it effectively impossible for an intruder to solve for b by observing * a number of these messages. * * Alice receives the response and computes g^y v^r mod p. After a bit * of algebra, this simplifies to g^k. If the hash of this result * matches hash(x), Alice knows that Bob has the group key b. The signed * response binds this knowledge to Bob's private key and the public key * previously received in his certificate. * * crypto_alice - construct Alice's challenge in IFF scheme * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_PER host certificate expired */ static int crypto_alice( struct peer *peer, /* peer pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* IFF parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ u_int len; /* challenge length, then signature length */ /* * The identity parameters must have correct format and content.
*/ if (peer->ident_pkey == NULL) return (XEVNT_ID); if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_alice: defective key"); return (XEVNT_PUB); } /* * Roll new random r (0 < r < q). The OpenSSL library has a bug * omitting BN_rand_range, so we have to do it the hard way. */ bctx = BN_CTX_new(); len = BN_num_bytes(dsa->q); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = BN_new(); BN_rand(peer->iffval, len * 8, -1, 1); /* r */ BN_mod(peer->iffval, peer->iffval, dsa->q, bctx); BN_CTX_free(bctx); /* * Sign and send to Bob. The filestamp is from the local file. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(peer->fstamp); vp->vallen = htonl(len); vp->ptr = emalloc(len); BN_bn2bin(peer->iffval, vp->ptr); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_bob - construct Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_PER host expired certificate */ static int crypto_bob( struct exten *ep, /* extension pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* IFF parameters */ DSA_SIG *sdsa; /* DSA signature context fake */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ BIGNUM *bn, *bk, *r; u_char *ptr; - u_int len; + u_int len; /* extension field length */ + u_int vallen = 0; /* value length */ /* * If the IFF parameters are not valid, something awful * happened or we are being tormented. 
*/ if (iffpar_pkey == NULL) { msyslog(LOG_INFO, "crypto_bob: scheme unavailable"); return (XEVNT_ID); } dsa = iffpar_pkey->pkey.dsa; /* * Extract r from the challenge. */ - len = ntohl(ep->vallen); - if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) { + vallen = ntohl(ep->vallen); + len = ntohl(ep->opcode) & 0x0000ffff; + if (vallen == 0 || len < VALUE_LEN || len - VALUE_LEN < vallen) + return XEVNT_LEN; + if ((r = BN_bin2bn((u_char *)ep->pkt, vallen, NULL)) == NULL) { msyslog(LOG_ERR, "crypto_bob %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Bob rolls random k (0 < k < q), computes y = k + b r mod q * and x = g^k mod p, then sends (y, hash(x)) to Alice. */ bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new(); sdsa = DSA_SIG_new(); - BN_rand(bk, len * 8, -1, 1); /* k */ + BN_rand(bk, vallen * 8, -1, 1); /* k */ BN_mod_mul(bn, dsa->priv_key, r, dsa->q, bctx); /* b r mod q */ BN_add(bn, bn, bk); BN_mod(bn, bn, dsa->q, bctx); /* k + b r mod q */ sdsa->r = BN_dup(bn); BN_mod_exp(bk, dsa->g, bk, dsa->p, bctx); /* g^k mod p */ bighash(bk, bk); sdsa->s = BN_dup(bk); BN_CTX_free(bctx); BN_free(r); BN_free(bn); BN_free(bk); /* * Encode the values in ASN.1 and sign. 
*/ - tstamp = crypto_time(); - memset(vp, 0, sizeof(struct value)); - vp->tstamp = htonl(tstamp); - vp->fstamp = htonl(if_fstamp); - len = i2d_DSA_SIG(sdsa, NULL); - if (len <= 0) { + vallen = i2d_DSA_SIG(sdsa, NULL); + if (vallen == 0) { msyslog(LOG_ERR, "crypto_bob %s\n", ERR_error_string(ERR_get_error(), NULL)); DSA_SIG_free(sdsa); return (XEVNT_ERR); } - vp->vallen = htonl(len); - ptr = emalloc(len); + if (vallen > MAX_VALLEN) { + msyslog(LOG_ERR, "crypto_bob: signature is too big: %u", + vallen); + DSA_SIG_free(sdsa); + return (XEVNT_LEN); + } + memset(vp, 0, sizeof(struct value)); + tstamp = crypto_time(); + vp->tstamp = htonl(tstamp); + vp->fstamp = htonl(if_fstamp); + vp->vallen = htonl(vallen); + ptr = emalloc(vallen); vp->ptr = ptr; i2d_DSA_SIG(sdsa, &ptr); DSA_SIG_free(sdsa); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); + /* XXX: more validation to make sure the sign fits... */ vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); - EVP_SignUpdate(&ctx, vp->ptr, len); - if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) + EVP_SignUpdate(&ctx, vp->ptr, vallen); + /* EVP_SignFinal() stores the signature length in vallen; len still holds the extension field length and must not be sent as siglen. */ + if (EVP_SignFinal(&ctx, vp->sig, &vallen, sign_pkey)) - vp->siglen = htonl(len); + vp->siglen = htonl(vallen); return (XEVNT_OK); } /* * crypto_iff - verify Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_FSP bad filestamp */ int crypto_iff( struct exten *ep, /* extension pointer */ struct peer *peer /* peer structure pointer */ ) { DSA *dsa; /* IFF parameters */ BN_CTX *bctx; /* BIGNUM context */ DSA_SIG *sdsa; /* DSA parameters */ BIGNUM *bn, *bk; u_int len; const u_char *ptr; int temp; /* * If the IFF parameters are not valid or no challenge was sent, * something awful happened or we are being tormented.
*/ if (peer->ident_pkey == NULL) { msyslog(LOG_INFO, "crypto_iff: scheme unavailable"); return (XEVNT_ID); } if (ntohl(ep->fstamp) != peer->fstamp) { msyslog(LOG_INFO, "crypto_iff: invalid filestamp %u", ntohl(ep->fstamp)); return (XEVNT_FSP); } if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_iff: defective key"); return (XEVNT_PUB); } if (peer->iffval == NULL) { msyslog(LOG_INFO, "crypto_iff: missing challenge"); return (XEVNT_ID); } /* * Extract the k + b r and g^k values from the response. */ bctx = BN_CTX_new(); bk = BN_new(); bn = BN_new(); len = ntohl(ep->vallen); ptr = (const u_char *)ep->pkt; if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) { msyslog(LOG_ERR, "crypto_iff %s\n", ERR_error_string(ERR_get_error(), NULL)); + /* Do not leak the scratch objects allocated just above. */ + BN_free(bn); + BN_free(bk); + BN_CTX_free(bctx); return (XEVNT_ERR); } /* * Compute g^(k + b r) g^(q - b)r mod p. */ BN_mod_exp(bn, dsa->pub_key, peer->iffval, dsa->p, bctx); BN_mod_exp(bk, dsa->g, sdsa->r, dsa->p, bctx); BN_mod_mul(bn, bn, bk, dsa->p, bctx); /* * Verify the hash of the result matches hash(x). */ bighash(bn, bn); temp = BN_cmp(bn, sdsa->s); BN_free(bn); BN_free(bk); BN_CTX_free(bctx); BN_free(peer->iffval); peer->iffval = NULL; DSA_SIG_free(sdsa); if (temp == 0) return (XEVNT_OK); else return (XEVNT_ID); } /* *********************************************************************** * * * The following routines implement the Guillou-Quisquater (GQ) * * identity scheme * * * *********************************************************************** * * The Guillou-Quisquater (GQ) identity scheme is intended for use when * the ntp-genkeys program generates the certificates used in the * protocol and the group key can be conveyed in a certificate extension * field. The scheme is self contained and independent of new * generations of host keys, sign keys and certificates. * * The GQ identity scheme is based on RSA cryptography and algorithms * described in Stinson p. 300 (with errors).
The GQ values hide in an * RSA cuckoo structure, but only the modulus is used. The 512-bit * public modulus is n = p q, where p and q are secret large primes. The * TA rolls random group key b disguised as an RSA structure member. * Except for the public key, these values are shared only among group * members and never revealed in messages. * * When rolling new certificates, Bob recomputes the private and * public keys. The private key u is a random roll, while the public key * is the inverse obscured by the group key v = (u^-1)^b. These values * replace the private and public keys normally generated by the RSA * scheme. Alice challenges Bob to confirm identity using the protocol * described below. * * How it works * * The scheme goes like this. Both Alice and Bob have the same modulus n * and some random b as the group key. These values are computed and * distributed in advance via secret means, although only the group key * b is truly secret. Each has a random private key u and public * key (u^-1)^b, although not necessarily the same ones. Bob and Alice * can regenerate the key pair from time to time without affecting * operations. The public key is conveyed on the certificate in an * extension field; the private key is never revealed. * * Alice rolls new random challenge r and sends to Bob in the GQ * request message. Bob rolls new random k, then computes y = k u^r mod * n and x = k^b mod n and sends (y, hash(x)) to Alice in the response * message. Besides making the response shorter, the hash makes it * effectively impossible for an intruder to solve for b by observing * a number of these messages. * * Alice receives the response and computes y^b v^r mod n. After a bit * of algebra, this simplifies to k^b. If the hash of this result * matches hash(x), Alice knows that Bob has the group key b. The signed * response binds this knowledge to Bob's private key and the public key * previously received in his certificate.
* * crypto_alice2 - construct Alice's challenge in GQ scheme * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_PER host certificate expired */ static int crypto_alice2( struct peer *peer, /* peer pointer */ struct value *vp /* value pointer */ ) { RSA *rsa; /* GQ parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; u_int len; /* * The identity parameters must have correct format and content. */ if (peer->ident_pkey == NULL) return (XEVNT_ID); if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) { msyslog(LOG_INFO, "crypto_alice2: defective key"); return (XEVNT_PUB); } /* * Roll new random r (0 < r < n). The OpenSSL library has a bug * omitting BN_rand_range, so we have to do it the hard way. */ bctx = BN_CTX_new(); len = BN_num_bytes(rsa->n); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = BN_new(); BN_rand(peer->iffval, len * 8, -1, 1); /* r mod n */ BN_mod(peer->iffval, peer->iffval, rsa->n, bctx); BN_CTX_free(bctx); /* * Sign and send to Bob. The filestamp is from the local file. 
*/ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(peer->fstamp); vp->vallen = htonl(len); vp->ptr = emalloc(len); BN_bn2bin(peer->iffval, vp->ptr); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_bob2 - construct Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_LEN bad challenge length * XEVNT_PER host certificate expired */ static int crypto_bob2( struct exten *ep, /* extension pointer */ struct value *vp /* value pointer */ ) { RSA *rsa; /* GQ parameters */ DSA_SIG *sdsa; /* DSA parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ BIGNUM *r, *k, *g, *y; u_char *ptr; u_int len; + u_int elen; /* extension field length */ /* * If the GQ parameters are not valid, something awful * happened or we are being tormented. */ if (gqpar_pkey == NULL) { msyslog(LOG_INFO, "crypto_bob2: scheme unavailable"); return (XEVNT_ID); } rsa = gqpar_pkey->pkey.rsa; /* * Extract r from the challenge. The value length comes off the * wire, so make sure it fits inside the extension field before * reading that many octets, as crypto_bob() does. */ len = ntohl(ep->vallen); + elen = ntohl(ep->opcode) & 0x0000ffff; + if (len == 0 || elen < VALUE_LEN || elen - VALUE_LEN < len) + return (XEVNT_LEN); if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) { msyslog(LOG_ERR, "crypto_bob2 %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Bob rolls random k (0 < k < n), computes y = k u^r mod n and * x = k^b mod n, then sends (y, hash(x)) to Alice.
*/ bctx = BN_CTX_new(); k = BN_new(); g = BN_new(); y = BN_new(); sdsa = DSA_SIG_new(); BN_rand(k, len * 8, -1, 1); /* k */ BN_mod(k, k, rsa->n, bctx); BN_mod_exp(y, rsa->p, r, rsa->n, bctx); /* u^r mod n */ BN_mod_mul(y, k, y, rsa->n, bctx); /* k u^r mod n */ sdsa->r = BN_dup(y); BN_mod_exp(g, k, rsa->e, rsa->n, bctx); /* k^b mod n */ bighash(g, g); sdsa->s = BN_dup(g); BN_CTX_free(bctx); BN_free(r); BN_free(k); BN_free(g); BN_free(y); /* * Encode the values in ASN.1 and sign. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(gq_fstamp); len = i2d_DSA_SIG(sdsa, NULL); if (len <= 0) { msyslog(LOG_ERR, "crypto_bob2 %s\n", ERR_error_string(ERR_get_error(), NULL)); DSA_SIG_free(sdsa); return (XEVNT_ERR); } vp->vallen = htonl(len); ptr = emalloc(len); vp->ptr = ptr; i2d_DSA_SIG(sdsa, &ptr); DSA_SIG_free(sdsa); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_gq - verify Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group keys * XEVNT_ERR protocol error * XEVNT_FSP bad filestamp */ int crypto_gq( struct exten *ep, /* extension pointer */ struct peer *peer /* peer structure pointer */ ) { RSA *rsa; /* GQ parameters */ BN_CTX *bctx; /* BIGNUM context */ DSA_SIG *sdsa; /* RSA signature context fake */ BIGNUM *y, *v; const u_char *ptr; u_int len; int temp; /* * If the GQ parameters are not valid or no challenge was sent, * something awful happened or we are being tormented. 
*/ if (peer->ident_pkey == NULL) { msyslog(LOG_INFO, "crypto_gq: scheme unavailable"); return (XEVNT_ID); } if (ntohl(ep->fstamp) != peer->fstamp) { msyslog(LOG_INFO, "crypto_gq: invalid filestamp %u", ntohl(ep->fstamp)); return (XEVNT_FSP); } if ((rsa = peer->ident_pkey->pkey.rsa) == NULL) { msyslog(LOG_INFO, "crypto_gq: defective key"); return (XEVNT_PUB); } if (peer->iffval == NULL) { msyslog(LOG_INFO, "crypto_gq: missing challenge"); return (XEVNT_ID); } /* * Extract the y = k u^r and hash(x = k^b) values from the * response. */ bctx = BN_CTX_new(); y = BN_new(); v = BN_new(); len = ntohl(ep->vallen); ptr = (const u_char *)ep->pkt; if ((sdsa = d2i_DSA_SIG(NULL, &ptr, len)) == NULL) { msyslog(LOG_ERR, "crypto_gq %s\n", ERR_error_string(ERR_get_error(), NULL)); + /* Do not leak the scratch objects allocated just above. */ + BN_CTX_free(bctx); + BN_free(y); + BN_free(v); return (XEVNT_ERR); } /* * Compute v^r y^b mod n. */ BN_mod_exp(v, peer->grpkey, peer->iffval, rsa->n, bctx); /* v^r mod n */ BN_mod_exp(y, sdsa->r, rsa->e, rsa->n, bctx); /* y^b mod n */ BN_mod_mul(y, v, y, rsa->n, bctx); /* v^r y^b mod n */ /* * Verify the hash of the result matches hash(x). */ bighash(y, y); temp = BN_cmp(y, sdsa->s); BN_CTX_free(bctx); BN_free(y); BN_free(v); BN_free(peer->iffval); peer->iffval = NULL; DSA_SIG_free(sdsa); if (temp == 0) return (XEVNT_OK); else return (XEVNT_ID); } /* *********************************************************************** * * * The following routines implement the Mu-Varadharajan (MV) identity * * scheme * * * *********************************************************************** */ /* * The Mu-Varadharajan (MV) cryptosystem was originally intended when * servers broadcast messages to clients, but clients never send * messages to servers. There is one encryption key for the server and a * separate decryption key for each client. It operated something like a * pay-per-view satellite broadcasting system where the session key is * encrypted by the broadcaster and the decryption keys are held in a * tamperproof set-top box.
* * The MV parameters and private encryption key hide in a DSA cuckoo * structure which uses the same parameters, but generated in a * different way. The values are used in an encryption scheme similar to * El Gamal cryptography and a polynomial formed from the expansion of * product terms (x - x[j]), as described in Mu, Y., and V. * Varadharajan: Robust and Secure Broadcasting, Proc. Indocrypt 2001, * 223-231. The paper has significant errors and serious omissions. * * Let q be the product of n distinct primes s'[j] (j = 1...n), where * each s'[j] has m significant bits. Let p be a prime p = 2 * q + 1, so * that q and each s'[j] divide p - 1 and p has M = n * m + 1 * significant bits. The elements x mod q of Zq with the elements 2 and * the primes removed form a field Zq* valid for polynomial arithmetic. * Let g be a generator of Zp; that is, gcd(g, p - 1) = 1 and g^q = 1 * mod p. We expect M to be in the 500-bit range and n relatively small, * like 25, so the likelihood of a randomly generated element of x mod q * of Zq colliding with a factor of p - 1 is very small and can be * avoided. Associated with each s'[j] is an element s[j] such that s[j] * s'[j] = s'[j] mod q. We find s[j] as the quotient (q + s'[j]) / * s'[j]. These are the parameters of the scheme and they are expensive * to compute. * * We set up an instance of the scheme as follows. A set of random * values x[j] mod q (j = 1...n) are generated as the zeros of a * polynomial of order n. The product terms (x - x[j]) are expanded to * form coefficients a[i] mod q (i = 0...n) in powers of x. These are * used as exponents of the generator g mod p to generate the private * encryption key A. The pair (gbar, ghat) of public server keys and the * pairs (xbar[j], xhat[j]) (j = 1...n) of private client keys are used * to construct the decryption keys. The devil is in the details. * * The distinguishing characteristic of this scheme is the capability to * revoke keys.
Included in the calculation of E, gbar and ghat is the * product s = prod(s'[j]) (j = 1...n) above. If the factor s'[j] is * subsequently removed from the product and E, gbar and ghat * recomputed, the jth client will no longer be able to compute E^-1 and * thus unable to decrypt the block. * * How it works * * The scheme goes like this. Bob has the server values (p, A, q, gbar, * ghat) and Alice the client values (p, xbar, xhat). * * Alice rolls new random challenge r (0 < r < p) and sends to Bob in * the MV request message. Bob rolls new random k (0 < k < q), encrypts * y = A^k mod p (a permutation) and sends (hash(y), gbar^k, ghat^k) to * Alice. * * Alice receives the response and computes the decryption key (the * inverse permutation) from previously obtained (xbar, xhat) and * (gbar^k, ghat^k) in the message. She computes the inverse, which is * unique by reasons explained in the ntp-keygen.c program sources. If * the hash of this result matches hash(y), Alice knows that Bob has the * group key b. The signed response binds this knowledge to Bob's * private key and the public key previously received in his * certificate. * * crypto_alice3 - construct Alice's challenge in MV scheme * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_PER host certificate expired */ static int crypto_alice3( struct peer *peer, /* peer pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* MV parameters */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; u_int len; /* * The identity parameters must have correct format and content. */ if (peer->ident_pkey == NULL) return (XEVNT_ID); if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_alice3: defective key"); return (XEVNT_PUB); } /* * Roll new random r (0 < r < q). The OpenSSL library has a bug * omitting BN_rand_range, so we have to do it the hard way. 
*/ bctx = BN_CTX_new(); len = BN_num_bytes(dsa->p); if (peer->iffval != NULL) BN_free(peer->iffval); peer->iffval = BN_new(); BN_rand(peer->iffval, len * 8, -1, 1); /* r */ BN_mod(peer->iffval, peer->iffval, dsa->p, bctx); BN_CTX_free(bctx); /* * Sign and send to Bob. The filestamp is from the local file. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(peer->fstamp); vp->vallen = htonl(len); vp->ptr = emalloc(len); BN_bn2bin(peer->iffval, vp->ptr); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_bob3 - construct Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_LEN bad challenge length * XEVNT_PER host certificate expired */ static int crypto_bob3( struct exten *ep, /* extension pointer */ struct value *vp /* value pointer */ ) { DSA *dsa; /* MV parameters */ DSA *sdsa; /* DSA signature context fake */ BN_CTX *bctx; /* BIGNUM context */ EVP_MD_CTX ctx; /* signature context */ tstamp_t tstamp; /* NTP timestamp */ BIGNUM *r, *k, *u; u_char *ptr; u_int len; + u_int elen; /* extension field length */ /* * If the MV parameters are not valid, something awful * happened or we are being tormented. */ if (mvpar_pkey == NULL) { msyslog(LOG_INFO, "crypto_bob3: scheme unavailable"); return (XEVNT_ID); } dsa = mvpar_pkey->pkey.dsa; /* * Extract r from the challenge. The value length comes off the * wire, so make sure it fits inside the extension field before * reading that many octets, as crypto_bob() does. */ len = ntohl(ep->vallen); + elen = ntohl(ep->opcode) & 0x0000ffff; + if (len == 0 || elen < VALUE_LEN || elen - VALUE_LEN < len) + return (XEVNT_LEN); if ((r = BN_bin2bn((u_char *)ep->pkt, len, NULL)) == NULL) { msyslog(LOG_ERR, "crypto_bob3 %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Bob rolls random k (0 < k < q), making sure it is not a * factor of q. He then computes y = A^k r and sends (hash(y), * gbar^k, ghat^k) to Alice.
*/ bctx = BN_CTX_new(); k = BN_new(); u = BN_new(); sdsa = DSA_new(); sdsa->p = BN_new(); sdsa->q = BN_new(); sdsa->g = BN_new(); while (1) { BN_rand(k, BN_num_bits(dsa->q), 0, 0); BN_mod(k, k, dsa->q, bctx); BN_gcd(u, k, dsa->q, bctx); if (BN_is_one(u)) break; } BN_mod_exp(u, dsa->g, k, dsa->p, bctx); /* A r */ BN_mod_mul(u, u, r, dsa->p, bctx); bighash(u, sdsa->p); BN_mod_exp(sdsa->q, dsa->priv_key, k, dsa->p, bctx); /* gbar */ BN_mod_exp(sdsa->g, dsa->pub_key, k, dsa->p, bctx); /* ghat */ BN_CTX_free(bctx); BN_free(k); BN_free(r); BN_free(u); /* * Encode the values in ASN.1 and sign. */ tstamp = crypto_time(); memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = htonl(mv_fstamp); len = i2d_DSAparams(sdsa, NULL); if (len <= 0) { msyslog(LOG_ERR, "crypto_bob3 %s\n", ERR_error_string(ERR_get_error(), NULL)); DSA_free(sdsa); return (XEVNT_ERR); } vp->vallen = htonl(len); ptr = emalloc(len); vp->ptr = ptr; i2d_DSAparams(sdsa, &ptr); DSA_free(sdsa); vp->siglen = 0; if (tstamp == 0) return (XEVNT_OK); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)&vp->tstamp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); return (XEVNT_OK); } /* * crypto_mv - verify Bob's response to Alice's challenge * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_ID bad or missing group key * XEVNT_ERR protocol error * XEVNT_FSP bad filestamp */ int crypto_mv( struct exten *ep, /* extension pointer */ struct peer *peer /* peer structure pointer */ ) { DSA *dsa; /* MV parameters */ DSA *sdsa; /* DSA parameters */ BN_CTX *bctx; /* BIGNUM context */ BIGNUM *k, *u, *v; u_int len; const u_char *ptr; int temp; /* * If the MV parameters are not valid or no challenge was sent, * something awful happened or we are being tormented. 
*/ if (peer->ident_pkey == NULL) { msyslog(LOG_INFO, "crypto_mv: scheme unavailable"); return (XEVNT_ID); } if (ntohl(ep->fstamp) != peer->fstamp) { msyslog(LOG_INFO, "crypto_mv: invalid filestamp %u", ntohl(ep->fstamp)); return (XEVNT_FSP); } if ((dsa = peer->ident_pkey->pkey.dsa) == NULL) { msyslog(LOG_INFO, "crypto_mv: defective key"); return (XEVNT_PUB); } if (peer->iffval == NULL) { msyslog(LOG_INFO, "crypto_mv: missing challenge"); return (XEVNT_ID); } /* * Extract the (hash(y), gbar, ghat) values from the response. */ bctx = BN_CTX_new(); k = BN_new(); u = BN_new(); v = BN_new(); len = ntohl(ep->vallen); ptr = (const u_char *)ep->pkt; if ((sdsa = d2i_DSAparams(NULL, &ptr, len)) == NULL) { msyslog(LOG_ERR, "crypto_mv %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_ERR); } /* * Compute (gbar^xhat ghat^xbar)^-1 mod p. */ BN_mod_exp(u, sdsa->q, dsa->pub_key, dsa->p, bctx); BN_mod_exp(v, sdsa->g, dsa->priv_key, dsa->p, bctx); BN_mod_mul(u, u, v, dsa->p, bctx); BN_mod_inverse(u, u, dsa->p, bctx); BN_mod_mul(v, u, peer->iffval, dsa->p, bctx); /* * The result should match the hash of r mod p. */ bighash(v, v); temp = BN_cmp(v, sdsa->p); BN_CTX_free(bctx); BN_free(k); BN_free(u); BN_free(v); BN_free(peer->iffval); peer->iffval = NULL; DSA_free(sdsa); if (temp == 0) return (XEVNT_OK); else return (XEVNT_ID); } /* *********************************************************************** * * * The following routines are used to manipulate certificates * * * *********************************************************************** */ /* * cert_parse - parse x509 certificate and create info/value structures. * * The server certificate includes the version number, issuer name, * subject name, public key and valid date interval. If the issuer name * is the same as the subject name, the certificate is self signed and * valid only if the server is configured as trustable. 
If the names are * different, another issuer has signed the server certificate and * vouched for it. In this case the server certificate is valid if * verified by the issuer public key. * * Returns certificate info/value pointer if valid, NULL if not. */ struct cert_info * /* certificate information structure */ cert_parse( u_char *asn1cert, /* X509 certificate */ u_int len, /* certificate length */ tstamp_t fstamp /* filestamp */ ) { X509 *cert; /* X509 certificate */ X509_EXTENSION *ext; /* X509v3 extension */ struct cert_info *ret; /* certificate info/value */ BIO *bp; X509V3_EXT_METHOD *method; char pathbuf[MAXFILENAME]; u_char *uptr; char *ptr; int temp, cnt, i; /* * Decode ASN.1 objects and construct certificate structure. */ uptr = asn1cert; if ((cert = d2i_X509(NULL, &uptr, len)) == NULL) { msyslog(LOG_ERR, "cert_parse %s\n", ERR_error_string(ERR_get_error(), NULL)); return (NULL); } /* * Extract version, subject name and public key. */ ret = emalloc(sizeof(struct cert_info)); memset(ret, 0, sizeof(struct cert_info)); if ((ret->pkey = X509_get_pubkey(cert)) == NULL) { msyslog(LOG_ERR, "cert_parse %s\n", ERR_error_string(ERR_get_error(), NULL)); cert_free(ret); X509_free(cert); return (NULL); } ret->version = X509_get_version(cert); X509_NAME_oneline(X509_get_subject_name(cert), pathbuf, MAXFILENAME - 1); ptr = strstr(pathbuf, "CN="); if (ptr == NULL) { msyslog(LOG_INFO, "cert_parse: invalid subject %s", pathbuf); cert_free(ret); X509_free(cert); return (NULL); } ret->subject = emalloc(strlen(ptr) + 1); strcpy(ret->subject, ptr + 3); /* * Extract remaining objects. Note that the NTP serial number is * the NTP seconds at the time of signing, but this might not be * the case for other authority. We don't bother to check the * objects at this time, since the real crunch can happen only * when the time is valid but not yet certificated. 
*/ ret->nid = OBJ_obj2nid(cert->cert_info->signature->algorithm); ret->digest = (const EVP_MD *)EVP_get_digestbynid(ret->nid); ret->serial = (u_long)ASN1_INTEGER_get(X509_get_serialNumber(cert)); X509_NAME_oneline(X509_get_issuer_name(cert), pathbuf, MAXFILENAME); if ((ptr = strstr(pathbuf, "CN=")) == NULL) { msyslog(LOG_INFO, "cert_parse: invalid issuer %s", pathbuf); cert_free(ret); X509_free(cert); return (NULL); } ret->issuer = emalloc(strlen(ptr) + 1); strcpy(ret->issuer, ptr + 3); ret->first = asn2ntp(X509_get_notBefore(cert)); ret->last = asn2ntp(X509_get_notAfter(cert)); /* * Extract extension fields. These are ad hoc ripoffs of * currently assigned functions and will certainly be changed * before prime time. */ cnt = X509_get_ext_count(cert); for (i = 0; i < cnt; i++) { ext = X509_get_ext(cert, i); method = X509V3_EXT_get(ext); temp = OBJ_obj2nid(ext->object); switch (temp) { /* * If a key_usage field is present, we decode whether * this is a trusted or private certificate. This is * dorky; all we want is to compare NIDs, but OpenSSL * insists on BIO text strings. */ case NID_ext_key_usage: bp = BIO_new(BIO_s_mem()); X509V3_EXT_print(bp, ext, 0, 0); BIO_gets(bp, pathbuf, MAXFILENAME); BIO_free(bp); #if DEBUG if (debug) printf("cert_parse: %s: %s\n", OBJ_nid2ln(temp), pathbuf); #endif if (strcmp(pathbuf, "Trust Root") == 0) ret->flags |= CERT_TRUST; else if (strcmp(pathbuf, "Private") == 0) ret->flags |= CERT_PRIV; break; /* * If a NID_subject_key_identifier field is present, it * contains the GQ public key. */ case NID_subject_key_identifier: ret->grplen = ext->value->length - 2; ret->grpkey = emalloc(ret->grplen); memcpy(ret->grpkey, &ext->value->data[2], ret->grplen); break; } } /* * If certificate is self signed, verify signature. 
*/ if (strcmp(ret->subject, ret->issuer) == 0) { if (!X509_verify(cert, ret->pkey)) { msyslog(LOG_INFO, "cert_parse: signature not verified %s", pathbuf); cert_free(ret); X509_free(cert); return (NULL); } } /* * Verify certificate valid times. Note that certificates cannot * be retroactive. */ if (ret->first > ret->last || ret->first < fstamp) { msyslog(LOG_INFO, "cert_parse: invalid certificate %s first %u last %u fstamp %u", ret->subject, ret->first, ret->last, fstamp); cert_free(ret); X509_free(cert); return (NULL); } /* * Build the value structure to sign and send later. */ ret->cert.fstamp = htonl(fstamp); ret->cert.vallen = htonl(len); ret->cert.ptr = emalloc(len); memcpy(ret->cert.ptr, asn1cert, len); #ifdef DEBUG if (debug > 1) X509_print_fp(stdout, cert); #endif X509_free(cert); return (ret); } /* * cert_sign - sign x509 certificate equest and update value structure. * * The certificate request includes a copy of the host certificate, * which includes the version number, subject name and public key of the * host. The resulting certificate includes these values plus the * serial number, issuer name and valid interval of the server. The * valid interval extends from the current time to the same time one * year hence. This may extend the life of the signed certificate beyond * that of the signer certificate. * * It is convenient to use the NTP seconds of the current time as the * serial number. In the value structure the timestamp is the current * time and the filestamp is taken from the extension field. Note this * routine is called only when the client clock is synchronized to a * proventic source, so timestamp comparisons are valid. * * The host certificate is valid from the time it was generated for a * period of one year. A signed certificate is valid from the time of * signature for a period of one year, but only the host certificate (or * sign certificate if used) is actually used to encrypt and decrypt * signatures. 
The signature trail is built from the client via the * intermediate servers to the trusted server. Each signature on the * trail must be valid at the time of signature, but it could happen * that a signer certificate expire before the signed certificate, which * remains valid until its expiration. * * Returns * XEVNT_OK success * XEVNT_PUB bad or missing public key * XEVNT_CRT bad or missing certificate * XEVNT_VFY certificate not verified * XEVNT_PER host certificate expired */ static int cert_sign( struct exten *ep, /* extension field pointer */ struct value *vp /* value pointer */ ) { X509 *req; /* X509 certificate request */ X509 *cert; /* X509 certificate */ X509_EXTENSION *ext; /* certificate extension */ ASN1_INTEGER *serial; /* serial number */ X509_NAME *subj; /* distinguished (common) name */ EVP_PKEY *pkey; /* public key */ EVP_MD_CTX ctx; /* message digest context */ tstamp_t tstamp; /* NTP timestamp */ u_int len; u_char *ptr; int i, temp; /* * Decode ASN.1 objects and construct certificate structure. * Make sure the system clock is synchronized to a proventic * source. */ tstamp = crypto_time(); if (tstamp == 0) return (XEVNT_TSP); if (tstamp < cinfo->first || tstamp > cinfo->last) return (XEVNT_PER); ptr = (u_char *)ep->pkt; if ((req = d2i_X509(NULL, &ptr, ntohl(ep->vallen))) == NULL) { msyslog(LOG_ERR, "cert_sign %s\n", ERR_error_string(ERR_get_error(), NULL)); return (XEVNT_CRT); } /* * Extract public key and check for errors. */ if ((pkey = X509_get_pubkey(req)) == NULL) { msyslog(LOG_ERR, "cert_sign %s\n", ERR_error_string(ERR_get_error(), NULL)); X509_free(req); return (XEVNT_PUB); } /* * Generate X509 certificate signed by this server. For this * purpose the issuer name is the server name. Also copy any * extensions that might be present. 
*/ cert = X509_new(); X509_set_version(cert, X509_get_version(req)); serial = ASN1_INTEGER_new(); ASN1_INTEGER_set(serial, tstamp); X509_set_serialNumber(cert, serial); X509_gmtime_adj(X509_get_notBefore(cert), 0L); X509_gmtime_adj(X509_get_notAfter(cert), YEAR); subj = X509_get_issuer_name(cert); X509_NAME_add_entry_by_txt(subj, "commonName", MBSTRING_ASC, (u_char *)sys_hostname, strlen(sys_hostname), -1, 0); subj = X509_get_subject_name(req); X509_set_subject_name(cert, subj); X509_set_pubkey(cert, pkey); ext = X509_get_ext(req, 0); temp = X509_get_ext_count(req); for (i = 0; i < temp; i++) { ext = X509_get_ext(req, i); X509_add_ext(cert, ext, -1); } X509_free(req); /* * Sign and verify the certificate. */ X509_sign(cert, sign_pkey, sign_digest); if (!X509_verify(cert, sign_pkey)) { printf("cert_sign\n%s\n", ERR_error_string(ERR_get_error(), NULL)); X509_free(cert); return (XEVNT_VFY); } len = i2d_X509(cert, NULL); /* * Build and sign the value structure. We have to sign it here, * since the response has to be returned right away. This is a * clogging hazard. */ memset(vp, 0, sizeof(struct value)); vp->tstamp = htonl(tstamp); vp->fstamp = ep->fstamp; vp->vallen = htonl(len); vp->ptr = emalloc(len); ptr = vp->ptr; i2d_X509(cert, &ptr); vp->siglen = 0; vp->sig = emalloc(sign_siglen); EVP_SignInit(&ctx, sign_digest); EVP_SignUpdate(&ctx, (u_char *)vp, 12); EVP_SignUpdate(&ctx, vp->ptr, len); if (EVP_SignFinal(&ctx, vp->sig, &len, sign_pkey)) vp->siglen = htonl(len); #ifdef DEBUG if (debug > 1) X509_print_fp(stdout, cert); #endif X509_free(cert); return (XEVNT_OK); } /* * cert_valid - verify certificate with given public key * * This is pretty ugly, as the certificate has to be verified in the * OpenSSL X509 structure, not in the DER format in the info/value * structure. 
* * Returns * XEVNT_OK success * XEVNT_VFY certificate not verified */ int cert_valid( struct cert_info *cinf, /* certificate information structure */ EVP_PKEY *pkey /* public key */ ) { X509 *cert; /* X509 certificate */ u_char *ptr; if (cinf->flags & CERT_SIGN) return (XEVNT_OK); ptr = (u_char *)cinf->cert.ptr; cert = d2i_X509(NULL, &ptr, ntohl(cinf->cert.vallen)); if (cert == NULL || !X509_verify(cert, pkey)) return (XEVNT_VFY); X509_free(cert); return (XEVNT_OK); } /* * cert - install certificate in certificate list * * This routine encodes an extension field into a certificate info/value * structure. It searches the certificate list for duplicates and * expunges whichever is older. It then searches the list for other * certificates that might be verified by this latest one. Finally, it * inserts this certificate first on the list. * * Returns * XEVNT_OK success * XEVNT_FSP bad or missing filestamp * XEVNT_CRT bad or missing certificate */ int cert_install( struct exten *ep, /* cert info/value */ struct peer *peer /* peer structure */ ) { struct cert_info *cp, *xp, *yp, **zp; /* * Parse and validate the signed certificate. If valid, * construct the info/value structure; otherwise, scamper home. */ if ((cp = cert_parse((u_char *)ep->pkt, ntohl(ep->vallen), ntohl(ep->fstamp))) == NULL) return (XEVNT_CRT); /* * Scan certificate list looking for another certificate with * the same subject and issuer. If another is found with the * same or older filestamp, unlink it and return the goodies to * the heap. If another is found with a later filestamp, discard * the new one and leave the building. * * Make a note to study this issue again. An earlier certificate * with a long lifetime might be overtaken by a later * certificate with a short lifetime, thus invalidating the * earlier signature. However, we gotta find a way to leak old * stuff from the cache, so we do it anyway. 
*/ yp = cp; zp = &cinfo; for (xp = cinfo; xp != NULL; xp = xp->link) { if (strcmp(cp->subject, xp->subject) == 0 && strcmp(cp->issuer, xp->issuer) == 0) { if (ntohl(cp->cert.fstamp) <= ntohl(xp->cert.fstamp)) { *zp = xp->link;; cert_free(xp); } else { cert_free(cp); return (XEVNT_FSP); } break; } zp = &xp->link; } yp->link = cinfo; cinfo = yp; /* * Scan the certificate list to see if Y is signed by X. This is * independent of order. */ for (yp = cinfo; yp != NULL; yp = yp->link) { for (xp = cinfo; xp != NULL; xp = xp->link) { /* * If the issuer of certificate Y matches the * subject of certificate X, verify the * signature of Y using the public key of X. If * so, X signs Y. */ if (strcmp(yp->issuer, xp->subject) != 0 || xp->flags & CERT_ERROR) continue; if (cert_valid(yp, xp->pkey) != XEVNT_OK) { yp->flags |= CERT_ERROR; continue; } /* * The signature Y is valid only if it begins * during the lifetime of X; however, it is not * necessarily an error, since some other * certificate might sign Y. */ if (yp->first < xp->first || yp->first > xp->last) continue; yp->flags |= CERT_SIGN; /* * If X is trusted, then Y is trusted. Note that * we might stumble over a self-signed * certificate that is not trusted, at least * temporarily. This can happen when a dude * first comes up, but has not synchronized the * clock and had its certificate signed by its * server. In case of broken certificate trail, * this might result in a loop that could * persist until timeout. */ if (!(xp->flags & (CERT_TRUST | CERT_VALID))) continue; yp->flags |= CERT_VALID; /* * If subject Y matches the server subject name, * then Y has completed the certificate trail. * Save the group key and light the valid bit. 
*/ if (strcmp(yp->subject, peer->subject) != 0) continue; if (yp->grpkey != NULL) { if (peer->grpkey != NULL) BN_free(peer->grpkey); peer->grpkey = BN_bin2bn(yp->grpkey, yp->grplen, NULL); } peer->crypto |= CRYPTO_FLAG_VALID; /* * If the server has an an identity scheme, * fetch the identity credentials. If not, the * identity is verified only by the trusted * certificate. The next signature will set the * server proventic. */ if (peer->crypto & (CRYPTO_FLAG_GQ | CRYPTO_FLAG_IFF | CRYPTO_FLAG_MV)) continue; peer->crypto |= CRYPTO_FLAG_VRFY; } } /* * That was awesome. Now update the timestamps and signatures. */ crypto_update(); return (XEVNT_OK); } /* * cert_free - free certificate information structure */ void cert_free( struct cert_info *cinf /* certificate info/value structure */ ) { if (cinf->pkey != NULL) EVP_PKEY_free(cinf->pkey); if (cinf->subject != NULL) free(cinf->subject); if (cinf->issuer != NULL) free(cinf->issuer); if (cinf->grpkey != NULL) free(cinf->grpkey); value_free(&cinf->cert); free(cinf); } /* *********************************************************************** * * * The following routines are used only at initialization time * * * *********************************************************************** */ /* * crypto_key - load cryptographic parameters and keys from files * * This routine loads a PEM-encoded public/private key pair and extracts * the filestamp from the file name. * * Returns public key pointer if valid, NULL if not. Side effect updates * the filestamp if valid. */ static EVP_PKEY * crypto_key( char *cp, /* file name */ tstamp_t *fstamp /* filestamp */ ) { FILE *str; /* file handle */ EVP_PKEY *pkey = NULL; /* public/private key */ char filename[MAXFILENAME]; /* name of key file */ char linkname[MAXFILENAME]; /* filestamp buffer) */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ char *ptr; /* * Open the key file. If the first character of the file name is * not '/', prepend the keys directory string. 
If something goes * wrong, abandon ship. */ if (*cp == '/') strcpy(filename, cp); else snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp); str = fopen(filename, "r"); if (str == NULL) return (NULL); /* * Read the filestamp, which is contained in the first line. */ if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) { msyslog(LOG_ERR, "crypto_key: no data %s\n", filename); (void)fclose(str); return (NULL); } if ((ptr = strrchr(ptr, '.')) == NULL) { msyslog(LOG_ERR, "crypto_key: no filestamp %s\n", filename); (void)fclose(str); return (NULL); } if (sscanf(++ptr, "%u", fstamp) != 1) { msyslog(LOG_ERR, "crypto_key: invalid timestamp %s\n", filename); (void)fclose(str); return (NULL); } /* * Read and decrypt PEM-encoded private key. */ pkey = PEM_read_PrivateKey(str, NULL, NULL, passwd); fclose(str); if (pkey == NULL) { msyslog(LOG_ERR, "crypto_key %s\n", ERR_error_string(ERR_get_error(), NULL)); return (NULL); } /* * Leave tracks in the cryptostats. */ if ((ptr = strrchr(linkname, '\n')) != NULL) *ptr = '\0'; snprintf(statstr, NTP_MAXSTRLEN, "%s mod %d", &linkname[2], EVP_PKEY_size(pkey) * 8); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_key: %s\n", statstr); if (debug > 1) { if (pkey->type == EVP_PKEY_DSA) DSA_print_fp(stdout, pkey->pkey.dsa, 0); else RSA_print_fp(stdout, pkey->pkey.rsa, 0); } #endif return (pkey); } /* * crypto_cert - load certificate from file * * This routine loads a X.509 RSA or DSA certificate from a file and * constructs a info/cert value structure for this machine. The * structure includes a filestamp extracted from the file name. Later * the certificate can be sent to another machine by request. * * Returns certificate info/value pointer if valid, NULL if not. 
*/ static struct cert_info * /* certificate information */ crypto_cert( char *cp /* file name */ ) { struct cert_info *ret; /* certificate information */ FILE *str; /* file handle */ char filename[MAXFILENAME]; /* name of certificate file */ char linkname[MAXFILENAME]; /* filestamp buffer */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t fstamp; /* filestamp */ long len; char *ptr; char *name, *header; u_char *data; /* * Open the certificate file. If the first character of the file * name is not '/', prepend the keys directory string. If * something goes wrong, abandon ship. */ if (*cp == '/') strcpy(filename, cp); else snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp); str = fopen(filename, "r"); if (str == NULL) return (NULL); /* * Read the filestamp, which is contained in the first line. */ if ((ptr = fgets(linkname, MAXFILENAME, str)) == NULL) { msyslog(LOG_ERR, "crypto_cert: no data %s\n", filename); (void)fclose(str); return (NULL); } if ((ptr = strrchr(ptr, '.')) == NULL) { msyslog(LOG_ERR, "crypto_cert: no filestamp %s\n", filename); (void)fclose(str); return (NULL); } if (sscanf(++ptr, "%u", &fstamp) != 1) { msyslog(LOG_ERR, "crypto_cert: invalid filestamp %s\n", filename); (void)fclose(str); return (NULL); } /* * Read PEM-encoded certificate and install. */ if (!PEM_read(str, &name, &header, &data, &len)) { msyslog(LOG_ERR, "crypto_cert %s\n", ERR_error_string(ERR_get_error(), NULL)); (void)fclose(str); return (NULL); } free(header); if (strcmp(name, "CERTIFICATE") !=0) { msyslog(LOG_INFO, "crypto_cert: wrong PEM type %s", name); free(name); free(data); (void)fclose(str); return (NULL); } free(name); /* * Parse certificate and generate info/value structure. 
*/ ret = cert_parse(data, len, fstamp); free(data); (void)fclose(str); if (ret == NULL) return (NULL); if ((ptr = strrchr(linkname, '\n')) != NULL) *ptr = '\0'; snprintf(statstr, NTP_MAXSTRLEN, "%s 0x%x len %lu", &linkname[2], ret->flags, len); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_cert: %s\n", statstr); #endif return (ret); } /* * crypto_tai - load leapseconds table from file * * This routine loads the ERTS leapsecond file in NIST text format, * converts to a value structure and extracts a filestamp from the file * name. The data are used to establish the TAI offset from UTC, which * is provided to the kernel if supported. Later the data can be sent to * another machine on request. */ static void crypto_tai( char *cp /* file name */ ) { FILE *str; /* file handle */ char buf[NTP_MAXSTRLEN]; /* file line buffer */ u_int32 leapsec[MAX_LEAP]; /* NTP time at leaps */ int offset; /* offset at leap (s) */ char filename[MAXFILENAME]; /* name of leapseconds file */ char linkname[MAXFILENAME]; /* file link (for filestamp) */ char statstr[NTP_MAXSTRLEN]; /* statistics for filegen */ tstamp_t fstamp; /* filestamp */ u_int len; u_int32 *ptr; char *dp; int rval, i, j; /* * Open the file and discard comment lines. If the first * character of the file name is not '/', prepend the keys * directory string. If the file is not found, not to worry; it * can be retrieved over the net. But, if it is found with * errors, we crash and burn. */ if (*cp == '/') strcpy(filename, cp); else snprintf(filename, MAXFILENAME, "%s/%s", keysdir, cp); if ((str = fopen(filename, "r")) == NULL) return; /* * Extract filestamp if present. 
*/ rval = readlink(filename, linkname, MAXFILENAME - 1); if (rval > 0) { linkname[rval] = '\0'; dp = strrchr(linkname, '.'); } else { dp = strrchr(filename, '.'); } if (dp != NULL) sscanf(++dp, "%u", &fstamp); else fstamp = 0; tai_leap.fstamp = htonl(fstamp); /* * We are rather paranoid here, since an intruder might cause a * coredump by infiltrating naughty values. Empty lines and * comments are ignored. Other lines must begin with two * integers followed by junk or comments. The first integer is * the NTP seconds of leap insertion, the second is the offset * of TAI relative to UTC after that insertion. The second word * must equal the initial insertion of ten seconds on 1 January * 1972 plus one second for each succeeding insertion. */ i = 0; while (i < MAX_LEAP) { dp = fgets(buf, NTP_MAXSTRLEN - 1, str); if (dp == NULL) break; if (strlen(buf) < 1) continue; if (*buf == '#') continue; if (sscanf(buf, "%u %d", &leapsec[i], &offset) != 2) continue; if (i != offset - TAI_1972) break; i++; } fclose(str); if (dp != NULL) { msyslog(LOG_INFO, "crypto_tai: leapseconds file %s error %d", cp, rval); exit (-1); } /* * The extension field table entries consists of the NTP seconds * of leap insertion in network byte order. */ len = i * sizeof(u_int32); tai_leap.vallen = htonl(len); ptr = emalloc(len); tai_leap.ptr = (u_char *)ptr; for (j = 0; j < i; j++) *ptr++ = htonl(leapsec[j]); crypto_flags |= CRYPTO_FLAG_TAI; snprintf(statstr, NTP_MAXSTRLEN, "%s fs %u leap %u len %u", cp, fstamp, leapsec[--j], len); record_crypto_stats(NULL, statstr); #ifdef DEBUG if (debug) printf("crypto_tai: %s\n", statstr); #endif } /* * crypto_setup - load keys, certificate and leapseconds table * * This routine loads the public/private host key and certificate. If * available, it loads the public/private sign key, which defaults to * the host key, and leapseconds table. The host key must be RSA, but * the sign key can be either RSA or DSA. 
In either case, the public key * on the certificate must agree with the sign key. */ void crypto_setup(void) { EVP_PKEY *pkey; /* private/public key pair */ char filename[MAXFILENAME]; /* file name buffer */ l_fp seed; /* crypto PRNG seed as NTP timestamp */ tstamp_t fstamp; /* filestamp */ tstamp_t sstamp; /* sign filestamp */ u_int len, bytes; u_char *ptr; /* * Initialize structures. */ if (!crypto_flags) return; gethostname(filename, MAXFILENAME); bytes = strlen(filename) + 1; sys_hostname = emalloc(bytes); memcpy(sys_hostname, filename, bytes); if (passwd == NULL) passwd = sys_hostname; memset(&hostval, 0, sizeof(hostval)); memset(&pubkey, 0, sizeof(pubkey)); memset(&tai_leap, 0, sizeof(tai_leap)); /* * Load required random seed file and seed the random number * generator. Be default, it is found in the user home * directory. The root home directory may be / or /root, * depending on the system. Wiggle the contents a bit and write * it back so the sequence does not repeat when we next restart. */ ERR_load_crypto_strings(); if (rand_file == NULL) { if ((RAND_file_name(filename, MAXFILENAME)) != NULL) { rand_file = emalloc(strlen(filename) + 1); strcpy(rand_file, filename); } } else if (*rand_file != '/') { snprintf(filename, MAXFILENAME, "%s/%s", keysdir, rand_file); free(rand_file); rand_file = emalloc(strlen(filename) + 1); strcpy(rand_file, filename); } if (rand_file == NULL) { msyslog(LOG_ERR, "crypto_setup: random seed file not specified"); exit (-1); } if ((bytes = RAND_load_file(rand_file, -1)) == 0) { msyslog(LOG_ERR, "crypto_setup: random seed file %s not found\n", rand_file); exit (-1); } arc4random_buf(&seed, sizeof(l_fp)); RAND_seed(&seed, sizeof(l_fp)); RAND_write_file(rand_file); OpenSSL_add_all_algorithms(); #ifdef DEBUG if (debug) printf( "crypto_setup: OpenSSL version %lx random seed file %s bytes read %d\n", SSLeay(), rand_file, bytes); #endif /* * Load required host key from file "ntpkey_host_". It * also becomes the default sign key. 
*/ if (host_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_host_%s", sys_hostname); host_file = emalloc(strlen(filename) + 1); strcpy(host_file, filename); } pkey = crypto_key(host_file, &fstamp); if (pkey == NULL) { msyslog(LOG_ERR, "crypto_setup: host key file %s not found or corrupt", host_file); exit (-1); } host_pkey = pkey; sign_pkey = pkey; sstamp = fstamp; hostval.fstamp = htonl(fstamp); if (host_pkey->type != EVP_PKEY_RSA) { msyslog(LOG_ERR, "crypto_setup: host key is not RSA key type"); exit (-1); } hostval.vallen = htonl(strlen(sys_hostname)); hostval.ptr = (u_char *)sys_hostname; /* * Construct public key extension field for agreement scheme. */ len = i2d_PublicKey(host_pkey, NULL); ptr = emalloc(len); pubkey.ptr = ptr; i2d_PublicKey(host_pkey, &ptr); pubkey.vallen = htonl(len); pubkey.fstamp = hostval.fstamp; /* * Load optional sign key from file "ntpkey_sign_". If * loaded, it becomes the sign key. */ if (sign_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_sign_%s", sys_hostname); sign_file = emalloc(strlen(filename) + 1); strcpy(sign_file, filename); } pkey = crypto_key(sign_file, &fstamp); if (pkey != NULL) { sign_pkey = pkey; sstamp = fstamp; } sign_siglen = EVP_PKEY_size(sign_pkey); /* * Load optional IFF parameters from file * "ntpkey_iff_". */ if (iffpar_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_iff_%s", sys_hostname); iffpar_file = emalloc(strlen(filename) + 1); strcpy(iffpar_file, filename); } iffpar_pkey = crypto_key(iffpar_file, &if_fstamp); if (iffpar_pkey != NULL) crypto_flags |= CRYPTO_FLAG_IFF; /* * Load optional GQ parameters from file "ntpkey_gq_". */ if (gqpar_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_gq_%s", sys_hostname); gqpar_file = emalloc(strlen(filename) + 1); strcpy(gqpar_file, filename); } gqpar_pkey = crypto_key(gqpar_file, &gq_fstamp); if (gqpar_pkey != NULL) crypto_flags |= CRYPTO_FLAG_GQ; /* * Load optional MV parameters from file "ntpkey_mv_". 
*/ if (mvpar_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_mv_%s", sys_hostname); mvpar_file = emalloc(strlen(filename) + 1); strcpy(mvpar_file, filename); } mvpar_pkey = crypto_key(mvpar_file, &mv_fstamp); if (mvpar_pkey != NULL) crypto_flags |= CRYPTO_FLAG_MV; /* * Load required certificate from file "ntpkey_cert_". */ if (cert_file == NULL) { snprintf(filename, MAXFILENAME, "ntpkey_cert_%s", sys_hostname); cert_file = emalloc(strlen(filename) + 1); strcpy(cert_file, filename); } if ((cinfo = crypto_cert(cert_file)) == NULL) { msyslog(LOG_ERR, "certificate file %s not found or corrupt", cert_file); exit (-1); } /* * The subject name must be the same as the host name, unless * the certificate is private, in which case it may have come * from another host. */ if (!(cinfo->flags & CERT_PRIV) && strcmp(cinfo->subject, sys_hostname) != 0) { msyslog(LOG_ERR, "crypto_setup: certificate %s not for this host", cert_file); cert_free(cinfo); exit (-1); } /* * It the certificate is trusted, the subject must be the same * as the issuer, in other words it must be self signed. */ if (cinfo->flags & CERT_TRUST && strcmp(cinfo->subject, cinfo->issuer) != 0) { if (cert_valid(cinfo, sign_pkey) != XEVNT_OK) { msyslog(LOG_ERR, "crypto_setup: certificate %s is trusted, but not self signed.", cert_file); cert_free(cinfo); exit (-1); } } sign_digest = cinfo->digest; if (cinfo->flags & CERT_PRIV) crypto_flags |= CRYPTO_FLAG_PRIV; crypto_flags |= cinfo->nid << 16; /* * Load optional leapseconds table from file "ntpkey_leap". If * the file is missing or defective, the values can later be * retrieved from a server. */ if (leap_file == NULL) leap_file = "ntpkey_leap"; crypto_tai(leap_file); #ifdef DEBUG if (debug) printf( "crypto_setup: flags 0x%x host %s signature %s\n", crypto_flags, sys_hostname, OBJ_nid2ln(cinfo->nid)); #endif } /* * crypto_config - configure data from crypto configuration command. 
*/ void crypto_config( int item, /* configuration item */ char *cp /* file name */ ) { switch (item) { /* * Set random seed file name. */ case CRYPTO_CONF_RAND: rand_file = emalloc(strlen(cp) + 1); strcpy(rand_file, cp); break; /* * Set private key password. */ case CRYPTO_CONF_PW: passwd = emalloc(strlen(cp) + 1); strcpy(passwd, cp); break; /* * Set host file name. */ case CRYPTO_CONF_PRIV: host_file = emalloc(strlen(cp) + 1); strcpy(host_file, cp); break; /* * Set sign key file name. */ case CRYPTO_CONF_SIGN: sign_file = emalloc(strlen(cp) + 1); strcpy(sign_file, cp); break; /* * Set iff parameters file name. */ case CRYPTO_CONF_IFFPAR: iffpar_file = emalloc(strlen(cp) + 1); strcpy(iffpar_file, cp); break; /* * Set gq parameters file name. */ case CRYPTO_CONF_GQPAR: gqpar_file = emalloc(strlen(cp) + 1); strcpy(gqpar_file, cp); break; /* * Set mv parameters file name. */ case CRYPTO_CONF_MVPAR: mvpar_file = emalloc(strlen(cp) + 1); strcpy(mvpar_file, cp); break; /* * Set identity scheme. */ case CRYPTO_CONF_IDENT: if (!strcasecmp(cp, "iff")) ident_scheme |= CRYPTO_FLAG_IFF; else if (!strcasecmp(cp, "gq")) ident_scheme |= CRYPTO_FLAG_GQ; else if (!strcasecmp(cp, "mv")) ident_scheme |= CRYPTO_FLAG_MV; break; /* * Set certificate file name. */ case CRYPTO_CONF_CERT: cert_file = emalloc(strlen(cp) + 1); strcpy(cert_file, cp); break; /* * Set leapseconds file name. */ case CRYPTO_CONF_LEAP: leap_file = emalloc(strlen(cp) + 1); strcpy(leap_file, cp); break; } crypto_flags |= CRYPTO_FLAG_ENAB; } # else int ntp_crypto_bs_pubkey; # endif /* OPENSSL */ Index: stable/9/contrib/ntp/ntpd/ntp_proto.c =================================================================== --- stable/9/contrib/ntp/ntpd/ntp_proto.c (revision 281230) +++ stable/9/contrib/ntp/ntpd/ntp_proto.c (revision 281231) @@ -1,3451 +1,3461 @@ /* * ntp_proto.c - NTP version 4 protocol machinery * * ATTENTION: Get approval from Dave Mills on all changes to this file! 
* */
#ifdef HAVE_CONFIG_H
#include
#endif

#include "ntpd.h"
#include "ntp_stdlib.h"
#include "ntp_unixtime.h"
#include "ntp_control.h"
#include "ntp_string.h"

#include

#if defined(VMS) && defined(VMS_LOCALUNIT)	/*wjm*/
#include "ntp_refclock.h"
#endif

#if defined(__FreeBSD__) && __FreeBSD__ >= 3
#include
#endif

/*
 * This macro defines the authentication state. If x is nonzero,
 * authentication is required and the macro is true only when y is
 * AUTH_OK; otherwise authentication is optional and the macro is true
 * when y is AUTH_OK or AUTH_NONE. x is used only as a truth value, so
 * callers may pass a masked flag word.
 */
#define	AUTH(x, y)	((x) ? (y) == AUTH_OK : (y) == AUTH_OK || \
			    (y) == AUTH_NONE)

/*
 * System variables are declared here. See Section 3.2 of the
 * specification.
 */
u_char	sys_leap;		/* system leap indicator */
u_char	sys_stratum;		/* stratum of system */
s_char	sys_precision;		/* local clock precision (log2 s) */
double	sys_rootdelay;		/* roundtrip delay to primary source */
double	sys_rootdispersion;	/* dispersion to primary source */
u_int32 sys_refid;		/* source/loop in network byte order */
static	double sys_offset;	/* current local clock offset */
l_fp	sys_reftime;		/* time we were last updated */
struct	peer *sys_peer;		/* our current peer */
struct	peer *sys_pps;		/* our PPS peer */
struct	peer *sys_prefer;	/* our cherished peer */
int	sys_kod;		/* kod credit */
int	sys_kod_rate = 2;	/* max kod packets per second */
#ifdef OPENSSL
u_long	sys_automax;		/* maximum session key lifetime */
#endif /* OPENSSL */

/*
 * Nonspecified system state variables.
 */
int	sys_bclient;		/* broadcast client enable */
double	sys_bdelay;		/* broadcast client default delay */
int	sys_calldelay;		/* modem callup delay (s) */
int	sys_authenticate;	/* require authentication for config */
l_fp	sys_authdelay;		/* authentication delay */
static	u_long sys_authdly[2];	/* authentication delay shift reg */
static	double sys_mindisp = MINDISPERSE; /* min disp increment (s) */
static	double sys_maxdist = MAXDISTANCE; /* selection threshold (s) */
double	sys_jitter;		/* system jitter (s) */
static	int sys_hopper;		/* anticlockhop counter */
static	int sys_maxhop = MAXHOP; /* anticlockhop counter threshold */
int	leap_next;		/* leap consensus */
keyid_t	sys_private;		/* private value for session seed */
int	sys_manycastserver;	/* respond to manycast client pkts */
int	peer_ntpdate;		/* active peers in ntpdate mode */
int	sys_survivors;		/* truest of the truechimers */
#ifdef OPENSSL
char	*sys_hostname;		/* gethostname() name */
#endif /* OPENSSL */

/*
 * TOS and multicast mapping stuff
 */
int	sys_floor = 0;		/* cluster stratum floor */
int	sys_ceiling = STRATUM_UNSPEC; /* cluster stratum ceiling */
int	sys_minsane = 1;	/* minimum candidates */
int	sys_minclock = NTP_MINCLOCK; /* minimum survivors */
int	sys_maxclock = NTP_MAXCLOCK; /* maximum candidates */
int	sys_cohort = 0;		/* cohort switch */
int	sys_orphan = STRATUM_UNSPEC + 1; /* orphan stratum */
double	sys_orphandelay = 0;	/* orphan root delay */
int	sys_beacon = BEACON;	/* manycast beacon interval */
int	sys_ttlmax;		/* max ttl mapping vector index */
u_char	sys_ttl[MAX_TTL];	/* ttl mapping vector */

/*
 * Statistics counters
 */
u_long	sys_stattime;		/* time since reset */
u_long	sys_received;		/* packets received */
u_long	sys_processed;		/* packets processed */
u_long	sys_newversionpkt;	/* current version */
u_long	sys_oldversionpkt;	/* recent version */
u_long	sys_unknownversion;	/* invalid version */
u_long	sys_restricted;		/* access denied */
u_long	sys_badlength;		/* bad length or format */
u_long
sys_badauth;		/* bad authentication */
u_long	sys_limitrejected;	/* rate exceeded */

/*
 * Prototypes of routines private to this file.
 */
static	double	root_distance	P((struct peer *));
static	void	clock_combine	P((struct peer **, int));
static	void	peer_xmit	P((struct peer *));
static	void	fast_xmit	P((struct recvbuf *, int, keyid_t,
				    int));
static	void	clock_update	P((void));
static	int	default_get_precision	P((void));
static	int	peer_unfit	P((struct peer *));

/*
 * transmit - Transmit Procedure. See Section 3.4.2 of the
 *	      specification.
 *
 * Runs one poll of the association: updates the reachability and burst
 * state, sends a packet with peer_xmit() where the mode allows it and
 * reschedules with poll_update(). On timeout a preemptable or
 * unconfigured association is removed with unpeer() and the routine
 * returns without touching it further.
 */
void
transmit(
	struct peer *peer	/* peer structure pointer */
	)
{
	int	hpoll;

	/*
	 * The polling state machine. There are two kinds of machines,
	 * those that never expect a reply (broadcast and manycast
	 * server modes) and those that do (all other modes). The dance
	 * is intricate...
	 */
	/*
	 * Orphan mode is active when enabled and when no servers less
	 * than the orphan stratum are available. In this mode packets
	 * are sent at the orphan stratum. An orphan with no other
	 * synchronization source is an orphan parent. It assumes root
	 * delay zero and reference ID the loopback address. All others
	 * are orphan children with root delay randomized over a 1-s
	 * range. The root delay is used by the election algorithm to
	 * select the order of synchronization.
	 */
	hpoll = peer->hpoll;
	if (sys_orphan < STRATUM_UNSPEC && sys_peer == NULL) {
		sys_leap = LEAP_NOWARNING;
		sys_stratum = sys_orphan;
		sys_refid = htonl(LOOPBACKADR);
		sys_rootdelay = 0;
		sys_rootdispersion = 0;
	}

	/*
	 * In broadcast mode the poll interval is never changed from
	 * minpoll.
	 */
	if (peer->cast_flags & (MDF_BCAST | MDF_MCAST)) {
		peer->outdate = current_time;
		peer_xmit(peer);
		poll_update(peer, hpoll);
		return;
	}

	/*
	 * In manycast mode we start with unity ttl. The ttl is
	 * increased by one for each poll until either sys_maxclock
	 * servers have been found or the maximum ttl is reached. When
	 * sys_maxclock servers are found we stop polling until one or
	 * more servers have timed out or until less than minpoll
	 * associations turn up. In this case additional better servers
	 * are dragged in and preempt the existing ones.
	 */
	if (peer->cast_flags & MDF_ACAST) {
		peer->outdate = current_time;
		if (peer->unreach > sys_beacon) {
			peer->unreach = 0;
			peer->ttl = 0;
			peer_xmit(peer);
		} else if (sys_survivors < sys_minclock ||
		    peer_preempt < sys_maxclock) {
			if (peer->ttl < sys_ttlmax)
				peer->ttl++;
			peer_xmit(peer);
		}
		peer->unreach++;
		poll_update(peer, hpoll);
		return;
	}

	/*
	 * In unicast modes the dance is much more intricate. It is
	 * designed to back off whenever possible to minimize network
	 * traffic.
	 */
	if (peer->burst == 0) {
		u_char oreach;

		/*
		 * Update the reachability status. If not heard for
		 * three consecutive polls, stuff infinity in the clock
		 * filter.
		 */
		oreach = peer->reach;
		peer->outdate = current_time;
		if (peer == sys_peer)
			sys_hopper++;
		peer->reach <<= 1;
		if (!(peer->reach & 0x07))
			clock_filter(peer, 0., 0., MAXDISPERSE);
		if (!peer->reach) {

			/*
			 * Here the peer is unreachable. If it was
			 * previously reachable, raise a trap.
			 */
			if (oreach) {
				report_event(EVNT_UNREACH, peer);
				peer->timereachable = current_time;
			}

			/*
			 * Send a burst if enabled, but only once after
			 * a peer becomes unreachable. If the preempt
			 * flag is dim, bump the unreach counter by one;
			 * otherwise, bump it by three.
			 */
			if (peer->flags & FLAG_IBURST &&
			    peer->unreach == 0) {
				peer->burst = NTP_BURST;
			}
			if (!(peer->flags & FLAG_PREEMPT))
				peer->unreach++;
			else
				peer->unreach += 3;
		} else {

			/*
			 * Here the peer is reachable. Set the poll
			 * interval to the system poll interval. Send a
			 * burst only if enabled and the peer is fit.
			 *
			 * Respond to the peer evaluation produced by
			 * the selection algorithm. If less than the
			 * outlyer level, up the unreach by three. If
			 * there are excess associations, up the unreach
			 * by two if not a candidate and by one if so.
			 */
			if (!(peer->flags & FLAG_PREEMPT)) {
				peer->unreach = 0;
			} else if (peer->status < CTL_PST_SEL_SELCAND) {
				peer->unreach += 3;
			} else if (peer_preempt > sys_maxclock) {
				if (peer->status < CTL_PST_SEL_SYNCCAND)
					peer->unreach += 2;
				else
					peer->unreach++;
			} else {
				peer->unreach = 0;
			}
			hpoll = sys_poll;
			if (peer->flags & FLAG_BURST &&
			    !peer_unfit(peer))
				peer->burst = NTP_BURST;
		}

		/*
		 * Watch for timeout. If ephemeral or preemptable, toss
		 * the rascal; otherwise, bump the poll interval.
		 */
		if (peer->unreach >= NTP_UNREACH) {
			if (peer->flags & FLAG_PREEMPT ||
			    !(peer->flags & FLAG_CONFIG)) {
				peer_clear(peer, "TIME");
				unpeer(peer);
				return;	/* peer is gone; do not touch it */
			} else {
				hpoll++;
			}
		}
	} else {
		peer->burst--;

		/*
		 * If a broadcast client at this point, the burst has
		 * concluded, so we switch to client mode and purge the
		 * keylist, since no further transmissions will be made.
		 */
		if (peer->burst == 0) {
			if (peer->cast_flags & MDF_BCLNT) {
				peer->hmode = MODE_BCLIENT;
#ifdef OPENSSL
				key_expire(peer);
#endif /* OPENSSL */
			}

			/*
			 * If ntpdate mode and the clock has not been
			 * set and all peers have completed the burst,
			 * we declare a successful failure.
			 */
			if (mode_ntpdate) {
				peer_ntpdate--;
				if (peer_ntpdate == 0) {
					msyslog(LOG_NOTICE,
					    "no reply; clock not set");
					exit (0);
				}
			}
		}
	}

	/*
	 * Do not transmit if in broadcast client mode.
	 */
	if (peer->hmode != MODE_BCLIENT)
		peer_xmit(peer);
	poll_update(peer, hpoll);
}


/*
 * receive - Receive Procedure.  See section 3.4.3 in the specification.
*/
void
receive(
	struct recvbuf *rbufp	/* receive buffer: packet, length, addresses */
	)
{
	register struct peer *peer;	/* peer structure pointer */
	register struct pkt *pkt;	/* receive packet pointer */
	int	hisversion;		/* packet version */
	int	hisleap;		/* packet leap indicator */
	int	hismode;		/* packet mode */
	int	hisstratum;		/* packet stratum */
	int	restrict_mask;		/* restrict bits */
	int	has_mac;		/* length of MAC field */
	int	authlen;		/* offset of MAC field */
	int	is_authentic = 0;	/* cryptosum ok */
	keyid_t	skeyid = 0;		/* key ID */
	struct sockaddr_storage *dstadr_sin; /* active runway */
	struct peer *peer2;		/* aux peer structure pointer */
	l_fp	p_org;			/* origin timestamp */
	l_fp	p_rec;			/* receive timestamp */
	l_fp	p_xmt;			/* transmit timestamp */
#ifdef OPENSSL
	keyid_t tkeyid = 0;		/* temporary key ID */
	keyid_t	pkeyid = 0;		/* previous key ID */
	struct autokey *ap;		/* autokey structure pointer */
	int	rval;			/* cookie snatcher */
#endif /* OPENSSL */
	int retcode = AM_NOMATCH;
	int	at_listhead;

	/*
	 * Monitor the packet and get restrictions. Note that the packet
	 * length for control and private mode packets must be checked
	 * by the service routines. Note that no statistics counters are
	 * recorded for restrict violations, since these counters are in
	 * the restriction routine. Note the careful distinctions here
	 * between a packet with a format error and a packet that is
	 * simply discarded without prejudice. Some restrictions have to
	 * be handled later in order to generate a kiss-of-death packet.
	 */
	/*
	 * Bogus port check is before anything, since it probably
	 * reveals a clogging attack.
	 */
	sys_received++;
	if (SRCPORT(&rbufp->recv_srcadr) == 0) {
		sys_badlength++;
		return;				/* bogus port */
	}
	at_listhead = ntp_monitor(rbufp);
	restrict_mask = restrictions(&rbufp->recv_srcadr, at_listhead);
#ifdef DEBUG
	if (debug > 1)
		printf("receive: at %ld %s<-%s flags %x restrict %03x\n",
		    current_time, stoa(&rbufp->dstadr->sin),
		    stoa(&rbufp->recv_srcadr),
		    rbufp->dstadr->flags, restrict_mask);
#endif
	if (restrict_mask & RES_IGNORE) {
		sys_restricted++;
		return;				/* ignore everything */
	}
	pkt = &rbufp->recv_pkt;
	hisversion = PKT_VERSION(pkt->li_vn_mode);
	hisleap = PKT_LEAP(pkt->li_vn_mode);
	hismode = (int)PKT_MODE(pkt->li_vn_mode);
	hisstratum = PKT_TO_STRATUM(pkt->stratum);
	if (hismode == MODE_PRIVATE) {
		if (restrict_mask & RES_NOQUERY) {
			sys_restricted++;
			return;			/* no query private */
		}
		process_private(rbufp, ((restrict_mask &
		    RES_NOMODIFY) == 0));
		return;
	}
	if (hismode == MODE_CONTROL) {
		if (restrict_mask & RES_NOQUERY) {
			sys_restricted++;
			return;			/* no query control */
		}
		process_control(rbufp, restrict_mask);
		return;
	}
	if (restrict_mask & RES_DONTSERVE) {
		sys_restricted++;
		return;				/* no time */
	}
	if (rbufp->recv_length < LEN_PKT_NOMAC) {
		sys_badlength++;
		return;				/* runt packet */
	}

	/*
	 * Version check must be after the query packets, since they
	 * intentionally use early version.
	 */
	if (hisversion == NTP_VERSION) {
		sys_newversionpkt++;		/* new version */
	} else if (!(restrict_mask & RES_VERSION) && hisversion >=
	    NTP_OLDVERSION) {
		sys_oldversionpkt++;		/* previous version */
	} else {
		sys_unknownversion++;
		return;				/* old version */
	}

	/*
	 * Figure out his mode and validate the packet. This has some
	 * legacy raunch that probably should be removed. In very early
	 * NTP versions mode 0 was equivalent to what later versions
	 * would interpret as client mode.
	 */
	if (hismode == MODE_UNSPEC) {
		if (hisversion == NTP_OLDVERSION) {
			hismode = MODE_CLIENT;
		} else {
			sys_badlength++;
			return;			/* invalid mode */
		}
	}

	/*
	 * Parse the extension field if present. We figure out whether
	 * an extension field is present by measuring the MAC size. If
	 * the number of words following the packet header is 0, no MAC
	 * is present and the packet is not authenticated. If 1, the
	 * packet is a crypto-NAK; if 3, the packet is authenticated
	 * with DES; if 5, the packet is authenticated with MD5. If 2 or
	 * 4, the packet is a runt and discarded forthwith. If greater
	 * than 5, an extension field is present, so we subtract the
	 * length of the field and go around again.
	 *
	 * has_mac starts as the octet count following the fixed header
	 * and shrinks by each extension field length taken from the
	 * packet; authlen advances by the same amount.
	 */
	authlen = LEN_PKT_NOMAC;
	has_mac = rbufp->recv_length - authlen;
	while (has_mac > 0) {
		int temp;

-		if (has_mac % 4 != 0 || has_mac < 0) {
+		if (has_mac % 4 != 0 || has_mac < MIN_MAC_LEN) {
			sys_badlength++;
			return;			/* bad MAC length */
		}
		if (has_mac == 1 * 4 || has_mac == 3 * 4 || has_mac ==
		    MAX_MAC_LEN) {
			skeyid = ntohl(((u_int32 *)pkt)[authlen / 4]);
			break;

		} else if (has_mac > MAX_MAC_LEN) {
			/* low 16 bits of the first word: field length */
			temp = ntohl(((u_int32 *)pkt)[authlen / 4]) &
			    0xffff;
			if (temp < 4 || temp > NTP_MAXEXTEN || temp % 4
			    != 0) {
				sys_badlength++;
				return;		/* bad MAC length */
			}
			authlen += temp;
			has_mac -= temp;
		} else {
			sys_badlength++;
			return;			/* bad MAC length */
		}
	}
+	/*
+	 * If has_mac is < 0 we had a malformed packet.
+	 */
+	if (has_mac < 0) {
+		sys_badlength++;
+		return;				/* bad length */
+	}
#ifdef OPENSSL
	pkeyid = tkeyid = 0;
#endif /* OPENSSL */

	/*
	 * We have tossed out as many buggy packets as possible early in
	 * the game to reduce the exposure to a clogging attack. Now we
	 * have to burn some cycles to find the association and
	 * authenticate the packet if required. Note that we burn only
	 * MD5 cycles, again to reduce exposure. There may be no
	 * matching association and that's okay.
	 *
	 * More on the autokey mambo. Normally the local interface is
	 * found when the association was mobilized with respect to a
	 * designated remote address. We assume packets arriving from
	 * the remote address arrive via this interface and the local
	 * address used to construct the autokey is the unicast address
	 * of the interface. However, if the sender is a broadcaster,
	 * the interface broadcast address is used instead.
	 * Notwithstanding this technobabble, if the sender is a
	 * multicaster, the broadcast address is null, so we use the
	 * unicast address anyway. Don't ask.
	 */
	peer = findpeer(&rbufp->recv_srcadr, rbufp->dstadr, hismode,
	    &retcode);
	dstadr_sin = &rbufp->dstadr->sin;
	NTOHL_FP(&pkt->org, &p_org);
	NTOHL_FP(&pkt->rec, &p_rec);
	NTOHL_FP(&pkt->xmt, &p_xmt);

	/*
	 * Authentication is conditioned by three switches:
	 *
	 * NOPEER  (RES_NOPEER) do not mobilize an association unless
	 *         authenticated
	 * NOTRUST (RES_DONTTRUST) do not allow access unless
	 *         authenticated (implies NOPEER)
	 * enable  (sys_authenticate) master NOPEER switch, by default
	 *         on
	 *
	 * The NOPEER and NOTRUST can be specified on a per-client basis
	 * using the restrict command. The enable switch if on implies
	 * NOPEER for all clients. There are four outcomes:
	 *
	 * NONE    The packet has no MAC.
	 * OK      the packet has a MAC and authentication succeeds
	 * ERROR   the packet has a MAC and authentication fails
	 * CRYPTO  crypto-NAK. The MAC has four octets only.
	 *
	 * Note: The AUTH(x, y) macro is used to filter outcomes. If x
	 * is zero, acceptable outcomes of y are NONE and OK. If x is
	 * one, the only acceptable outcome of y is OK.
	 */
	if (has_mac == 0) {
		is_authentic = AUTH_NONE; /* not required */
#ifdef DEBUG
		if (debug)
			printf("receive: at %ld %s<-%s mode %d code %d auth %d\n",
			    current_time, stoa(dstadr_sin),
			    stoa(&rbufp->recv_srcadr), hismode, retcode,
			    is_authentic);
#endif
	} else if (has_mac == 4) {
		is_authentic = AUTH_CRYPTO; /* crypto-NAK */
#ifdef DEBUG
		if (debug)
			printf(
			    "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n",
			    current_time, stoa(dstadr_sin),
			    stoa(&rbufp->recv_srcadr), hismode, retcode,
			    skeyid, authlen, has_mac, is_authentic);
#endif
	} else {
#ifdef OPENSSL
		/*
		 * For autokey modes, generate the session key
		 * and install in the key cache. Use the socket
		 * broadcast or unicast address as appropriate.
		 */
		if (skeyid > NTP_MAXKEY) {

			/*
			 * More on the autokey dance (AKD). A cookie is
			 * constructed from public and private values.
			 * For broadcast packets, the cookie is public
			 * (zero). For packets that match no
			 * association, the cookie is hashed from the
			 * addresses and private value. For server
			 * packets, the cookie was previously obtained
			 * from the server. For symmetric modes, the
			 * cookie was previously constructed using an
			 * agreement protocol; however, should PKI be
			 * unavailable, we construct a fake agreement as
			 * the EXOR of the peer and host cookies.
			 *
			 * hismode	ephemeral	persistent
			 * =======================================
			 * active	0		cookie#
			 * passive	0%		cookie#
			 * client	sys cookie	0%
			 * server	0%		sys cookie
			 * broadcast	0		0
			 *
			 * # if unsync, 0
			 * % can't happen
			 */
			if (hismode == MODE_BROADCAST) {

				/*
				 * For broadcaster, use the interface
				 * broadcast address when available;
				 * otherwise, use the unicast address
				 * found when the association was
				 * mobilized. However, if this is from
				 * the wildcard interface, game over.
				 */
				if (crypto_flags && rbufp->dstadr ==
				    any_interface) {
					sys_restricted++;
					return;	     /* no wildcard */
				}
				pkeyid = 0;
				if (!SOCKNUL(&rbufp->dstadr->bcast))
					dstadr_sin =
					    &rbufp->dstadr->bcast;
			} else if (peer == NULL) {
				pkeyid = session_key(
				    &rbufp->recv_srcadr, dstadr_sin, 0,
				    sys_private, 0);
			} else {
				pkeyid = peer->pcookie;
			}

			/*
			 * The session key includes both the public
			 * values and cookie. In case of an extension
			 * field, the cookie used for authentication
			 * purposes is zero. Note the hash is saved for
			 * use later in the autokey mambo.
			 */
			if (authlen > LEN_PKT_NOMAC && pkeyid != 0) {
				session_key(&rbufp->recv_srcadr,
				    dstadr_sin, skeyid, 0, 2);
				tkeyid = session_key(
				    &rbufp->recv_srcadr, dstadr_sin,
				    skeyid, pkeyid, 0);
			} else {
				tkeyid = session_key(
				    &rbufp->recv_srcadr, dstadr_sin,
				    skeyid, pkeyid, 2);
			}

		}
#endif /* OPENSSL */

		/*
		 * Compute the cryptosum. Note a clogging attack may
		 * succeed in bloating the key cache. If an autokey,
		 * purge it immediately, since we won't be needing it
		 * again. If the packet is authentic, it can mobilize an
		 * association. Note that there is no key zero.
		 */
		if (!authdecrypt(skeyid, (u_int32 *)pkt, authlen,
		    has_mac)) {
			is_authentic = AUTH_ERROR;
			sys_badauth++;
			return;
		} else {
			is_authentic = AUTH_OK;
		}
#ifdef OPENSSL
		if (skeyid > NTP_MAXKEY)
			authtrust(skeyid, 0);
#endif /* OPENSSL */
#ifdef DEBUG
		if (debug)
			printf(
			    "receive: at %ld %s<-%s mode %d code %d keyid %08x len %d mac %d auth %d\n",
			    current_time, stoa(dstadr_sin),
			    stoa(&rbufp->recv_srcadr), hismode, retcode,
			    skeyid, authlen, has_mac, is_authentic);
#endif
	}

	/*
	 * The association matching rules are implemented by a set of
	 * routines and an association table. A packet matching an
	 * association is processed by the peer process for that
	 * association. If there are no errors, an ephemeral association
	 * is mobilized: a broadcast packet mobilizes a broadcast client
	 * association; a manycast server packet mobilizes a manycast
	 * client association; a symmetric active packet mobilizes a
	 * symmetric passive association.
	 */
	switch (retcode) {

	/*
	 * This is a client mode packet not matching any association. If
	 * an ordinary client, simply toss a server mode packet back
	 * over the fence. If a manycast client, we have to work a
	 * little harder.
	 */
	case AM_FXMIT:

		/*
		 * The vanilla case is when this is not a multicast
		 * interface. If authentication succeeds, return a
		 * server mode packet; if not and the key ID is nonzero,
		 * return a crypto-NAK.
		 */
		if (!(rbufp->dstadr->flags & INT_MCASTOPEN)) {
			if (AUTH(restrict_mask & RES_DONTTRUST,
			   is_authentic))
				fast_xmit(rbufp, MODE_SERVER, skeyid,
				    restrict_mask);
			else if (is_authentic == AUTH_ERROR)
				fast_xmit(rbufp, MODE_SERVER, 0,
				    restrict_mask);
			return;			/* hooray */
		}

		/*
		 * This must be manycast. Do not respond if not
		 * configured as a manycast server.
		 */
		if (!sys_manycastserver) {
			sys_restricted++;
			return;			/* not enabled */
		}

		/*
		 * Do not respond if unsynchronized or stratum is below
		 * the floor or at or above the ceiling.
		 */
		if (sys_leap == LEAP_NOTINSYNC || sys_stratum <
		    sys_floor || sys_stratum >= sys_ceiling)
			return;			/* bad stratum */

		/*
		 * Do not respond if our stratum is greater than the
		 * manycaster or it has already synchronized to us.
		 */
		if (sys_peer == NULL || hisstratum < sys_stratum ||
		    (sys_cohort && hisstratum == sys_stratum) ||
		    rbufp->dstadr->addr_refid == pkt->refid)
			return;			/* no help */

		/*
		 * Respond only if authentication succeeds. Don't do a
		 * crypto-NAK, as that would not be useful.
		 */
		if (AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
			fast_xmit(rbufp, MODE_SERVER, skeyid,
			    restrict_mask);

		return;				/* hooray */

	/*
	 * This is a server mode packet returned in response to a client
	 * mode packet sent to a multicast group address. The origin
	 * timestamp is a good nonce to reliably associate the reply
	 * with what was sent. If there is no match, that's curious and
	 * could be an intruder attempting to clog, so we just ignore
	 * it.
	 *
	 * If the packet is authentic and the manycast association is
	 * found, we mobilize a client association and copy pertinent
	 * variables from the manycast association to the new client
	 * association. If not, just ignore the packet.
	 *
	 * There is an implosion hazard at the manycast client, since
	 * the manycast servers send the server packet immediately. If
	 * the guy is already here, don't fire up a duplicate.
	 */
	case AM_MANYCAST:
		if (!AUTH(sys_authenticate | (restrict_mask &
		    (RES_NOPEER | RES_DONTTRUST)), is_authentic))
			return;			/* bad auth */

		if ((peer2 = findmanycastpeer(rbufp)) == NULL) {
			sys_restricted++;
			return;			/* not enabled */
		}
		if ((peer = newpeer(&rbufp->recv_srcadr,
		    rbufp->dstadr, MODE_CLIENT,
		    hisversion, NTP_MINDPOLL, NTP_MAXDPOLL,
		    FLAG_IBURST | FLAG_PREEMPT, MDF_UCAST | MDF_ACLNT,
		    0, skeyid)) == NULL)
			return;			/* system error */

		/*
		 * We don't need these, but it warms the billboards.
		 */
		peer->ttl = peer2->ttl;
		break;

	/*
	 * This is the first packet received from a broadcast server. If
	 * the packet is authentic and we are enabled as broadcast
	 * client, mobilize a broadcast client association. We don't
	 * kiss any frogs here.
	 */
	case AM_NEWBCL:
		if (!AUTH(sys_authenticate | (restrict_mask &
		    (RES_NOPEER | RES_DONTTRUST)), is_authentic))
			return;			/* bad auth */

		/*
		 * Do not respond if unsynchronized or stratum is below
		 * the floor or at or above the ceiling.
		 */
		if (hisleap == LEAP_NOTINSYNC || hisstratum <
		    sys_floor || hisstratum >= sys_ceiling)
			return;			/* bad stratum */

		switch (sys_bclient) {

		/*
		 * If not enabled, just skedaddle.
		 */
		case 0:
			sys_restricted++;
			return;			/* not enabled */

		/*
		 * Execute the initial volley in order to calibrate the
		 * propagation delay and run the Autokey protocol, if
		 * enabled.
		 */
		case 1:
			if ((peer = newpeer(&rbufp->recv_srcadr,
			    rbufp->dstadr, MODE_CLIENT, hisversion,
			    NTP_MINDPOLL, NTP_MAXDPOLL, FLAG_MCAST |
			    FLAG_IBURST, MDF_BCLNT, 0, skeyid)) ==
			    NULL)
				return;		/* system error */
#ifdef OPENSSL
			if (skeyid > NTP_MAXKEY)
				crypto_recv(peer, rbufp);
#endif /* OPENSSL */
			return;			/* hooray */


		/*
		 * Do not execute the initial volley.
		 */
		case 2:
#ifdef OPENSSL
			/*
			 * If a two-way exchange is not possible,
			 * neither is Autokey.
			 */
			if (skeyid > NTP_MAXKEY) {
				msyslog(LOG_INFO,
				    "receive: autokey requires two-way communication");
				return;		/* no autokey */
			}
#endif /* OPENSSL */
			if ((peer = newpeer(&rbufp->recv_srcadr,
			    rbufp->dstadr, MODE_BCLIENT, hisversion,
			    NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_BCLNT, 0,
			    skeyid)) == NULL)
				return;		/* system error */
		}
		break;

	/*
	 * This is the first packet received from a symmetric active
	 * peer. If the packet is authentic and the first he sent,
	 * mobilize a passive association. If not, kiss the frog.
	 */
	case AM_NEWPASS:

		/*
		 * If the inbound packet is correctly authenticated and
		 * enabled, a symmetric passive association is
		 * mobilized. If not but correctly authenticated, a
		 * symmetric active response is sent. If authentication
		 * fails, send a crypto-NAK packet.
		 */
		if (!AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
		    {
			if (is_authentic == AUTH_ERROR)
				fast_xmit(rbufp, MODE_ACTIVE, 0,
				    restrict_mask);
			return;			/* bad auth */
		}
		if (!AUTH(sys_authenticate | (restrict_mask &
		    RES_NOPEER), is_authentic)) {
			fast_xmit(rbufp, MODE_ACTIVE, skeyid,
			    restrict_mask);
			return;			/* hooray */
		}

		/*
		 * Do not respond if stratum is below the floor.
		 */
		if (hisstratum < sys_floor)
			return;			/* bad stratum */

		if ((peer = newpeer(&rbufp->recv_srcadr,
		    rbufp->dstadr, MODE_PASSIVE, hisversion,
		    NTP_MINDPOLL, NTP_MAXDPOLL, 0, MDF_UCAST, 0,
		    skeyid)) == NULL)
			return;			/* system error */
		break;

	/*
	 * Process regular packet. Nothing special.
	 */
	case AM_PROCPKT:
		break;

	/*
	 * A passive packet matches a passive association. This is
	 * usually the result of reconfiguring a client on the fly. As
	 * this association might be legitimate and this packet an
	 * attempt to deny service, just ignore it.
	 */
	case AM_ERR:
		return;

	/*
	 * For everything else there is the bit bucket.
	 */
	default:
		return;
	}
	peer->flash &= ~PKT_TEST_MASK;

	/*
	 * Next comes a rigorous schedule of timestamp checking. If the
	 * transmit timestamp is zero, the server is horribly broken.
	 */
	if (L_ISZERO(&p_xmt)) {
		return;				/* read rfc1305 */

	/*
	 * If the transmit timestamp duplicates a previous one, the
	 * packet is a replay. This prevents the bad guys from replaying
	 * the most recent packet, authenticated or not.
	 */
	} else if (L_ISEQU(&peer->org, &p_xmt)) {
		peer->flash |= TEST1;
		peer->oldpkt++;
		return;				/* duplicate packet */


	/*
	 * If this is a broadcast mode packet, skip further checking.
	 */
	} else if (hismode != MODE_BROADCAST) {
		if (L_ISZERO(&p_org))
			peer->flash |= TEST3;	/* protocol unsynch */
		else if (!L_ISEQU(&p_org, &peer->xmt))
			peer->flash |= TEST2;	/* bogus packet */
	}

	/*
-	 * Update the origin and destination timestamps. If
-	 * unsynchronized or bogus abandon ship. If the crypto machine
+	 * If unsynchronized or bogus abandon ship. If the crypto machine
	 * breaks, light the crypto bit and plaint the log.
	 */
-	peer->org = p_xmt;
-	peer->rec = rbufp->recv_time;
	if (peer->flash & PKT_TEST_MASK) {
#ifdef OPENSSL
		if (crypto_flags && (peer->flags & FLAG_SKEY)) {
			rval = crypto_recv(peer, rbufp);
			if (rval != XEVNT_OK) {
				peer_clear(peer, "CRYP");
				peer->flash |= TEST9; /* crypto error */
			}
		}
#endif /* OPENSSL */
		return;				/* unsynch */
	}

	/*
	 * The timestamps are valid and the receive packet matches the
	 * last one sent. If the packet is a crypto-NAK, the server
	 * might have just changed keys. We reset the association
	 * and restart the protocol.
	 */
	if (is_authentic == AUTH_CRYPTO) {
		peer_clear(peer, "AUTH");
		return;				/* crypto-NAK */

	/*
	 * If the association is authenticated, the key ID is nonzero
	 * and received packets must be authenticated. This is designed
	 * to avoid a bait-and-switch attack, which was possible in past
	 * versions. If symmetric modes, return a crypto-NAK. The peer
	 * should restart the protocol.
	 */
-	} else if (!AUTH(peer->keyid || (restrict_mask & RES_DONTTRUST),
-	    is_authentic)) {
+	} else if (!AUTH(peer->keyid || has_mac ||
+	    (restrict_mask & RES_DONTTRUST), is_authentic)) {
		peer->flash |= TEST5;
-		if (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE)
+		if (has_mac &&
+		    (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE))
			fast_xmit(rbufp, MODE_ACTIVE, 0, restrict_mask);
		return;				/* bad auth */
	}

	/*
	 * That was hard and I am sweaty, but the packet is squeaky
	 * clean. Get on with real work.
+	 *
+	 * Update the origin and destination timestamps.
	 */
+	peer->org = p_xmt;
+	peer->rec = rbufp->recv_time;
+
	peer->received++;
	peer->timereceived = current_time;
	if (is_authentic == AUTH_OK)
		peer->flags |= FLAG_AUTHENTIC;
	else
		peer->flags &= ~FLAG_AUTHENTIC;
#ifdef OPENSSL
	/*
	 * More autokey dance. The rules of the cha-cha are as follows:
	 *
	 * 1. If there is no key or the key is not auto, do nothing.
	 *
	 * 2. If this packet is in response to the one just previously
	 *    sent or from a broadcast server, do the extension fields.
	 *    Otherwise, assume bogosity and bail out.
	 *
	 * 3. If an extension field contains a verified signature, it is
	 *    self-authenticated and we sit the dance.
	 *
	 * 4. If this is a server reply, check only to see that the
	 *    transmitted key ID matches the received key ID.
	 *
	 * 5. Check to see that one or more hashes of the current key ID
	 *    matches the previous key ID or ultimate original key ID
	 *    obtained from the broadcaster or symmetric peer. If no
	 *    match, sit the dance and wait for timeout.
	 *
	 * In case of crypto error, fire the orchestra and stop dancing.
	 * This is considered a permanent error, so light the crypto bit
	 * to suppress further requests. If preemptable or ephemeral,
	 * scuttle the ship.
	 */
	if (crypto_flags && (peer->flags & FLAG_SKEY)) {
		peer->flash |= TEST8;
		rval = crypto_recv(peer, rbufp);
		if (rval != XEVNT_OK) {
			peer_clear(peer, "CRYP");
			peer->flash |= TEST9;	/* crypto error */
			if (peer->flags & FLAG_PREEMPT ||
			    !(peer->flags & FLAG_CONFIG))
				unpeer(peer);
			return;

		} else if (hismode == MODE_SERVER) {
			if (skeyid == peer->keyid)
				peer->flash &= ~TEST8;
		} else if (!(peer->flash & TEST8)) {
			peer->pkeyid = skeyid;
		} else if ((ap = (struct autokey *)peer->recval.ptr) !=
		    NULL) {
			int i;

			/*
			 * Rehash tkeyid until it matches the previous
			 * key ID or ap->key, giving up once i exceeds
			 * ap->seq.
			 */
			for (i = 0; ; i++) {
				if (tkeyid == peer->pkeyid ||
				    tkeyid == ap->key) {
					peer->flash &= ~TEST8;
					peer->pkeyid = skeyid;
					break;
				}
				if (i > ap->seq)
					break;
				tkeyid = session_key(
				    &rbufp->recv_srcadr, dstadr_sin,
				    tkeyid, pkeyid, 0);
			}
		}
		if (!(peer->crypto & CRYPTO_FLAG_PROV)) /* test 9 */
			peer->flash |= TEST8;	/* not proventic */

		/*
		 * If the transmit queue is nonempty, clamp the host
		 * poll interval to the packet poll interval.
		 */
		if (peer->cmmd != 0) {
			peer->ppoll = pkt->ppoll;
			poll_update(peer, peer->hpoll);
		}
	}
#endif /* OPENSSL */

	/*
	 * The dance is complete and the flash bits have been lit. Toss
	 * the packet over the fence for processing, which may light up
	 * more flashers.
	 */
	process_packet(peer, pkt);

	/*
	 * Well, that was nice. If TEST4 is lit, either the crypto
	 * machine jammed or a kiss-o'-death packet flew in, either of
	 * which is fatal.
	 */
	if (peer->flash & TEST4) {
		msyslog(LOG_INFO, "receive: fatal error %04x for %s",
		    peer->flash, stoa(&peer->srcadr));
		return;
	}
}


/*
 * process_packet - Packet Procedure, a la Section 3.4.4 of the
 *	specification. Or almost, at least. If we're in here we have a
 *	reasonable expectation that we will be having a long term
 *	relationship with this host.
*/
/*
 * Parameters:
 *   peer - association the packet was matched to; its header fields,
 *          flash bits and reach register are updated here.
 *   pkt  - received packet, header fields still in network byte order.
 *
 * The function returns early (without feeding clock_filter()) when any
 * bit in PKT_TEST_MASK is lit, or for a multicast client still in its
 * initial MODE_CLIENT volley.
 */
void
process_packet(
	register struct peer *peer,
	register struct pkt *pkt
	)
{
	double	t34, t21;
	double	p_offset, p_del, p_disp;
	l_fp	p_rec, p_xmt, p_org, p_reftime;
	l_fp	ci;
	u_char	pmode, pleap, pstratum;

	/*
	 * Count the packet and convert the header fields we need to
	 * host byte order.
	 */
	sys_processed++;
	peer->processed++;
	p_del = FPTOD(NTOHS_FP(pkt->rootdelay));
	p_disp = FPTOD(NTOHS_FP(pkt->rootdispersion));
	NTOHL_FP(&pkt->reftime, &p_reftime);
	NTOHL_FP(&pkt->rec, &p_rec);
	NTOHL_FP(&pkt->xmt, &p_xmt);
	pmode = PKT_MODE(pkt->li_vn_mode);
	pleap = PKT_LEAP(pkt->li_vn_mode);

	/*
	 * For broadcast packets the packet's origin timestamp is not
	 * used; the time of last reception stands in for it.
	 */
	if (pmode != MODE_BROADCAST)
		NTOHL_FP(&pkt->org, &p_org);
	else
		p_org = peer->rec;
	pstratum = PKT_TO_STRATUM(pkt->stratum);

	/*
	 * Test for kiss-o'-death packet. Only the "DENY" code is acted
	 * upon here: the association is cleared and TEST4 is lit.
	 */
	if (pleap == LEAP_NOTINSYNC && pstratum == STRATUM_UNSPEC) {
		if (memcmp(&pkt->refid, "DENY", 4) == 0) {
			peer_clear(peer, "DENY");
			peer->flash |= TEST4;	/* access denied */
		}
	}

	/*
	 * Capture the header values.
	 */
	record_raw_stats(&peer->srcadr, peer->dstadr ? &peer->dstadr->sin : NULL,
	    &p_org, &p_rec, &p_xmt, &peer->rec);
	peer->leap = pleap;
	peer->stratum = min(pstratum, STRATUM_UNSPEC);
	peer->pmode = pmode;
	peer->ppoll = pkt->ppoll;
	peer->precision = pkt->precision;
	peer->rootdelay = p_del;
	peer->rootdispersion = p_disp;
	peer->refid = pkt->refid;		/* network byte order */
	peer->reftime = p_reftime;

	/*
	 * Verify the server is synchronized; that is, the leap bits and
	 * stratum are valid, the root delay and root dispersion are
	 * valid and the reference timestamp is not later than the
	 * transmit timestamp.
	 */
	if (pleap == LEAP_NOTINSYNC ||		/* test 6 */
	    pstratum < sys_floor || pstratum >= sys_ceiling)
		peer->flash |= TEST6;		/* peer not synch */
	if (p_del < 0 || p_disp < 0 || p_del /	/* test 7 */
	    2 + p_disp >= MAXDISPERSE || !L_ISHIS(&p_xmt, &p_reftime))
		peer->flash |= TEST7;		/* bad header */

	/*
	 * If any tests fail at this point, the packet is discarded.
	 * Note that some flashers may have already been set in the
	 * receive() routine.
	 */
	if (peer->flash & PKT_TEST_MASK) {
#ifdef DEBUG
		if (debug)
			printf("packet: flash header %04x\n", peer->flash);
#endif
		return;
	}

	/*
	 * The packet is acceptable. Report the transition to reachable
	 * the first time, refresh the poll schedule and shift a one
	 * into the reach register.
	 */
	if (!(peer->reach)) {
		report_event(EVNT_REACH, peer);
		peer->timereachable = current_time;
	}
	poll_update(peer, peer->hpoll);
	peer->reach |= 1;

	/*
	 * For a client/server association, calculate the clock offset,
	 * roundtrip delay and dispersion. The equations are reordered
	 * from the spec for more efficient use of temporaries. For a
	 * broadcast association, offset the last measurement by the
	 * computed delay during the client/server volley. Note that
	 * org has been set to the time of last reception. Note the
	 * computation of dispersion includes the system precision plus
	 * that due to the frequency error since the origin time.
	 *
	 * It is very important to respect the hazards of overflow. The
	 * only permitted operation on raw timestamps is subtraction,
	 * where the result is a signed quantity spanning from 68 years
	 * in the past to 68 years in the future. To avoid loss of
	 * precision, these calculations are done using 64-bit integer
	 * arithmetic. However, the offset and delay calculations are
	 * sums and differences of these first-order differences, which
	 * if done using 64-bit integer arithmetic, would be valid over
	 * only half that span. Since the typical first-order
	 * differences are usually very small, they are converted to 64-
	 * bit doubles and all remaining calculations done in floating-
	 * point arithmetic. This preserves the accuracy while retaining
	 * the 68-year span.
	 *
	 * Let t1 = p_org, t2 = p_rec, t3 = p_xmt, t4 = peer->rec:
	 */
	ci = p_xmt;				/* t3 - t4 */
	L_SUB(&ci, &peer->rec);
	LFPTOD(&ci, t34);
	ci = p_rec;				/* t2 - t1 */
	L_SUB(&ci, &p_org);
	LFPTOD(&ci, t21);
	ci = peer->rec;				/* t4 - t1 */
	L_SUB(&ci, &p_org);
	/* ci (t4 - t1) is converted to a double only in the non-broadcast branch below */

	/*
	 * If running in a broadcast association, the clock offset is
	 * (t1 - t0) corrected by the one-way delay, but we can't
	 * measure that directly. Therefore, we start up in MODE_CLIENT
	 * mode, set FLAG_MCAST and exchange eight messages to determine
	 * the clock offset. When the last message is sent, we switch to
	 * MODE_BCLIENT mode. The next broadcast message after that
	 * computes the broadcast offset and clears FLAG_MCAST.
	 */
	if (pmode == MODE_BROADCAST) {
		p_offset = t34;
		if (peer->flags & FLAG_MCAST) {
			peer->estbdelay = peer->offset - p_offset;
			if (peer->hmode == MODE_CLIENT)
				return;

			peer->flags &= ~(FLAG_MCAST | FLAG_BURST);
		}
		p_offset += peer->estbdelay;
		p_del = peer->delay;
		p_disp = 0;
	} else {
		p_offset = (t21 + t34) / 2.;
		p_del = t21 - t34;
		LFPTOD(&ci, p_disp);
		p_disp = LOGTOD(sys_precision) + LOGTOD(peer->precision) +
		    clock_phi * p_disp;
	}

	/* The delay handed to the filter is never less than the system precision. */
	p_del = max(p_del, LOGTOD(sys_precision));
	clock_filter(peer, p_offset, p_del, p_disp);
	record_peer_stats(&peer->srcadr, ctlpeerstatus(peer), peer->offset,
	    peer->delay, peer->disp, peer->jitter);
}


/*
 * clock_update - Called at system process update intervals.
 *
 * Clamps sys_poll to the system peer's minpoll/maxpoll, then, if the
 * system peer has a sample newer than sys_clocktime, runs
 * local_clock() and updates the system variables according to its
 * return code (-1 panic, 2 step, 1 slew, anything else ignored).
 * Dereferences sys_peer unconditionally, so the caller must have
 * selected one.
 */
static void
clock_update(void)
{
	u_char	oleap;
	u_char	ostratum;
	double	dtemp;

	/*
	 * There must be a system peer at this point. If we just changed
	 * the system peer, but have a newer sample from the old one,
	 * wait until newer data are available.
	 */
	if (sys_poll < sys_peer->minpoll)
		sys_poll = sys_peer->minpoll;
	if (sys_poll > sys_peer->maxpoll)
		sys_poll = sys_peer->maxpoll;
	poll_update(sys_peer, sys_poll);
	if (sys_peer->epoch <= sys_clocktime)
		return;

#ifdef DEBUG
	if (debug)
		printf("clock_update: at %ld assoc %d \n", current_time,
		    peer_associations);
#endif
	/* Remember the old leap and stratum so changes can be reported below. */
	oleap = sys_leap;
	ostratum = sys_stratum;
	switch (local_clock(sys_peer, sys_offset)) {

	/*
	 * Clock exceeds panic threshold. Life as we know it ends.
	 */
	case -1:
		report_event(EVNT_SYSFAULT, NULL);
		exit (-1);
		/* not reached */

	/*
	 * Clock was stepped. Flush all time values of all peers.
	 */
	case 2:
		clear_all();
		sys_leap = LEAP_NOTINSYNC;
		sys_stratum = STRATUM_UNSPEC;
		sys_peer = NULL;
		sys_rootdelay = 0;
		sys_rootdispersion = 0;
		memcpy(&sys_refid, "STEP", 4);
		report_event(EVNT_CLOCKRESET, NULL);
		break;

	/*
	 * Clock was slewed. Update the system stratum, leap bits, root
	 * delay, root dispersion, reference ID and reference time. If
	 * the leap changes, we gotta reroll the keys. Except for
	 * reference clocks, the minimum dispersion increment is not
	 * less than sys_mindisp.
	 */
	case 1:
		sys_leap = leap_next;
		sys_stratum = min(sys_peer->stratum + 1,
		    STRATUM_UNSPEC);
		sys_reftime = sys_peer->rec;

		/*
		 * In orphan mode the stratum defaults to the orphan
		 * stratum. The root delay is set to a random value
		 * generated at startup. The root dispersion is set from
		 * the peer dispersion; the peer root dispersion is
		 * ignored.
		 */
		dtemp = sys_peer->disp + clock_phi * (current_time -
		    sys_peer->update) + sys_jitter +
		    fabs(sys_peer->offset);
#ifdef REFCLOCK
		if (!(sys_peer->flags & FLAG_REFCLOCK) && dtemp <
		    sys_mindisp)
			dtemp = sys_mindisp;
#else
		if (dtemp < sys_mindisp)
			dtemp = sys_mindisp;
#endif /* REFCLOCK */
		if (sys_stratum >= sys_orphan) {
			sys_stratum = sys_orphan;
			sys_rootdelay = sys_peer->delay;
			sys_rootdispersion = dtemp;
		} else {
			sys_rootdelay = sys_peer->delay +
			    sys_peer->rootdelay;
			sys_rootdispersion = dtemp +
			    sys_peer->rootdispersion;
		}

		/* First synchronization since being unsynchronized. */
		if (oleap == LEAP_NOTINSYNC) {
			report_event(EVNT_SYNCCHG, NULL);
#ifdef OPENSSL
			expire_all();
			crypto_update();
#endif /* OPENSSL */
		}
		break;

	/*
	 * Popcorn spike or step threshold exceeded. Pretend it never
	 * happened.
	 */
	default:
		break;
	}
	if (ostratum != sys_stratum)
		report_event(EVNT_PEERSTCHG, NULL);
}


/*
 * poll_update - update peer poll interval
 *
 * Parameters:
 *   peer  - association whose hpoll and nextdate are updated.
 *   mpoll - requested poll exponent; clamped to [minpoll, maxpoll]
 *           unless FLAG_FIXPOLL forces minpoll.
 */
void
poll_update(
	struct peer *peer,
	int	mpoll
	)
{
	int	hpoll;

	/*
	 * This routine figures out when the next poll should be sent.
	 * That turns out to be wickedly complicated. The big problem is
	 * that sometimes the time for the next poll is in the past.
	 * Watch out for races here between the receive process and the
	 * poll process. The key assertion is that, if nextdate equals
	 * current_time, the call is from the poll process; otherwise,
	 * it is from the receive process.
	 *
	 * First, bracket the poll interval according to the type of
	 * association and options. If a fixed interval is configured,
	 * use minpoll. This primarily is for reference clocks, but
	 * works for any association.
	 */
	if (peer->flags & FLAG_FIXPOLL) {
		hpoll = peer->minpoll;

	/*
	 * The ordinary case; clamp the poll interval between minpoll
	 * and maxpoll.
	 */
	} else {
		hpoll = max(min(peer->maxpoll, mpoll), peer->minpoll);
	}
#ifdef OPENSSL
	/*
	 * Bit of crass arrogance at this point. If the poll interval
	 * has changed and we have a keylist, the lifetimes in the
	 * keylist are probably bogus. In this case purge the keylist
	 * and regenerate it later.
	 */
	if (hpoll != peer->hpoll)
		key_expire(peer);
#endif /* OPENSSL */
	peer->hpoll = hpoll;

	/*
	 * Now we figure out if there is an override. If during the
	 * crypto protocol and a message is pending, make it wait not
	 * more than two seconds.
	 *
	 * Note the if/else-if chain below is opened differently
	 * depending on OPENSSL; both variants share the burst branch.
	 */
#ifdef OPENSSL
	if (peer->cmmd != NULL && (sys_leap != LEAP_NOTINSYNC ||
	    peer->crypto)) {
		peer->nextdate = current_time + RESP_DELAY;

	/*
	 * If we get called from the receive routine while a burst is
	 * pending, just slink away. If from the poll routine and a
	 * reference clock or a pending crypto response, delay for one
	 * second. If this is the first sent in a burst, wait for the
	 * modem to come up. For others in the burst, delay two seconds.
	 */
	} else if (peer->burst > 0) {
#else /* OPENSSL */
	if (peer->burst > 0) {
#endif /* OPENSSL */
		if (peer->nextdate != current_time)
			return;
#ifdef REFCLOCK
		else if (peer->flags & FLAG_REFCLOCK)
			peer->nextdate += RESP_DELAY;
#endif /* REFCLOCK */
		else if (peer->flags & (FLAG_IBURST | FLAG_BURST) &&
		    peer->burst == NTP_BURST)
			peer->nextdate += sys_calldelay;
		else
			peer->nextdate += BURST_DELAY;

	/*
	 * The ordinary case; use the minimum of the host and peer
	 * intervals, but not less than minpoll. In other words,
	 * oversampling is okay but undersampling is evil.
	 */
	} else {
		peer->nextdate = peer->outdate +
		    RANDPOLL(max(min(peer->ppoll, hpoll),
		    peer->minpoll));
	}

	/*
	 * If the time for the next poll has already happened, bring it
	 * up to the next second after this one. This way the only way
	 * to get nextdate == current time is from the poll routine.
	 */
	if (peer->nextdate <= current_time)
		peer->nextdate = current_time + 1;
#ifdef DEBUG
	if (debug > 1)
		printf("poll_update: at %lu %s flags %04x poll %d burst %d last %lu next %lu\n",
		    current_time, ntoa(&peer->srcadr), peer->flags,
		    peer->hpoll, peer->burst, peer->outdate,
		    peer->nextdate);
#endif
}

/*
 * peer_crypto_clear - discard crypto information
 *
 * Without OPENSSL this only emits the debug trace. With OPENSSL every
 * per-association crypto object is released and its pointer reset to
 * NULL, so calling it again on the same peer is harmless as far as the
 * frees visible here are concerned.
 */
void
peer_crypto_clear(
	struct peer *peer
	)
{
	/*
	 * If cryptographic credentials have been acquired, toss them to
	 * Valhalla. Note that autokeys are ephemeral, in that they are
	 * tossed immediately upon use. Therefore, the keylist can be
	 * purged anytime without needing to preserve random keys. Note
	 * that, if the peer is purged, the cryptographic variables are
	 * purged, too. This makes it much harder to sneak in some
	 * unauthenticated data in the clock filter.
	 */
	DPRINTF(1, ("peer_crypto_clear: at %ld next %ld assoc ID %d\n",
		    current_time, peer->nextdate, peer->associd));

#ifdef OPENSSL
	peer->assoc = 0;
	peer->crypto = 0;

	if (peer->pkey != NULL)
		EVP_PKEY_free(peer->pkey);
	peer->pkey = NULL;

	peer->digest = NULL;	/* XXX MEMLEAK? check whether this needs to be freed in any way - never was freed */

	if (peer->subject != NULL)
		free(peer->subject);
	peer->subject = NULL;

	if (peer->issuer != NULL)
		free(peer->issuer);
	peer->issuer = NULL;

	peer->pkeyid = 0;
	peer->pcookie = 0;

	if (peer->ident_pkey != NULL)
		EVP_PKEY_free(peer->ident_pkey);
	peer->ident_pkey = NULL;

	memset(&peer->fstamp, 0, sizeof(peer->fstamp));

	if (peer->iffval != NULL)
		BN_free(peer->iffval);
	peer->iffval = NULL;

	if (peer->grpkey != NULL)
		BN_free(peer->grpkey);
	peer->grpkey = NULL;

	value_free(&peer->cookval);
	value_free(&peer->recval);

	/* Drop any pending crypto request. */
	if (peer->cmmd != NULL) {
		free(peer->cmmd);
		peer->cmmd = NULL;
	}

	key_expire(peer);

	value_free(&peer->encrypt);
#endif /* OPENSSL */
}

/*
 * peer_clear - clear peer filter registers.  See Section 3.4.8 of the spec.
 *
 * Parameters:
 *   peer  - association to reset.
 *   ident - at least four bytes; the first four are copied verbatim
 *           into peer->refid (not treated as a C string there).
 */
void
peer_clear(
	struct peer *peer,		/* peer structure */
	char	*ident			/* tally lights */
	)
{
	int	i;

	/* Release crypto state first; the memset below would otherwise lose the pointers. */
	peer_crypto_clear(peer);

	if (peer == sys_peer)
		sys_peer = NULL;

	/*
	 * Wipe the association clean and initialize the nonzero values.
	 */
	memset(CLEAR_TO_ZERO(peer), 0, LEN_CLEAR_TO_ZERO);
	peer->estbdelay = sys_bdelay;
	peer->ppoll = peer->maxpoll;
	peer->hpoll = peer->minpoll;
	peer->disp = MAXDISPERSE;
	peer->jitter = LOGTOD(sys_precision);
	for (i = 0; i < NTP_SHIFT; i++) {
		peer->filter_order[i] = i;
		peer->filter_disp[i] = MAXDISPERSE;
	}

	/* Reference clocks keep their leap, stratum and refid. */
#ifdef REFCLOCK
	if (!(peer->flags & FLAG_REFCLOCK)) {
		peer->leap = LEAP_NOTINSYNC;
		peer->stratum = STRATUM_UNSPEC;
		memcpy(&peer->refid, ident, 4);
	}
#else
	peer->leap = LEAP_NOTINSYNC;
	peer->stratum = STRATUM_UNSPEC;
	memcpy(&peer->refid, ident, 4);
#endif /* REFCLOCK */

	/*
	 * During initialization use the association count to spread out
	 * the polls at one-second intervals. Otherwise, randomize over
	 * the minimum poll interval in order to avoid broadcast
	 * implosion.
	 */
	peer->nextdate = peer->update = peer->outdate = current_time;
	if (initializing)
		peer->nextdate += peer_associations;
	else if (peer->hmode == MODE_PASSIVE)
		peer->nextdate += RESP_DELAY;
	else
		peer->nextdate += (ntp_random() & ((1 << NTP_MINDPOLL) -
		    1));

	DPRINTF(1, ("peer_clear: at %ld next %ld assoc ID %d refid %s\n",
		    current_time, peer->nextdate, peer->associd, ident));
}


/*
 * clock_filter - add incoming clock sample to filter register and run
 * the filter procedure to find the best sample.
 *
 * On return peer->disp is always refreshed. peer->offset, peer->delay
 * and peer->jitter are refreshed when at least one sample is
 * acceptable, and peer->epoch advances (followed by clock_select())
 * only when the chosen sample is newer than the last one used and is
 * not classified as a popcorn spike.
 */
void
clock_filter(
	struct peer *peer,		/* peer structure pointer */
	double	sample_offset,		/* clock offset */
	double	sample_delay,		/* roundtrip delay */
	double	sample_disp		/* dispersion */
	)
{
	double	dst[NTP_SHIFT];		/* distance vector */
	int	ord[NTP_SHIFT];		/* index vector */
	int	i, j, k, m;
	double	dtemp, etemp;

	/*
	 * Shift the new sample into the register and discard the oldest
	 * one. The new offset and delay come directly from the
	 * timestamp calculations. The dispersion grows from the last
	 * outbound packet or reference clock update to the present time
	 * and increased by the sum of the peer precision and the system
	 * precision. The delay can sometimes swing negative due to
	 * frequency skew, so it is clamped non-negative.
	 */
	j = peer->filter_nextpt;
	peer->filter_offset[j] = sample_offset;
	peer->filter_delay[j] = max(0, sample_delay);
	peer->filter_disp[j] = sample_disp;
	peer->filter_epoch[j] = current_time;
	j = (j + 1) % NTP_SHIFT;
	peer->filter_nextpt = j;

	/*
	 * Update dispersions since the last update and at the same
	 * time initialize the distance and index lists. The distance
	 * list uses a compound metric. If the sample is valid and
	 * younger than the minimum Allan intercept, use delay;
	 * otherwise, use biased dispersion.
	 *
	 * j starts at the oldest slot and i counts down, so index 0 of
	 * dst[]/ord[] ends up describing the sample just inserted; that
	 * one (i == 0) is the only slot whose dispersion is not aged.
	 */
	dtemp = clock_phi * (current_time - peer->update);
	peer->update = current_time;
	for (i = NTP_SHIFT - 1; i >= 0; i--) {
		if (i != 0)
			peer->filter_disp[j] += dtemp;
		if (peer->filter_disp[j] >= MAXDISPERSE)
			peer->filter_disp[j] = MAXDISPERSE;
		if (peer->filter_disp[j] >= MAXDISPERSE)
			dst[i] = MAXDISPERSE;
		else if (peer->update - peer->filter_epoch[j] >
		    allan_xpt)
			dst[i] = sys_maxdist + peer->filter_disp[j];
		else
			dst[i] = peer->filter_delay[j];
		ord[i] = j;
		j++; j %= NTP_SHIFT;
	}

	/*
	 * If the clock discipline has stabilized, sort the samples in
	 * both lists by distance. Note, we do not displace a higher
	 * distance sample by a lower distance one unless lower by at
	 * least the precision.
	 */
	if (state == 4) {
		for (i = 1; i < NTP_SHIFT; i++) {
			for (j = 0; j < i; j++) {
				if (dst[j] > dst[i] +
				    LOGTOD(sys_precision)) {
					k = ord[j];
					ord[j] = ord[i];
					ord[i] = k;
					etemp = dst[j];
					dst[j] = dst[i];
					dst[i] = etemp;
				}
			}
		}
	}

	/*
	 * Copy the index list to the association structure so ntpq
	 * can see it later. Prune the distance list to samples less
	 * than max distance, but keep at least two valid samples for
	 * jitter calculation.
	 */
	m = 0;
	for (i = 0; i < NTP_SHIFT; i++) {
		peer->filter_order[i] = (u_char) ord[i];
		if (dst[i] >= MAXDISPERSE || (m >= 2 && dst[i] >=
		    sys_maxdist))
			continue;
		m++;
	}

	/*
	 * Compute the dispersion and jitter. The dispersion is weighted
	 * exponentially by NTP_FWEIGHT (0.5) so it is normalized close
	 * to 1.0. The jitter is the RMS differences relative to the
	 * lowest delay sample. If no acceptable samples remain in the
	 * shift register, quietly tiptoe home leaving only the
	 * dispersion.
	 */
	peer->disp = peer->jitter = 0;
	k = ord[0];
	for (i = NTP_SHIFT - 1; i >= 0; i--) {
		j = ord[i];
		peer->disp = NTP_FWEIGHT * (peer->disp +
		    peer->filter_disp[j]);
		if (i < m)
			peer->jitter += DIFF(peer->filter_offset[j],
			    peer->filter_offset[k]);
	}

	/*
	 * If no acceptable samples remain in the shift register,
	 * quietly tiptoe home leaving only the dispersion. Otherwise,
	 * save the offset, delay and jitter. Note the jitter must not
	 * be less than the precision.
	 */
	if (m == 0)
		return;

	/* etemp: change in offset relative to the previously saved value. */
	etemp = fabs(peer->offset - peer->filter_offset[k]);
	peer->offset = peer->filter_offset[k];
	peer->delay = peer->filter_delay[k];
	if (m > 1)
		peer->jitter /= m - 1;
	peer->jitter = max(SQRT(peer->jitter), LOGTOD(sys_precision));

	/*
	 * A new sample is useful only if it is younger than the last
	 * one used. Note the order is FIFO if the clock discipline has
	 * not stabilized.
	 */
	if (peer->filter_epoch[k] <= peer->epoch) {
#ifdef DEBUG
		if (debug)
			printf("clock_filter: discard %lu\n",
			    peer->epoch - peer->filter_epoch[k]);
#endif
		return;
	}

	/*
	 * If the difference between the last offset and the current one
	 * exceeds the jitter by CLOCK_SGATE and the interval since the
	 * last update is less than twice the system poll interval,
	 * consider the update a popcorn spike and ignore it.
	 */
	if (etemp > CLOCK_SGATE * peer->jitter && m > 1 &&
	    peer->filter_epoch[k] - peer->epoch < 2. *
	    ULOGTOD(sys_poll)) {
#ifdef DEBUG
		if (debug)
			printf("clock_filter: popcorn %.6f %.6f\n",
			    etemp, dtemp);
#endif
		return;
	}

	/*
	 * The mitigated sample statistics are saved for later
	 * processing. If not in a burst, tickle the select.
	 */
	peer->epoch = peer->filter_epoch[k];
#ifdef DEBUG
	if (debug)
		printf(
		    "clock_filter: n %d off %.6f del %.6f dsp %.6f jit %.6f, age %lu\n",
		    m, peer->offset, peer->delay, peer->disp,
		    peer->jitter, current_time - peer->epoch);
#endif
	if (peer->burst == 0 || sys_leap == LEAP_NOTINSYNC)
		clock_select();
}


/*
 * clock_select - find the pick-of-the-litter clock
 *
 * LOCKCLOCK: If the local clock is the prefer peer, it will always be
 * enabled, even if declared falseticker, (2) only the prefer peer can
 * be selected as the system peer, (3) if the external source is down,
 * the system leap bits are set to 11 and the stratum set to infinity.
*/ void clock_select(void) { struct peer *peer; int i, j, k, n; int nlist, nl3; int allow, osurv; double d, e, f, g; double high, low; double synch[NTP_MAXASSOC], error[NTP_MAXASSOC]; struct peer *osys_peer; struct peer *typeacts = NULL; struct peer *typelocal = NULL; struct peer *typesystem = NULL; static int list_alloc = 0; static struct endpoint *endpoint = NULL; static int *indx = NULL; static struct peer **peer_list = NULL; static u_int endpoint_size = 0; static u_int indx_size = 0; static u_int peer_list_size = 0; /* * Initialize and create endpoint, index and peer lists big * enough to handle all associations. */ osys_peer = sys_peer; sys_peer = NULL; sys_pps = NULL; sys_prefer = NULL; osurv = sys_survivors; sys_survivors = 0; #ifdef LOCKCLOCK sys_leap = LEAP_NOTINSYNC; sys_stratum = STRATUM_UNSPEC; memcpy(&sys_refid, "DOWN", 4); #endif /* LOCKCLOCK */ nlist = 0; for (n = 0; n < NTP_HASH_SIZE; n++) nlist += peer_hash_count[n]; if (nlist > list_alloc) { if (list_alloc > 0) { free(endpoint); free(indx); free(peer_list); } while (list_alloc < nlist) { list_alloc += 5; endpoint_size += 5 * 3 * sizeof(*endpoint); indx_size += 5 * 3 * sizeof(*indx); peer_list_size += 5 * sizeof(*peer_list); } endpoint = (struct endpoint *)emalloc(endpoint_size); indx = (int *)emalloc(indx_size); peer_list = (struct peer **)emalloc(peer_list_size); } /* * Initially, we populate the island with all the rifraff peers * that happen to be lying around. Those with seriously * defective clocks are immediately booted off the island. Then, * the falsetickers are culled and put to sea. The truechimers * remaining are subject to repeated rounds where the most * unpopular at each round is kicked off. When the population * has dwindled to sys_minclock, the survivors split a million * bucks and collectively crank the chimes. 
*/ nlist = nl3 = 0; /* none yet */ for (n = 0; n < NTP_HASH_SIZE; n++) { for (peer = peer_hash[n]; peer != NULL; peer = peer->next) { peer->flags &= ~FLAG_SYSPEER; peer->status = CTL_PST_SEL_REJECT; /* * Leave the island immediately if the peer is * unfit to synchronize. */ if (peer_unfit(peer)) continue; /* * Don't allow the local clock or modem drivers * in the kitchen at this point, unless the * prefer peer. Do that later, but only if * nobody else is around. These guys are all * configured, so we never throw them away. */ #ifdef REFCLOCK if (peer->refclktype == REFCLK_LOCALCLOCK #if defined(VMS) && defined(VMS_LOCALUNIT) /* wjm: VMS_LOCALUNIT taken seriously */ && REFCLOCKUNIT(&peer->srcadr) != VMS_LOCALUNIT #endif /* VMS && VMS_LOCALUNIT */ ) { typelocal = peer; #ifndef LOCKCLOCK if (!(peer->flags & FLAG_PREFER)) continue; /* no local clock */ #endif /* LOCKCLOCK */ } if (peer->sstclktype == CTL_SST_TS_TELEPHONE) { typeacts = peer; if (!(peer->flags & FLAG_PREFER)) continue; /* no acts */ } #endif /* REFCLOCK */ /* * If we get this far, the peer can stay on the * island, but does not yet have the immunity * idol. */ peer->status = CTL_PST_SEL_SANE; peer_list[nlist++] = peer; /* * Insert each interval endpoint on the sorted * list. 
*/ e = peer->offset; /* Upper end */ f = root_distance(peer); e = e + f; for (i = nl3 - 1; i >= 0; i--) { if (e >= endpoint[indx[i]].val) break; indx[i + 3] = indx[i]; } indx[i + 3] = nl3; endpoint[nl3].type = 1; endpoint[nl3++].val = e; e = e - f; /* Center point */ for (; i >= 0; i--) { if (e >= endpoint[indx[i]].val) break; indx[i + 2] = indx[i]; } indx[i + 2] = nl3; endpoint[nl3].type = 0; endpoint[nl3++].val = e; e = e - f; /* Lower end */ for (; i >= 0; i--) { if (e >= endpoint[indx[i]].val) break; indx[i + 1] = indx[i]; } indx[i + 1] = nl3; endpoint[nl3].type = -1; endpoint[nl3++].val = e; } } #ifdef DEBUG if (debug > 2) for (i = 0; i < nl3; i++) printf("select: endpoint %2d %.6f\n", endpoint[indx[i]].type, endpoint[indx[i]].val); #endif /* * This is the actual algorithm that cleaves the truechimers * from the falsetickers. The original algorithm was described * in Keith Marzullo's dissertation, but has been modified for * better accuracy. * * Briefly put, we first assume there are no falsetickers, then * scan the candidate list first from the low end upwards and * then from the high end downwards. The scans stop when the * number of intersections equals the number of candidates less * the number of falsetickers. If this doesn't happen for a * given number of falsetickers, we bump the number of * falsetickers and try again. If the number of falsetickers * becomes equal to or greater than half the number of * candidates, the Albanians have won the Byzantine wars and * correct synchronization is not possible. * * Here, nlist is the number of candidates and allow is the * number of falsetickers. Upon exit, the truechimers are the * susvivors with offsets not less than low and not greater than * high. There may be none of them. */ low = 1e9; high = -1e9; for (allow = 0; 2 * allow < nlist; allow++) { int found; /* * Bound the interval (low, high) as the largest * interval containing points from presumed truechimers. 
*/ found = 0; n = 0; for (i = 0; i < nl3; i++) { low = endpoint[indx[i]].val; n -= endpoint[indx[i]].type; if (n >= nlist - allow) break; if (endpoint[indx[i]].type == 0) found++; } n = 0; for (j = nl3 - 1; j >= 0; j--) { high = endpoint[indx[j]].val; n += endpoint[indx[j]].type; if (n >= nlist - allow) break; if (endpoint[indx[j]].type == 0) found++; } /* * If the number of candidates found outside the * interval is greater than the number of falsetickers, * then at least one truechimer is outside the interval, * so go around again. This is what makes this algorithm * different than Marzullo's. */ if (found > allow) continue; /* * If an interval containing truechimers is found, stop. * If not, increase the number of falsetickers and go * around again. */ if (high > low) break; } /* * Clustering algorithm. Construct candidate list in order first * by stratum then by root distance, but keep only the best * NTP_MAXASSOC of them. Scan the list to find falsetickers, who * leave the island immediately. The TRUE peer is always a * truechimer. We must leave at least one peer to collect the * million bucks. If in orphan mode, rascals found with lower * stratum are guaranteed a seat on the bus. */ j = 0; for (i = 0; i < nlist; i++) { peer = peer_list[i]; if (nlist > 1 && (peer->offset <= low || peer->offset >= high) && !(peer->flags & FLAG_TRUE) && !(sys_stratum >= sys_orphan && peer->stratum < sys_orphan)) continue; peer->status = CTL_PST_SEL_DISTSYSPEER; /* * The order metric is formed from the stratum times * max distance (1.) plus the root distance. It strongly * favors the lowest stratum, but a higher stratum peer * can capture the clock if the low stratum dominant * hasn't been heard for awhile. 
*/ d = root_distance(peer) + peer->stratum * sys_maxdist; if (j >= NTP_MAXASSOC) { if (d >= synch[j - 1]) continue; else j--; } for (k = j; k > 0; k--) { if (d >= synch[k - 1]) break; peer_list[k] = peer_list[k - 1]; error[k] = error[k - 1]; synch[k] = synch[k - 1]; } peer_list[k] = peer; error[k] = peer->jitter; synch[k] = d; j++; } nlist = j; /* * If no survivors remain at this point, check if the local * clock or modem drivers have been found. If so, nominate one * of them as the only survivor. Otherwise, give up and leave * the island to the rats. */ if (nlist == 0) { if (typeacts != 0) { typeacts->status = CTL_PST_SEL_DISTSYSPEER; peer_list[0] = typeacts; nlist = 1; } else if (typelocal != 0) { typelocal->status = CTL_PST_SEL_DISTSYSPEER; peer_list[0] = typelocal; nlist = 1; } else { if (osys_peer != NULL) { NLOG(NLOG_SYNCSTATUS) msyslog(LOG_INFO, "no servers reachable"); report_event(EVNT_PEERSTCHG, NULL); } } } /* * We can only trust the survivors if the number of candidates * sys_minsane is at least the number required to detect and * cast out one falsticker. For the Byzantine agreement * algorithm used here, that number is 4; however, the default * sys_minsane is 1 to speed initial synchronization. Careful * operators will tinker a higher value and use at least that * number of synchronization sources. */ if (nlist < sys_minsane) return; for (i = 0; i < nlist; i++) peer_list[i]->status = CTL_PST_SEL_SELCAND; /* * Now, vote outlyers off the island by select jitter weighted * by root distance. Continue voting as long as there are more * than sys_minclock survivors and the minimum select jitter is * greater than the maximum peer jitter. Stop if we are about to * discard a TRUE or PREFER peer, who of course has the * immunity idol. 
*/ while (1) { d = 1e9; e = -1e9; f = g = 0; k = 0; for (i = 0; i < nlist; i++) { if (error[i] < d) d = error[i]; f = 0; if (nlist > 1) { for (j = 0; j < nlist; j++) f += DIFF(peer_list[j]->offset, peer_list[i]->offset); f = SQRT(f / (nlist - 1)); } if (f * synch[i] > e) { g = f; e = f * synch[i]; k = i; } } f = max(f, LOGTOD(sys_precision)); if (nlist <= sys_minclock || f <= d || peer_list[k]->flags & (FLAG_TRUE | FLAG_PREFER)) break; #ifdef DEBUG if (debug > 2) printf( "select: drop %s select %.6f jitter %.6f\n", ntoa(&peer_list[k]->srcadr), g, d); #endif for (j = k + 1; j < nlist; j++) { peer_list[j - 1] = peer_list[j]; error[j - 1] = error[j]; } nlist--; } /* * What remains is a list usually not greater than sys_minclock * peers. We want only a peer at the lowest stratum to become * the system peer, although all survivors are eligible for the * combining algorithm. Consider each peer in turn and OR the * leap bits on the assumption that, if some of them honk * nonzero bits, they must know what they are doing. Check for * prefer and pps peers at any stratum. Note that the head of * the list is at the lowest stratum and that unsynchronized * peers cannot survive this far. */ leap_next = 0; for (i = 0; i < nlist; i++) { peer = peer_list[i]; sys_survivors++; leap_next |= peer->leap; peer->status = CTL_PST_SEL_SYNCCAND; if (peer->flags & FLAG_PREFER) sys_prefer = peer; if (peer == osys_peer) typesystem = peer; #ifdef REFCLOCK if (peer->refclktype == REFCLK_ATOM_PPS) sys_pps = peer; #endif /* REFCLOCK */ #if DEBUG if (debug > 1) printf("cluster: survivor %s metric %.6f\n", ntoa(&peer_list[i]->srcadr), synch[i]); #endif } /* * Anticlockhop provision. Keep the current system peer if it is * a survivor but not first in the list. But do that only HOPPER * times. 
*/ if (osys_peer == NULL || typesystem == NULL || typesystem == peer_list[0] || sys_hopper > sys_maxhop) { typesystem = peer_list[0]; sys_hopper = 0; } else { peer->selbroken++; } /* * Mitigation rules of the game. There are several types of * peers that can be selected here: (1) orphan, (2) prefer peer * (flag FLAG_PREFER) (3) pps peers (type REFCLK_ATOM_PPS), (4) * the existing system peer, if any, and (5) the head of the * survivor list. */ if (typesystem->stratum >= sys_orphan) { /* * If in orphan mode, choose the system peer. If the * lowest distance, we are the orphan parent and the * offset is zero. */ sys_peer = typesystem; sys_peer->status = CTL_PST_SEL_SYSPEER; if (sys_orphandelay < sys_peer->rootdelay) { sys_offset = 0; sys_refid = htonl(LOOPBACKADR); } else { sys_offset = sys_peer->offset; sys_refid = addr2refid(&sys_peer->srcadr); } sys_jitter = LOGTOD(sys_precision); #ifdef DEBUG if (debug > 1) printf("select: orphan offset %.6f\n", sys_offset); #endif } else if (sys_prefer) { /* * If a pps peer is present, choose it; otherwise, * choose the prefer peer. */ if (sys_pps) { sys_peer = sys_pps; sys_peer->status = CTL_PST_SEL_PPS; sys_offset = sys_peer->offset; if (!pps_control) NLOG(NLOG_SYSEVENT) msyslog(LOG_INFO, "pps sync enabled"); pps_control = current_time; #ifdef DEBUG if (debug > 1) printf("select: pps offset %.6f\n", sys_offset); #endif } else { sys_peer = sys_prefer; sys_peer->status = CTL_PST_SEL_SYSPEER; sys_offset = sys_peer->offset; #ifdef DEBUG if (debug > 1) printf("select: prefer offset %.6f\n", sys_offset); #endif } if (sys_peer->stratum == STRATUM_REFCLOCK || sys_peer->stratum == STRATUM_UNSPEC) sys_refid = sys_peer->refid; else sys_refid = addr2refid(&sys_peer->srcadr); sys_jitter = sys_peer->jitter; } else { /* * Otherwise, choose the anticlockhopper. 
*/ sys_peer = typesystem; sys_peer->status = CTL_PST_SEL_SYSPEER; clock_combine(peer_list, nlist); if (sys_peer->stratum == STRATUM_REFCLOCK || sys_peer->stratum == STRATUM_UNSPEC) sys_refid = sys_peer->refid; else sys_refid = addr2refid(&sys_peer->srcadr); sys_jitter = SQRT(SQUARE(sys_peer->jitter) + SQUARE(sys_jitter)); #ifdef DEBUG if (debug > 1) printf("select: combine offset %.6f\n", sys_offset); #endif } /* * We have found the alpha male. */ sys_peer->flags |= FLAG_SYSPEER; if (osys_peer != sys_peer) { char *src; report_event(EVNT_PEERSTCHG, NULL); #ifdef REFCLOCK if (sys_peer->flags & FLAG_REFCLOCK) src = refnumtoa(&sys_peer->srcadr); else #endif /* REFCLOCK */ src = ntoa(&sys_peer->srcadr); NLOG(NLOG_SYNCSTATUS) msyslog(LOG_INFO, "synchronized to %s, stratum %d", src, sys_peer->stratum); } clock_update(); } /* * clock_combine - compute system offset and jitter from selected peers */ static void clock_combine( struct peer **peers, /* survivor list */ int npeers /* number of survivors */ ) { int i; double x, y, z, w; y = z = w = 0; for (i = 0; i < npeers; i++) { x = root_distance(peers[i]); y += 1. / x; z += peers[i]->offset / x; w += SQUARE(peers[i]->offset - peers[0]->offset) / x; } sys_offset = z / y; sys_jitter = SQRT(w / y); } /* * root_distance - compute synchronization distance from peer to root */ static double root_distance( struct peer *peer ) { double dist; /* * Careful squeak here. The value returned must be greater than * the minimum root dispersion in order to avoid clockhop with * highly precise reference clocks. In orphan mode lose the peer * root delay, as that is used by the election algorithm. */ if (peer->stratum >= sys_orphan) dist = 0; else dist = peer->rootdelay; dist += max(sys_mindisp, dist + peer->delay) / 2 + peer->rootdispersion + peer->disp + clock_phi * (current_time - peer->update) + peer->jitter; return (dist); } /* * peer_xmit - send packet for persistent association. 
*/ static void peer_xmit( struct peer *peer /* peer structure pointer */ ) { struct pkt xpkt; /* transmit packet */ int sendlen, authlen; keyid_t xkeyid = 0; /* transmit key ID */ l_fp xmt_tx; if (!peer->dstadr) /* don't bother with peers without interface */ return; /* * This is deliciously complicated. There are three cases. * * case leap stratum refid delay dispersion * * normal system system system system system * orphan child 00 orphan system orphan system * orphan parent 00 orphan loopbk 0 0 */ /* * This is a normal packet. Use the system variables. */ if (sys_stratum < sys_orphan) { xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap, peer->version, peer->hmode); xpkt.stratum = STRATUM_TO_PKT(sys_stratum); xpkt.refid = sys_refid; xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is a orphan child packet. The host is synchronized to an * orphan parent. Show leap synchronized, orphan stratum, system * reference ID, orphan root delay and system root dispersion. */ } else if (sys_peer != NULL) { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, peer->version, peer->hmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = htonl(LOOPBACKADR); xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is an orphan parent. Show leap synchronized, orphan * stratum, loopack reference ID and zero root delay and root * dispersion. */ } else { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, peer->version, peer->hmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = sys_refid; xpkt.rootdelay = 0; xpkt.rootdispersion = 0; } xpkt.ppoll = peer->hpoll; xpkt.precision = sys_precision; HTONL_FP(&sys_reftime, &xpkt.reftime); HTONL_FP(&peer->org, &xpkt.org); HTONL_FP(&peer->rec, &xpkt.rec); /* * If the received packet contains a MAC, the transmitted packet * is authenticated and contains a MAC. 
If not, the transmitted * packet is not authenticated. * * It is most important when autokey is in use that the local * interface IP address be known before the first packet is * sent. Otherwise, it is not possible to compute a correct MAC * the recipient will accept. Thus, the I/O semantics have to do * a little more work. In particular, the wildcard interface * might not be usable. */ sendlen = LEN_PKT_NOMAC; if (!(peer->flags & FLAG_AUTHENABLE)) { get_systime(&peer->xmt); HTONL_FP(&peer->xmt, &xpkt.xmt); sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt, sendlen); peer->sent++; #ifdef DEBUG if (debug) printf("transmit: at %ld %s->%s mode %d\n", current_time, peer->dstadr ? stoa(&peer->dstadr->sin) : "-", stoa(&peer->srcadr), peer->hmode); #endif return; } /* * Authentication is enabled for this association (FLAG_AUTHENABLE * is set), so the transmitted packet must be authenticated. If * autokey is enabled, fuss with the various modes; otherwise, * symmetric key cryptography is used. */ #ifdef OPENSSL if (crypto_flags && (peer->flags & FLAG_SKEY)) { struct exten *exten; /* extension field */ /* * The Public Key Dance (PKD): Cryptographic credentials * are contained in extension fields, each including a * 4-octet length/code word followed by a 4-octet * association ID and optional additional data. Optional * data includes a 4-octet data length field followed by * the data itself. Request messages are sent from a * configured association; response messages can be sent * from a configured association or can take the fast * path without ever matching an association. Response * messages have the same code as the request, but have * a response bit and possibly an error bit set. In this * implementation, a message may contain no more than * one command and no more than one response. * * Cryptographic session keys include both a public and * a private component. Request and response messages * using extension fields are always sent with the * private component set to zero.
Packets without * extension fields include the private component when * the session key is generated. */ while (1) { /* * Allocate and initialize a keylist if not * already done. Then, use the list in inverse * order, discarding keys once used. Keep the * latest key around until the next one, so * clients can use client/server packets to * compute propagation delay. * * Note that once a key is used from the list, * it is retained in the key cache until the * next key is used. This is to allow a client * to retrieve the encrypted session key * identifier to verify authenticity. * * If for some reason a key is no longer in the * key cache, a birthday has happened and the * pseudo-random sequence is probably broken. In * that case, purge the keylist and regenerate * it. */ if (peer->keynumber == 0) make_keylist(peer, peer->dstadr); else peer->keynumber--; xkeyid = peer->keylist[peer->keynumber]; if (authistrusted(xkeyid)) break; else key_expire(peer); } peer->keyid = xkeyid; exten = NULL; switch (peer->hmode) { /* * In broadcast server mode the autokey values are * required by the broadcast clients. Push them when a * new keylist is generated; otherwise, push the * association message so the client can request them at * other times. */ case MODE_BROADCAST: if (peer->flags & FLAG_ASSOC) exten = crypto_args(peer, CRYPTO_AUTO | CRYPTO_RESP, NULL); else exten = crypto_args(peer, CRYPTO_ASSOC | CRYPTO_RESP, NULL); break; /* * In symmetric modes the digest, certificate, agreement * parameters, cookie and autokey values are required. * The leapsecond table is optional. But, a passive peer * will not believe the active peer until the latter has * synchronized, so the agreement must be postponed * until then. In any case, if a new keylist is * generated, the autokey values are pushed. * * If the crypto bit (TEST9 in peer->flash) is lit, don't * send requests. */ case MODE_ACTIVE: case MODE_PASSIVE: if (peer->flash & TEST9) break; /* * Parameter and certificate.
*/ if (!peer->crypto) exten = crypto_args(peer, CRYPTO_ASSOC, sys_hostname); else if (!(peer->crypto & CRYPTO_FLAG_VALID)) exten = crypto_args(peer, CRYPTO_CERT, peer->issuer); /* * Identity. Note we have to sign the * certificate before the cookie to avoid a * deadlock when the passive peer is walking the * certificate trail. */ else if (!(peer->crypto & CRYPTO_FLAG_VRFY)) exten = crypto_args(peer, crypto_ident(peer), NULL); else if (sys_leap != LEAP_NOTINSYNC && !(peer->crypto & CRYPTO_FLAG_SIGN)) exten = crypto_args(peer, CRYPTO_SIGN, sys_hostname); /* * Autokey. We request the cookie only when the * server and client are synchronized and * signatures work both ways. On the other hand, * the active peer needs the autokey values * before then and when the passive peer is * waiting for the active peer to synchronize. * Any time we regenerate the key list, we offer * the autokey values without being asked. */ else if (sys_leap != LEAP_NOTINSYNC && peer->leap != LEAP_NOTINSYNC && !(peer->crypto & CRYPTO_FLAG_AGREE)) exten = crypto_args(peer, CRYPTO_COOK, NULL); else if (peer->flags & FLAG_ASSOC) exten = crypto_args(peer, CRYPTO_AUTO | CRYPTO_RESP, NULL); else if (!(peer->crypto & CRYPTO_FLAG_AUTO)) exten = crypto_args(peer, CRYPTO_AUTO, NULL); /* * Postamble. We trade leapseconds only when the * server and client are synchronized. */ else if (sys_leap != LEAP_NOTINSYNC && peer->leap != LEAP_NOTINSYNC && peer->crypto & CRYPTO_FLAG_TAI && !(peer->crypto & CRYPTO_FLAG_LEAP)) exten = crypto_args(peer, CRYPTO_TAI, NULL); break; /* * In client mode the digest, certificate, agreement * parameters and cookie are required. The leapsecond * table is optional. If broadcast client mode, the * autokey values are required as well. In broadcast * client mode, these values must be acquired during the * client/server exchange to avoid having to wait until * the next key list regeneration.
Otherwise, the client * may linger until becoming unreachable and attempting * to restart. * * If neither the server nor the client has the agreement * parameters, the protocol transmits the cookie in the * clear. If the server has the parameters, the client * requests them and the protocol blinds it using the * agreed key. It is a protocol error if the client has * the parameters but the server does not. * * If the crypto bit (TEST9 in peer->flash) is lit, don't * send requests. */ case MODE_CLIENT: if (peer->flash & TEST9) break; /* * Parameter and certificate. */ if (!peer->crypto) exten = crypto_args(peer, CRYPTO_ASSOC, sys_hostname); else if (!(peer->crypto & CRYPTO_FLAG_VALID)) exten = crypto_args(peer, CRYPTO_CERT, peer->issuer); /* * Identity */ else if (!(peer->crypto & CRYPTO_FLAG_VRFY)) exten = crypto_args(peer, crypto_ident(peer), NULL); /* * Autokey. The autokey values are requested only for a * broadcast client (MDF_BCLNT). */ else if (!(peer->crypto & CRYPTO_FLAG_AGREE)) exten = crypto_args(peer, CRYPTO_COOK, NULL); else if (!(peer->crypto & CRYPTO_FLAG_AUTO) && (peer->cast_flags & MDF_BCLNT)) exten = crypto_args(peer, CRYPTO_AUTO, NULL); /* * Postamble. We can sign the certificate here, * since there is no chance of deadlock. */ else if (sys_leap != LEAP_NOTINSYNC && !(peer->crypto & CRYPTO_FLAG_SIGN)) exten = crypto_args(peer, CRYPTO_SIGN, sys_hostname); else if (sys_leap != LEAP_NOTINSYNC && peer->crypto & CRYPTO_FLAG_TAI && !(peer->crypto & CRYPTO_FLAG_LEAP)) exten = crypto_args(peer, CRYPTO_TAI, NULL); break; } /* * Build the extension fields as directed. A response to * a request is always sent, even if an error. If an * error occurs when sending a request, the crypto * machinery broke or was misconfigured. In that case * light the crypto bit to suppress further requests.
*/ /* * First append the extension field queued in peer->cmmd, if * any, then release it. */ if (peer->cmmd != NULL) { peer->cmmd->associd = htonl(peer->associd); sendlen += crypto_xmit(&xpkt, &peer->srcadr, sendlen, peer->cmmd, 0); free(peer->cmmd); peer->cmmd = NULL; } /* * Then append the extension field selected above. A zero * length from crypto_xmit() is treated as a crypto error: * TEST9 is set and nothing is sent. */ if (exten != NULL) { int ltemp = 0; if (exten->opcode != 0) { ltemp = crypto_xmit(&xpkt, &peer->srcadr, sendlen, exten, 0); if (ltemp == 0) { peer->flash |= TEST9; /* crypto error */ free(exten); return; } } sendlen += ltemp; free(exten); } /* * If extension fields are present, we must use a * private cookie value of zero. Don't send if the * crypto bit is set and no extension field is present, * but in that case give back the key. */ if (sendlen > LEN_PKT_NOMAC) { session_key(&peer->dstadr->sin, &peer->srcadr, xkeyid, 0, 2); } else if (peer->flash & TEST9) { authtrust(xkeyid, 0); return; } } #endif /* OPENSSL */ /* * Stash the transmit timestamp corrected for the encryption * delay. If autokey, give back the key, as we use keys only * once. Check for errors such as missing keys, buffer overflow, * etc. */ xkeyid = peer->keyid; get_systime(&peer->xmt); L_ADD(&peer->xmt, &sys_authdelay); HTONL_FP(&peer->xmt, &xpkt.xmt); authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen); if (authlen == 0) { msyslog(LOG_INFO, "transmit: %s key %u not found", stoa(&peer->srcadr), xkeyid); peer->flash |= TEST9; /* no key found */ return; } sendlen += authlen; #ifdef OPENSSL if (xkeyid > NTP_MAXKEY) authtrust(xkeyid, 0); #endif /* OPENSSL */ get_systime(&xmt_tx); /* * NOTE(review): this length check runs only after crypto_xmit() * and authencrypt() have already been passed xpkt; sendlen is an * int that is compared with a size_t and logged with %u - * confirm. */ if (sendlen > sizeof(xpkt)) { msyslog(LOG_ERR, "buffer overflow %u", sendlen); exit (-1); } sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt, sendlen); /* * Calculate the encryption delay. Keep the minimum over * the latest two samples.
*/ /* * xmt_tx was read after authencrypt() and peer->xmt before it * (with sys_authdelay added), so subtracting peer->xmt and adding * sys_authdelay back leaves the elapsed time. Only the fraction * word (l_uf) of the result is kept. */ L_SUB(&xmt_tx, &peer->xmt); L_ADD(&xmt_tx, &sys_authdelay); sys_authdly[1] = sys_authdly[0]; sys_authdly[0] = xmt_tx.l_uf; if (sys_authdly[0] < sys_authdly[1]) sys_authdelay.l_uf = sys_authdly[0]; else sys_authdelay.l_uf = sys_authdly[1]; peer->sent++; #ifdef OPENSSL #ifdef DEBUG if (debug) printf( "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d index %d\n", current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-", ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen - authlen, authlen, peer->keynumber); #endif #else #ifdef DEBUG if (debug) printf( "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n", current_time, peer->dstadr ? ntoa(&peer->dstadr->sin) : "-", ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen - authlen, authlen); #endif #endif /* OPENSSL */ } /* * fast_xmit - Send packet for nonpersistent association. Note that * neither the source nor the destination can be a broadcast address. */ static void fast_xmit( struct recvbuf *rbufp, /* receive packet pointer */ int xmode, /* transmit mode */ keyid_t xkeyid, /* transmit key ID */ int mask /* restrict mask */ ) { struct pkt xpkt; /* transmit packet structure */ struct pkt *rpkt; /* receive packet structure */ l_fp xmt_ts; /* timestamp */ l_fp xmt_tx; /* timestamp after authent */ int sendlen, authlen; #ifdef OPENSSL u_int32 temp32; #endif /* * Initialize transmit packet header fields from the receive * buffer provided. We leave some fields intact as received. If * the packet arrived on an interface flagged INT_MCASTOPEN, the * reply must go out another way, so a different interface is * looked up with findinterface(). * * The root delay field is special. If the system stratum is * less than the orphan stratum, send the real root delay. * Otherwise, if there is a system peer, send the orphan delay. * Otherwise, we must be an orphan parent, so send zero. */ rpkt = &rbufp->recv_pkt; if (rbufp->dstadr->flags & INT_MCASTOPEN) rbufp->dstadr = findinterface(&rbufp->recv_srcadr); /* * This is deliciously complicated. There are four cases.
* * case leap stratum refid delay dispersion * * KoD 11 16 KISS system system * normal system system system system system * orphan child 00 orphan system orphan system * orphan parent 00 orphan loopbk 0 0 */ /* * This is a kiss-of-death (KoD) packet. Show leap * unsynchronized, unspecified stratum (STRATUM_UNSPEC), the * four-character kiss code "RATE" as reference ID and system * root delay. Note the rate limit on these packets: sys_kod is a * bucket counter that every kiss sent decrements. If the counter * is zero, or RES_DEMOBILIZE is not set in the restrict mask, * nothing is sent. */ if (mask & RES_LIMITED) { sys_limitrejected++; if (sys_kod == 0 || !(mask & RES_DEMOBILIZE)) return; sys_kod--; xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOTINSYNC, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_UNSPEC; memcpy(&xpkt.refid, "RATE", 4); xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is a normal packet. Use the system variables. */ } else if (sys_stratum < sys_orphan) { xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_TO_PKT(sys_stratum); xpkt.refid = sys_refid; xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is an orphan child packet. The host is synchronized to an * orphan parent. Show leap synchronized, orphan stratum, system * reference ID and orphan root delay. */ } else if (sys_peer != NULL) { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = sys_refid; xpkt.rootdelay = HTONS_FP(DTOFP(sys_orphandelay)); xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); /* * This is an orphan parent. Show leap synchronized, orphan * stratum, loopback reference ID and zero root delay.
*/ } else { xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOWARNING, PKT_VERSION(rpkt->li_vn_mode), xmode); xpkt.stratum = STRATUM_TO_PKT(sys_orphan); xpkt.refid = htonl(LOOPBACKADR); xpkt.rootdelay = HTONS_FP(DTOFP(0)); xpkt.rootdispersion = HTONS_FP(DTOFP(0)); } xpkt.ppoll = rpkt->ppoll; xpkt.precision = sys_precision; /* * NOTE(review): the next assignment replaces the root dispersion * chosen in every branch above, including the zero set for the * orphan parent - confirm that is intended. */ xpkt.rootdispersion = HTONS_FP(DTOUFP(sys_rootdispersion)); HTONL_FP(&sys_reftime, &xpkt.reftime); xpkt.org = rpkt->xmt; HTONL_FP(&rbufp->recv_time, &xpkt.rec); /* * If the received packet contains a MAC, the transmitted packet * is authenticated and contains a MAC. If not, the transmitted * packet is not authenticated. A received length equal to * LEN_PKT_NOMAC is taken to mean that no MAC is present. */ sendlen = LEN_PKT_NOMAC; if (rbufp->recv_length == sendlen) { get_systime(&xmt_ts); HTONL_FP(&xmt_ts, &xpkt.xmt); sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen); #ifdef DEBUG if (debug) printf("transmit: at %ld %s->%s mode %d\n", current_time, stoa(&rbufp->dstadr->sin), stoa(&rbufp->recv_srcadr), xmode); #endif return; } /* * The received packet contains a MAC, so the transmitted packet * must be authenticated. For symmetric key cryptography, use * the predefined and trusted symmetric keys to generate the * cryptosum. For autokey cryptography, use the server private * value to generate the cookie, which is unique for every * source-destination-key ID combination. */ #ifdef OPENSSL if (xkeyid > NTP_MAXKEY) { keyid_t cookie; /* * The only way to get here is a reply to a legitimate * client request message, so the mode must be * MODE_SERVER. If an extension field is present, there * can be only one and that must be a command. Do what * needs, but with private value of zero so the poor * jerk can decode it. If no extension field is present, * use the cookie to generate the session key.
*/ cookie = session_key(&rbufp->recv_srcadr, &rbufp->dstadr->sin, 0, sys_private, 0); /* * A packet at least as long as the header plus a maximum * length MAC plus two more words is handled as carrying an * extension field: the response bit is set in its first * word and crypto_xmit() builds the reply from it. */ if (rbufp->recv_length >= (int)(sendlen + MAX_MAC_LEN + 2 * sizeof(u_int32))) { session_key(&rbufp->dstadr->sin, &rbufp->recv_srcadr, xkeyid, 0, 2); temp32 = CRYPTO_RESP; rpkt->exten[0] |= htonl(temp32); sendlen += crypto_xmit(&xpkt, &rbufp->recv_srcadr, sendlen, (struct exten *)rpkt->exten, cookie); } else { session_key(&rbufp->dstadr->sin, &rbufp->recv_srcadr, xkeyid, cookie, 2); } } #endif /* OPENSSL */ /* * Stamp the transmit time, advanced by the current * authentication delay estimate, then append the MAC. * * NOTE(review): unlike peer_xmit, the authencrypt() result is not * checked for zero here - confirm that is intended. */ get_systime(&xmt_ts); L_ADD(&xmt_ts, &sys_authdelay); HTONL_FP(&xmt_ts, &xpkt.xmt); authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen); sendlen += authlen; #ifdef OPENSSL if (xkeyid > NTP_MAXKEY) authtrust(xkeyid, 0); #endif /* OPENSSL */ get_systime(&xmt_tx); /* * NOTE(review): this length check runs only after crypto_xmit() * and authencrypt() have already been passed xpkt; sendlen is an * int that is compared with a size_t and logged with %u - * confirm. */ if (sendlen > sizeof(xpkt)) { msyslog(LOG_ERR, "buffer overflow %u", sendlen); exit (-1); } sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen); /* * Calculate the encryption delay. Keep the minimum over the * latest two samples.
*/ L_SUB(&xmt_tx, &xmt_ts); L_ADD(&xmt_tx, &sys_authdelay); sys_authdly[1] = sys_authdly[0]; sys_authdly[0] = xmt_tx.l_uf; if (sys_authdly[0] < sys_authdly[1]) sys_authdelay.l_uf = sys_authdly[0]; else sys_authdelay.l_uf = sys_authdly[1]; #ifdef DEBUG if (debug) printf( "transmit: at %ld %s->%s mode %d keyid %08x len %d mac %d\n", current_time, ntoa(&rbufp->dstadr->sin), ntoa(&rbufp->recv_srcadr), xmode, xkeyid, sendlen - authlen, authlen); #endif } #ifdef OPENSSL /* * key_expire - purge the key list * * Marks keylist entries 0 through peer->keynumber as untrusted with * authtrust(key, 0), frees the list, releases peer->sndval and * resets peer->keynumber to zero. A NULL keylist is tolerated. */ void key_expire( struct peer *peer /* peer structure pointer */ ) { int i; if (peer->keylist != NULL) { for (i = 0; i <= peer->keynumber; i++) authtrust(peer->keylist[i], 0); free(peer->keylist); peer->keylist = NULL; } value_free(&peer->sndval); peer->keynumber = 0; #ifdef DEBUG if (debug) printf("key_expire: at %lu\n", current_time); #endif } #endif /* OPENSSL */ /* * peer_unfit - determine if the peer is unfit for synchronization * * A peer is unfit for synchronization if * > TEST10 bad leap or stratum below floor or at or above ceiling * > TEST11 root distance exceeded * > TEST12 a direct or indirect synchronization loop would form * > TEST13 unreachable or noselect * * The bits found replace the PEER_TEST_MASK bits of peer->flash and * are also returned. */ int /* zero if fit, mask of TEST10..TEST13 bits if unfit */ peer_unfit( struct peer *peer /* peer structure pointer */ ) { int rval = 0; /* * A stratum error occurs if (1) the server has never been * synchronized, (2) the server stratum is below the floor or * greater than or equal to the ceiling, (3) the system stratum * is below the orphan stratum and the server stratum is greater * than or equal to the orphan stratum. */ if (peer->leap == LEAP_NOTINSYNC || peer->stratum < sys_floor || peer->stratum >= sys_ceiling || (sys_stratum < sys_orphan && peer->stratum >= sys_orphan)) rval |= TEST10; /* stratum out of bounds */ /* * A distance error occurs if the root distance is greater than * or equal to the distance threshold plus the increment due to * one poll interval.
*/ if (root_distance(peer) >= sys_maxdist + clock_phi * ULOGTOD(sys_poll)) rval |= TEST11; /* distance exceeded */ /* * A loop error occurs if the remote peer is synchronized to the * local peer or if the remote peer is synchronized to the same * server as the local peer, but only if the remote peer is not * the orphan parent. A peer without a local interface * (peer->dstadr is NULL) is also counted as a loop here. */ if (peer->stratum > 1 && peer->refid != htonl(LOOPBACKADR) && ((!peer->dstadr || peer->refid == peer->dstadr->addr_refid) || peer->refid == sys_refid)) rval |= TEST12; /* synch loop */ /* * An unreachable error occurs if the server is unreachable or * the noselect bit is set. */ if (!peer->reach || peer->flags & FLAG_NOSELECT) rval |= TEST13; /* unreachable */ /* * Replace the previous test bits in the flash word with the new ones. */ peer->flash &= ~PEER_TEST_MASK; peer->flash |= rval; return (rval); } /* * Find the precision of this particular machine */ #define MINSTEP 100e-9 /* minimum clock increment (s) */ #define MAXSTEP 20e-3 /* maximum clock increment (s) */ #define MINLOOPS 5 /* minimum number of step samples */ /* * This routine calculates the system precision, defined as the minimum * of a sequence of differences between successive readings of the * system clock. However, if the system clock can be read more than once * during a tick interval, the difference can be zero or one LSB unit, * where the LSB corresponds to one nanosecond or one microsecond. * Conceivably, if some other process preempts this one and reads the * clock, the difference can be more than one LSB unit. * * For hardware clock frequencies of 10 MHz or less, we assume the * logical clock advances only at the hardware clock tick. For higher * frequencies, we assume the logical clock can advance no more than 100 * nanoseconds between ticks. * * The result is returned as a base-2 exponent (the value -i computed * at the end of the routine). */ int default_get_precision(void) { l_fp val; /* current seconds fraction */ l_fp last; /* last seconds fraction */ l_fp diff; /* difference */ double tick; /* computed tick value */ double dtemp; /* scratch */ int i; /* log2 precision */ /* * Loop to find the tick value (in seconds).
Toss out outlier * values less than the minimum tick value (MINSTEP); they do not * count toward the MINLOOPS samples. If no sample is smaller, * the default maximum value (MAXSTEP) is used. */ get_systime(&last); tick = MAXSTEP; for (i = 0; i < MINLOOPS;) { get_systime(&val); diff = val; L_SUB(&diff, &last); last = val; LFPTOD(&diff, dtemp); if (dtemp < MINSTEP) continue; i++; if (dtemp < tick) tick = dtemp; } /* * Find the nearest power of two: double the tick until it * exceeds one, counting the doublings in i, then step back by * one if the next larger power is nearer. */ NLOG(NLOG_SYSEVENT) msyslog(LOG_INFO, "precision = %.3f usec", tick * 1e6); for (i = 0; tick <= 1; i++) tick *= 2; if (tick - 1. > 1. - tick / 2) i--; return (-i); } /* * kod_proto - called once per second to limit kiss-of-death packets * * Reloads the kiss-of-death counter sys_kod from sys_kod_rate. */ void kod_proto(void) { sys_kod = sys_kod_rate; } /* * init_proto - initialize the protocol module's data */ void init_proto(void) { l_fp dummy; int i; /* * Fill in the sys_* variables. By default the broadcast client * is off (sys_bclient = 0) and authentication is on * (sys_authenticate = 1). */ sys_leap = LEAP_NOTINSYNC; sys_stratum = STRATUM_UNSPEC; memcpy(&sys_refid, "INIT", 4); sys_precision = (s_char)default_get_precision(); sys_jitter = LOGTOD(sys_precision); sys_rootdelay = 0; sys_orphandelay = (double)(ntp_random() & 0xffff) / 65536. * sys_maxdist; sys_rootdispersion = 0; L_CLR(&sys_reftime); sys_peer = NULL; sys_survivors = 0; get_systime(&dummy); sys_manycastserver = 0; sys_bclient = 0; sys_bdelay = DEFBROADDELAY; sys_calldelay = BURST_DELAY; sys_authenticate = 1; L_CLR(&sys_authdelay); sys_authdly[0] = sys_authdly[1] = 0; sys_stattime = 0; proto_clr_stats(); for (i = 0; i < MAX_TTL; i++) { sys_ttl[i] = (u_char)((i * 256) / MAX_TTL); sys_ttlmax = i; } #ifdef OPENSSL sys_automax = 1 << NTP_AUTOMAX; #endif /* OPENSSL */ /* * Default switches: NTP discipline, kernel discipline (unless * KERNEL_FLL_BUG is defined) and statistics on, PPS off. */ ntp_enable = 1; #ifndef KERNEL_FLL_BUG kern_enable = 1; #endif pps_enable = 0; stats_control = 1; } /* * proto_config - configure the protocol module * * Each item reads whichever of value, dvalue or svalue applies to it; * the other arguments are ignored for that item. */ void proto_config( int item, /* PROTO_* item to change */ u_long value, /* integer value */ double dvalue, /* floating point value */ struct sockaddr_storage* svalue /* address value, may be NULL */ ) { /* * Dispatch on the item to change, then change it. */ switch (item) { /* * Turn on/off kernel discipline.
*/ case PROTO_KERNEL: kern_enable = (int)value; break; /* * Turn on/off clock discipline. */ case PROTO_NTP: ntp_enable = (int)value; break; /* * Turn on/off monitoring. */ case PROTO_MONITOR: if (value) mon_start(MON_ON); else mon_stop(MON_ON); break; /* * Turn on/off statistics. */ case PROTO_FILEGEN: stats_control = (int)value; break; /* * Turn on/off the broadcast client. */ case PROTO_BROADCLIENT: sys_bclient = (int)value; if (sys_bclient == 0) io_unsetbclient(); else io_setbclient(); break; /* * Turn on/off PPS discipline. */ case PROTO_PPS: pps_enable = (int)value; break; /* * Add multicast group address. The broadcast client flag is * turned on as well, even when no address is supplied. */ case PROTO_MULTICAST_ADD: if (svalue) io_multicast_add(*svalue); sys_bclient = 1; break; /* * Delete multicast group address. */ case PROTO_MULTICAST_DEL: if (svalue) io_multicast_del(*svalue); break; /* * Set default broadcast delay. */ case PROTO_BROADDELAY: sys_bdelay = dvalue; break; /* * Set modem call delay. */ case PROTO_CALLDELAY: sys_calldelay = (int)value; break; /* * Turn on/off authentication to mobilize ephemeral * associations. */ case PROTO_AUTHENTICATE: sys_authenticate = (int)value; break; /* * Set minimum number of survivors. */ case PROTO_MINCLOCK: sys_minclock = (int)dvalue; break; /* * Set maximum number of preemptable associations. */ case PROTO_MAXCLOCK: sys_maxclock = (int)dvalue; break; /* * Set minimum number of survivors. * NOTE(review): same wording as PROTO_MINCLOCK above, but this * case sets sys_minsane - confirm the intended description. */ case PROTO_MINSANE: sys_minsane = (int)dvalue; break; /* * Set stratum floor. */ case PROTO_FLOOR: sys_floor = (int)dvalue; break; /* * Set stratum ceiling. */ case PROTO_CEILING: sys_ceiling = (int)dvalue; break; /* * Set orphan stratum. */ case PROTO_ORPHAN: sys_orphan = (int)dvalue; break; /* * Set cohort switch. */ case PROTO_COHORT: sys_cohort = (int)dvalue; break; /* * Set minimum dispersion increment. */ case PROTO_MINDISP: sys_mindisp = dvalue; break; /* * Set maximum distance (select threshold). */ case PROTO_MAXDIST: sys_maxdist = dvalue; break; /* * Set anticlockhop threshold.
*/ case PROTO_MAXHOP: sys_maxhop = (int)dvalue; break; /* * Set adjtime() resolution (s). */ case PROTO_ADJ: sys_tick = dvalue; break; /* * Set manycast beacon interval. */ case PROTO_BEACON: sys_beacon = (int)dvalue; break; #ifdef REFCLOCK /* * Turn on/off refclock calibration. */ case PROTO_CAL: cal_enable = (int)value; break; #endif /* REFCLOCK */ default: /* * Unknown item: log it and change nothing. * NOTE(review): value is a u_long but is formatted with %ld. */ msyslog(LOG_INFO, "proto_config: illegal item %d, value %ld", item, value); } } /* * proto_clr_stats - clear protocol stat counters * * Records current_time in sys_stattime as the start of the new * statistics interval and zeroes the packet counters. */ void proto_clr_stats(void) { sys_stattime = current_time; sys_received = 0; sys_processed = 0; sys_newversionpkt = 0; sys_oldversionpkt = 0; sys_unknownversion = 0; sys_restricted = 0; sys_badlength = 0; sys_badauth = 0; sys_limitrejected = 0; } Index: stable/9/sys/netinet/igmp.c =================================================================== --- stable/9/sys/netinet/igmp.c (revision 281230) +++ stable/9/sys/netinet/igmp.c (revision 281231) @@ -1,3649 +1,3648 @@ /*- * Copyright (c) 2007-2009 Bruce Simpson. * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 */ /* * Internet Group Management Protocol (IGMP) routines. * [RFC1112, RFC2236, RFC3376] * * Written by Steve Deering, Stanford, May 1988. * Modified by Rosen Sharma, Stanford, Aug 1994. * Modified by Bill Fenner, Xerox PARC, Feb 1995. * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995. * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson. 
* * MULTICAST Revision: 3.5.1.4 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IGMPV3 #define KTR_IGMPV3 KTR_INET #endif static struct igmp_ifinfo * igi_alloc_locked(struct ifnet *); static void igi_delete_locked(const struct ifnet *); static void igmp_dispatch_queue(struct ifqueue *, int, const int); static void igmp_fasttimo_vnet(void); static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *); static int igmp_handle_state_change(struct in_multi *, struct igmp_ifinfo *); static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *); static int igmp_input_v1_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v2_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v3_query(struct ifnet *, const struct ip *, /*const*/ struct igmpv3 *); static int igmp_input_v3_group_query(struct in_multi *, struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *); static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *, /*const*/ struct igmp *); static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *, /*const*/ struct igmp *); static void igmp_intr(struct mbuf *); static int igmp_isgroupreported(const struct in_addr); static struct mbuf * igmp_ra_alloc(void); #ifdef KTR static char * igmp_rec_type_to_str(const int); #endif static void igmp_set_version(struct igmp_ifinfo *, const int); static void igmp_slowtimo_vnet(void); static int igmp_v1v2_queue_report(struct in_multi *, const int); static void igmp_v1v2_process_group_timer(struct in_multi *, const int); static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *); static void igmp_v2_update_group(struct in_multi *, const int); static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *); static void 
igmp_v3_dispatch_general_query(struct igmp_ifinfo *); static struct mbuf * igmp_v3_encap_report(struct ifnet *, struct mbuf *); static int igmp_v3_enqueue_group_record(struct ifqueue *, struct in_multi *, const int, const int, const int); static int igmp_v3_enqueue_filter_change(struct ifqueue *, struct in_multi *); static void igmp_v3_process_group_timers(struct igmp_ifinfo *, struct ifqueue *, struct ifqueue *, struct in_multi *, const int); static int igmp_v3_merge_state_changes(struct in_multi *, struct ifqueue *); static void igmp_v3_suppress_group_record(struct in_multi *); static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); static const struct netisr_handler igmp_nh = { .nh_name = "igmp", .nh_handler = igmp_intr, .nh_proto = NETISR_IGMP, .nh_policy = NETISR_POLICY_SOURCE, }; /* * System-wide globals. * * Unlocked access to these is OK, except for the global IGMP output * queue. The IGMP subsystem lock ends up being system-wide for the moment, * because all VIMAGEs have to share a global output queue, as netisrs * themselves are not virtualized. * * Locking: * * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. * Any may be taken independently; if any are held at the same * time, the above lock order must be followed. * * All output is delegated to the netisr. * Now that Giant has been eliminated, the netisr may be inlined. * * IN_MULTI_LOCK covers in_multi. * * IGMP_LOCK covers igmp_ifinfo and any global variables in this file, * including the output queue. * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of * per-link state iterators. * * igmp_ifinfo is valid as long as PF_INET is attached to the interface, * therefore it is not refcounted. * We allow unlocked reads of igmp_ifinfo when accessed via in_multi. 
* * Reference counting * * IGMP acquires its own reference every time an in_multi is passed to * it and the group is being joined for the first time. * * IGMP releases its reference(s) on in_multi in a deferred way, * because the operations which process the release run as part of * a loop whose control variables are directly affected by the release * (that, and not recursing on the IF_ADDR_LOCK). * * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds * to a vnet in ifp->if_vnet. * * SMPng: XXX We may potentially race operations on ifma_protospec. * The problem is that we currently lack a clean way of taking the * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing, * as anything which modifies ifma needs to be covered by that lock. * So check for ifma_protospec being NULL before proceeding. */ struct mtx igmp_mtx; struct mbuf *m_raopt; /* Router Alert option */ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); /* * VIMAGE-wide globals. * * The IGMPv3 timers themselves need to run per-image, however, * protosw timers run globally (see tcp). * An ifnet can only be in one vimage at a time, and the loopback * ifnet, loif, is itself virtualized. * It would otherwise be possible to seriously hose IGMP state, * and create inconsistencies in upstream multicast routing, if you have * multiple VIMAGEs running on the same link joining different multicast * groups, UNLESS the "primary IP address" is different. This is because * IGMP for IPv4 does not force link-local addresses to be used for each * node, unlike MLD for IPv6. * Obviously the IGMPv3 per-interface state has per-vimage granularity * also as a result. * * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection * policy to control the address used by IGMP on the link. 
*/
/*
 * "Work pending" flags, one per timer class.  The input paths set them
 * to 1 when they arm a timer; igmp_fasttimo_vnet() tests them to decide
 * whether any per-tick processing is needed at all.
 */
static VNET_DEFINE(int, interface_timers_running);	 /* IGMPv3 general
							  * query response */
static VNET_DEFINE(int, state_change_timers_running);	 /* IGMPv3 state-change
							  * retransmit */
static VNET_DEFINE(int, current_state_timers_running);	 /* IGMPv1/v2 host
							  * report; IGMPv3 g/sg
							  * query response */

#define	V_interface_timers_running	VNET(interface_timers_running)
#define	V_state_change_timers_running	VNET(state_change_timers_running)
#define	V_current_state_timers_running	VNET(current_state_timers_running)

/*
 * List of per-interface IGMP state; igi_alloc_locked() inserts entries
 * and igi_delete_locked() removes them.
 */
static VNET_DEFINE(LIST_HEAD(, igmp_ifinfo), igi_head);
/* Statistics block, exported through the "stats" sysctl. */
static VNET_DEFINE(struct igmpstat, igmpstat) = {
	.igps_version = IGPS_VERSION_3,
	.igps_len = sizeof(struct igmpstat),
};
/*
 * Minimum interval between accepted group-and-source queries per group;
 * passed to ratecheck() in igmp_input_v3_query().  Initialised to
 * { 10, 0 }; the seconds field is tunable via the "gsrdelay" sysctl.
 */
static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0};

#define	V_igi_head			VNET(igi_head)
#define	V_igmpstat			VNET(igmpstat)
#define	V_igmp_gsrdelay			VNET(igmp_gsrdelay)

/*
 * Tunables.  Each one backs the sysctl of the same name under
 * net.inet.igmp; see the description strings on the sysctl declarations.
 */
static VNET_DEFINE(int, igmp_recvifkludge) = 1;
static VNET_DEFINE(int, igmp_sendra) = 1;
static VNET_DEFINE(int, igmp_sendlocal) = 1;
static VNET_DEFINE(int, igmp_v1enable) = 1;
static VNET_DEFINE(int, igmp_v2enable) = 1;
static VNET_DEFINE(int, igmp_legacysupp);
static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;

#define	V_igmp_recvifkludge		VNET(igmp_recvifkludge)
#define	V_igmp_sendra			VNET(igmp_sendra)
#define	V_igmp_sendlocal		VNET(igmp_sendlocal)
#define	V_igmp_v1enable			VNET(igmp_v1enable)
#define	V_igmp_v2enable			VNET(igmp_v2enable)
#define	V_igmp_legacysupp		VNET(igmp_legacysupp)
#define	V_igmp_default_version		VNET(igmp_default_version)

/*
 * Virtualized sysctls.
*/
/* Statistics block; note that it is exported read-write (CTLFLAG_RW). */
SYSCTL_VNET_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW,
    &VNET_NAME(igmpstat), igmpstat, "");
/* Plain integer knobs: written directly, with no range validation. */
SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW,
    &VNET_NAME(igmp_recvifkludge), 0,
    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW,
    &VNET_NAME(igmp_sendra), 0,
    "Send IP Router Alert option in IGMPv2/v3 messages");
SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW,
    &VNET_NAME(igmp_sendlocal), 0,
    "Send IGMP membership reports for 224.0.0.0/24 groups");
SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW,
    &VNET_NAME(igmp_v1enable), 0,
    "Enable backwards compatibility with IGMPv1");
SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW,
    &VNET_NAME(igmp_v2enable), 0,
    "Enable backwards compatibility with IGMPv2");
SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW,
    &VNET_NAME(igmp_legacysupp), 0,
    "Allow v1/v2 reports to suppress v3 group responses");
/*
 * The two knobs below go through handler functions
 * (sysctl_igmp_default_version() and sysctl_igmp_gsr()) that range-check
 * the new value while holding the IGMP lock.
 */
SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, default_version,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
    "Default version of IGMP to run on each interface");
SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
    "Rate limit for IGMPv3 Group-and-Source queries in seconds");

/*
 * Non-virtualized sysctls.
*/ static SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo, "Per-interface IGMPv3 state"); static __inline void igmp_save_context(struct mbuf *m, struct ifnet *ifp) { #ifdef VIMAGE m->m_pkthdr.header = ifp->if_vnet; #endif /* VIMAGE */ m->m_pkthdr.flowid = ifp->if_index; } static __inline void igmp_scrub_context(struct mbuf *m) { m->m_pkthdr.header = NULL; m->m_pkthdr.flowid = 0; } #ifdef KTR static __inline char * inet_ntoa_haddr(in_addr_t haddr) { struct in_addr ia; ia.s_addr = htonl(haddr); return (inet_ntoa(ia)); } #endif /* * Restore context from a queued IGMP output chain. * Return saved ifindex. * * VIMAGE: The assertion is there to make sure that we * actually called CURVNET_SET() with what's in the mbuf chain. */ static __inline uint32_t igmp_restore_context(struct mbuf *m) { #ifdef notyet #if defined(VIMAGE) && defined(INVARIANTS) KASSERT(curvnet == (m->m_pkthdr.header), ("%s: called when curvnet was not restored", __func__)); #endif #endif return (m->m_pkthdr.flowid); } /* * Retrieve or set default IGMP version. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by IGMP lock. */ static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS) { int error; int new; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); IGMP_LOCK(); new = V_igmp_default_version; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) goto out_locked; if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { error = EINVAL; goto out_locked; } CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", V_igmp_default_version, new); V_igmp_default_version = new; out_locked: IGMP_UNLOCK(); return (error); } /* * Retrieve or set threshold between group-source queries in seconds. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by IGMP lock. 
*/ static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS) { int error; int i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); IGMP_LOCK(); i = V_igmp_gsrdelay.tv_sec; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) goto out_locked; if (i < -1 || i >= 60) { error = EINVAL; goto out_locked; } CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d", V_igmp_gsrdelay.tv_sec, i); V_igmp_gsrdelay.tv_sec = i; out_locked: IGMP_UNLOCK(); return (error); } /* * Expose struct igmp_ifinfo to userland, keyed by ifindex. * For use by ifmcstat(8). * * SMPng: NOTE: Does an unlocked ifindex space read. * VIMAGE: Assume curvnet set by caller. The node handler itself * is not directly virtualized. */ static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS) { int *name; int error; u_int namelen; struct ifnet *ifp; struct igmp_ifinfo *igi; name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo)); if (error) return (error); IN_MULTI_LOCK(); IGMP_LOCK(); if (name[0] <= 0 || name[0] > V_if_index) { error = ENOENT; goto out_locked; } error = ENOENT; ifp = ifnet_byindex(name[0]); if (ifp == NULL) goto out_locked; LIST_FOREACH(igi, &V_igi_head, igi_link) { if (ifp == igi->igi_ifp) { error = SYSCTL_OUT(req, igi, sizeof(struct igmp_ifinfo)); break; } } out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (error); } /* * Dispatch an entire queue of pending packet chains * using the netisr. * VIMAGE: Assumes the vnet pointer has been set. */ static void igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop) { struct mbuf *m; for (;;) { _IF_DEQUEUE(ifq, m); if (m == NULL) break; CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m); if (loop) m->m_flags |= M_IGMP_LOOP; netisr_dispatch(NETISR_IGMP, m); if (--limit == 0) break; } } /* * Filter outgoing IGMP report state by group. 
* * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1). * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are * disabled for all groups in the 224.0.0.0/24 link-local scope. However, * this may break certain IGMP snooping switches which rely on the old * report behaviour. * * Return zero if the given group is one for which IGMP reports * should be suppressed, or non-zero if reports should be issued. */ static __inline int igmp_isgroupreported(const struct in_addr addr) { if (in_allhosts(addr) || ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) return (0); return (1); } /* * Construct a Router Alert option to use in outgoing packets. */ static struct mbuf * igmp_ra_alloc(void) { struct mbuf *m; struct ipoption *p; MGET(m, M_DONTWAIT, MT_DATA); p = mtod(m, struct ipoption *); p->ipopt_dst.s_addr = INADDR_ANY; p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */ p->ipopt_list[1] = 0x04; /* 4 bytes long */ p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */ p->ipopt_list[3] = 0x00; /* pad byte */ m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1]; return (m); } /* * Attach IGMP when PF_INET is attached to an interface. */ struct igmp_ifinfo * igmp_domifattach(struct ifnet *ifp) { struct igmp_ifinfo *igi; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = igi_alloc_locked(ifp); if (!(ifp->if_flags & IFF_MULTICAST)) igi->igi_flags |= IGIF_SILENT; IGMP_UNLOCK(); return (igi); } /* * VIMAGE: assume curvnet set by caller. 
*/ static struct igmp_ifinfo * igi_alloc_locked(/*const*/ struct ifnet *ifp) { struct igmp_ifinfo *igi; IGMP_LOCK_ASSERT(); igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO); if (igi == NULL) goto out; igi->igi_ifp = ifp; igi->igi_version = V_igmp_default_version; igi->igi_flags = 0; igi->igi_rv = IGMP_RV_INIT; igi->igi_qi = IGMP_QI_INIT; igi->igi_qri = IGMP_QRI_INIT; igi->igi_uri = IGMP_URI_INIT; SLIST_INIT(&igi->igi_relinmhead); /* * Responses to general queries are subject to bounds. */ IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS); LIST_INSERT_HEAD(&V_igi_head, igi, igi_link); CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)", ifp, ifp->if_xname); out: return (igi); } /* * Hook for ifdetach. * * NOTE: Some finalization tasks need to run before the protocol domain * is detached, but also before the link layer does its cleanup. * * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK(). * XXX This is also bitten by unlocked ifma_protospec access. */ void igmp_ifdetach(struct ifnet *ifp) { struct igmp_ifinfo *igi; struct ifmultiaddr *ifma; struct in_multi *inm, *tinm; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; if (igi->igi_version == IGMP_VERSION_3) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; if (inm->inm_state == IGMP_LEAVING_MEMBER) { SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele); } inm_clear_recorded(inm); } IF_ADDR_RUNLOCK(ifp); /* * Free the in_multi reference(s) for this IGMP lifecycle. 
*/ SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) { SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); inm_release_locked(inm); } } IGMP_UNLOCK(); } /* * Hook for domifdetach. */ void igmp_domifdetach(struct ifnet *ifp) { struct igmp_ifinfo *igi; CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; igi_delete_locked(ifp); IGMP_UNLOCK(); } static void igi_delete_locked(const struct ifnet *ifp) { struct igmp_ifinfo *igi, *tigi; CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)", __func__, ifp, ifp->if_xname); IGMP_LOCK_ASSERT(); LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) { if (igi->igi_ifp == ifp) { /* * Free deferred General Query responses. */ _IF_DRAIN(&igi->igi_gq); LIST_REMOVE(igi, igi_link); KASSERT(SLIST_EMPTY(&igi->igi_relinmhead), ("%s: there are dangling in_multi references", __func__)); free(igi, M_IGMP); return; } } #ifdef INVARIANTS panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp); #endif } /* * Process a received IGMPv1 query. * Return non-zero if the message should be dropped. * * VIMAGE: The curvnet pointer is derived from the input ifp. */ static int igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, const struct igmp *igmp) { struct ifmultiaddr *ifma; struct igmp_ifinfo *igi; struct in_multi *inm; /* * IGMPv1 Host Mmembership Queries SHOULD always be addressed to * 224.0.0.1. They are always treated as General Queries. * igmp_group is always ignored. Do not drop it as a userland * daemon may wish to see it. * XXX SMPng: unlocked increments in igmpstat assumed atomic. 
*/ if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) { IGMPSTAT_INC(igps_rcv_badqueries); return (0); } IGMPSTAT_INC(igps_rcv_gen_queries); IN_MULTI_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Switch to IGMPv1 host compatibility mode. */ igmp_set_version(igi, IGMP_VERSION_1); CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname); /* * Start the timers in all of our group records * for the interface on which the query arrived, * except those which are already running. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; if (inm->inm_timer != 0) continue; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_V1V2_MAX_RI * PR_FASTHZ); V_current_state_timers_running = 1; break; case IGMP_LEAVING_MEMBER: break; } } IF_ADDR_RUNLOCK(ifp); out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (0); } /* * Process a received IGMPv2 general or group-specific query. */ static int igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, const struct igmp *igmp) { struct ifmultiaddr *ifma; struct igmp_ifinfo *igi; struct in_multi *inm; int is_general_query; uint16_t timer; is_general_query = 0; /* * Validate address fields upfront. * XXX SMPng: unlocked increments in igmpstat assumed atomic. 
*/ if (in_nullhost(igmp->igmp_group)) { /* * IGMPv2 General Query. * If this was not sent to the all-hosts group, ignore it. */ if (!in_allhosts(ip->ip_dst)) return (0); IGMPSTAT_INC(igps_rcv_gen_queries); is_general_query = 1; } else { /* IGMPv2 Group-Specific Query. */ IGMPSTAT_INC(igps_rcv_group_queries); } IN_MULTI_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Ignore v2 query if in v1 Compatibility Mode. */ if (igi->igi_version == IGMP_VERSION_1) goto out_locked; igmp_set_version(igi, IGMP_VERSION_2); timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; if (is_general_query) { /* * For each reporting group joined on this * interface, kick the report timer. */ CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)", ifp, ifp->if_xname); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; igmp_v2_update_group(inm, timer); } IF_ADDR_RUNLOCK(ifp); } else { /* * Group-specific IGMPv2 query, we need only * look up the single group to process it. */ inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); igmp_v2_update_group(inm, timer); } } out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (0); } /* * Update the report timer on a group in response to an IGMPv2 query. * * If we are becoming the reporting member for this group, start the timer. * If we already are the reporting member for this group, and timer is * below the threshold, reset it. * * We may be updating the group for the first time since we switched * to IGMPv3. 
If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike IGMPv3, the delay per group should be jittered
 * to avoid bursts of IGMPv2 reports.
 */
static void
igmp_v2_update_group(struct in_multi *inm, const int timer)
{

	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);

	IN_MULTI_LOCK_ASSERT();

	switch (inm->inm_state) {
	case IGMP_SLEEPING_MEMBER:
		/* A sleeping member only wakes up; no timer is armed. */
		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		return;

	case IGMP_REPORTING_MEMBER:
		/* A running timer that fires no later than 'timer' stays. */
		if (inm->inm_timer != 0 && inm->inm_timer <= timer) {
			CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
			    "skipping.", __func__);
			return;
		}
		/* FALLTHROUGH */
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* (Re)arm the report timer with a randomised delay. */
		CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
		inm->inm_state = IGMP_REPORTING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		V_current_state_timers_running = 1;
		return;

	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
	default:
		/* Nothing to do for these states. */
		return;
	}
}

/*
 * Process a received IGMPv3 general, group-specific or
 * group-and-source-specific query.
 * Assumes m has already been pulled up to the full IGMP message length.
 * Return 0 if successful, otherwise an appropriate error code is returned.
*/ static int igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, /*const*/ struct igmpv3 *igmpv3) { struct igmp_ifinfo *igi; struct in_multi *inm; int is_general_query; uint32_t maxresp, nsrc, qqi; uint16_t timer; uint8_t qrv; is_general_query = 0; CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname); maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */ if (maxresp >= 128) { maxresp = IGMP_MANT(igmpv3->igmp_code) << (IGMP_EXP(igmpv3->igmp_code) + 3); } /* * Robustness must never be less than 2 for on-wire IGMPv3. * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make * an exception for interfaces whose IGMPv3 state changes * are redirected to loopback (e.g. MANET). */ qrv = IGMP_QRV(igmpv3->igmp_misc); if (qrv < 2) { CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__, qrv, IGMP_RV_INIT); qrv = IGMP_RV_INIT; } qqi = igmpv3->igmp_qqi; if (qqi >= 128) { qqi = IGMP_MANT(igmpv3->igmp_qqi) << (IGMP_EXP(igmpv3->igmp_qqi) + 3); } timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; nsrc = ntohs(igmpv3->igmp_numsrc); /* * Validate address fields and versions upfront before * accepting v3 query. * XXX SMPng: Unlocked access to igmpstat counters here. */ if (in_nullhost(igmpv3->igmp_group)) { /* * IGMPv3 General Query. * * General Queries SHOULD be directed to 224.0.0.1. * A general query with a source list has undefined * behaviour; discard it. */ IGMPSTAT_INC(igps_rcv_gen_queries); if (!in_allhosts(ip->ip_dst) || nsrc > 0) { IGMPSTAT_INC(igps_rcv_badqueries); return (0); } is_general_query = 1; } else { /* Group or group-source specific query. 
*/ if (nsrc == 0) IGMPSTAT_INC(igps_rcv_group_queries); else IGMPSTAT_INC(igps_rcv_gsr_queries); } IN_MULTI_LOCK(); IGMP_LOCK(); igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); if (igi->igi_flags & IGIF_LOOPBACK) { CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)", ifp, ifp->if_xname); goto out_locked; } /* * Discard the v3 query if we're in Compatibility Mode. * The RFC is not obviously worded that hosts need to stay in * compatibility mode until the Old Version Querier Present * timer expires. */ if (igi->igi_version != IGMP_VERSION_3) { CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)", igi->igi_version, ifp, ifp->if_xname); goto out_locked; } igmp_set_version(igi, IGMP_VERSION_3); igi->igi_rv = qrv; igi->igi_qi = qqi; igi->igi_qri = maxresp; CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi, maxresp); if (is_general_query) { /* * Schedule a current-state report on this ifp for * all groups, possibly containing source lists. * If there is a pending General Query response * scheduled earlier than the selected delay, do * not schedule any other reports. * Otherwise, reset the interface timer. */ CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)", ifp, ifp->if_xname); if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) { igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer); V_interface_timers_running = 1; } } else { /* * Group-source-specific queries are throttled on * a per-group basis to defeat denial-of-service attempts. * Queries for groups we are not a member of on this * link are simply ignored. 
*/ inm = inm_lookup(ifp, igmpv3->igmp_group); if (inm == NULL) goto out_locked; if (nsrc > 0) { if (!ratecheck(&inm->inm_lastgsrtv, &V_igmp_gsrdelay)) { CTR1(KTR_IGMPV3, "%s: GS query throttled.", __func__); IGMPSTAT_INC(igps_drop_gsr_queries); goto out_locked; } } CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)", inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no * further report need be scheduled. * Otherwise, prepare to respond to the * group-specific or group-and-source query. */ if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) igmp_input_v3_group_query(inm, igi, timer, igmpv3); } out_locked: IGMP_UNLOCK(); IN_MULTI_UNLOCK(); return (0); } /* * Process a recieved IGMPv3 group-specific or group-and-source-specific * query. * Return <0 if any error occured. Currently this is ignored. */ static int igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi, int timer, /*const*/ struct igmpv3 *igmpv3) { int retval; uint16_t nsrc; IN_MULTI_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); retval = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LEAVING_MEMBER: return (retval); break; case IGMP_REPORTING_MEMBER: case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: break; } nsrc = ntohs(igmpv3->igmp_numsrc); /* * Deal with group-specific queries upfront. * If any group query is already pending, purge any recorded * source-list state if it exists, and schedule a query response * for this group-specific query. 
*/ if (nsrc == 0) { if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) { inm_clear_recorded(inm); timer = min(inm->inm_timer, timer); } inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; return (retval); } /* * Deal with the case where a group-and-source-specific query has * been received but a group-specific query is already pending. */ if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) { timer = min(inm->inm_timer, timer); inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; return (retval); } /* * Finally, deal with the case where a group-and-source-specific * query has been received, where a response to a previous g-s-r * query exists, or none exists. * In this case, we need to parse the source-list which the Querier * has provided us with and check if we have any source list filter * entries at T1 for these sources. If we do not, there is no need * schedule a report and the query may be dropped. * If we do, we must record them and schedule a current-state * report for those sources. * FIXME: Handling source lists larger than 1 mbuf requires that * we pass the mbuf chain pointer down to this function, and use * m_getptr() to walk the chain. */ if (inm->inm_nsrc > 0) { const struct in_addr *ap; int i, nrecorded; ap = (const struct in_addr *)(igmpv3 + 1); nrecorded = 0; for (i = 0; i < nsrc; i++, ap++) { retval = inm_record_source(inm, ap->s_addr); if (retval < 0) break; nrecorded += retval; } if (nrecorded > 0) { CTR1(KTR_IGMPV3, "%s: schedule response to SG query", __func__); inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); V_current_state_timers_running = 1; } } return (retval); } /* * Process a received IGMPv1 host membership report. * * NOTE: 0.0.0.0 workaround breaks const correctness. 
*/ static int igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; struct in_multi *inm; IGMPSTAT_INC(igps_rcv_reports); if (ifp->if_flags & IFF_LOOPBACK) return (0); if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || !in_hosteq(igmp->igmp_group, ip->ip_dst)) { IGMPSTAT_INC(igps_rcv_badreports); return (EINVAL); } /* * RFC 3376, Section 4.2.13, 9.2, 9.3: * Booting clients may use the source address 0.0.0.0. Some * IGMP daemons may not know how to use IP_RECVIF to determine * the interface upon which this message was received. * Replace 0.0.0.0 with the subnet address if told to do so. */ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { IFP_TO_IA(ifp, ia); if (ia != NULL) { ip->ip_src.s_addr = htonl(ia->ia_subnet); ifa_free(&ia->ia_ifa); } } CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); /* * IGMPv1 report suppression. * If we are a member of this group, and our membership should be * reported, stop our group timer and transition to the 'lazy' state. */ IN_MULTI_LOCK(); inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { struct igmp_ifinfo *igi; igi = inm->inm_igi; if (igi == NULL) { KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); goto out_locked; } IGMPSTAT_INC(igps_rcv_ourreports); /* * If we are in IGMPv3 host mode, do not allow the * other host's IGMPv1 report to suppress our reports * unless explicitly configured to do so. 
*/ if (igi->igi_version == IGMP_VERSION_3) { if (V_igmp_legacysupp) igmp_v3_suppress_group_record(inm); goto out_locked; } inm->inm_timer = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: break; case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_REPORTING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); if (igi->igi_version == IGMP_VERSION_1) inm->inm_state = IGMP_LAZY_MEMBER; else if (igi->igi_version == IGMP_VERSION_2) inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } out_locked: IN_MULTI_UNLOCK(); return (0); } /* * Process a received IGMPv2 host membership report. * * NOTE: 0.0.0.0 workaround breaks const correctness. */ static int igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; struct in_multi *inm; /* * Make sure we don't hear our own membership report. Fast * leave requires knowing that we are the only member of a * group. */ IFP_TO_IA(ifp, ia); if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) { ifa_free(&ia->ia_ifa); return (0); } IGMPSTAT_INC(igps_rcv_reports); if (ifp->if_flags & IFF_LOOPBACK) { if (ia != NULL) ifa_free(&ia->ia_ifa); return (0); } if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || !in_hosteq(igmp->igmp_group, ip->ip_dst)) { if (ia != NULL) ifa_free(&ia->ia_ifa); IGMPSTAT_INC(igps_rcv_badreports); return (EINVAL); } /* * RFC 3376, Section 4.2.13, 9.2, 9.3: * Booting clients may use the source address 0.0.0.0. Some * IGMP daemons may not know how to use IP_RECVIF to determine * the interface upon which this message was received. 
* Replace 0.0.0.0 with the subnet address if told to do so. */ if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { if (ia != NULL) ip->ip_src.s_addr = htonl(ia->ia_subnet); } if (ia != NULL) ifa_free(&ia->ia_ifa); CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); /* * IGMPv2 report suppression. * If we are a member of this group, and our membership should be * reported, and our group timer is pending or about to be reset, * stop our group timer by transitioning to the 'lazy' state. */ IN_MULTI_LOCK(); inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { struct igmp_ifinfo *igi; igi = inm->inm_igi; KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); IGMPSTAT_INC(igps_rcv_ourreports); /* * If we are in IGMPv3 host mode, do not allow the * other host's IGMPv1 report to suppress our reports * unless explicitly configured to do so. */ if (igi->igi_version == IGMP_VERSION_3) { if (V_igmp_legacysupp) igmp_v3_suppress_group_record(inm); goto out_locked; } inm->inm_timer = 0; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_SLEEPING_MEMBER: break; case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, "report suppressed for %s on ifp %p(%s)", inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); case IGMP_LAZY_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: case IGMP_LEAVING_MEMBER: break; } } out_locked: IN_MULTI_UNLOCK(); return (0); } void igmp_input(struct mbuf *m, int off) { int iphlen; struct ifnet *ifp; struct igmp *igmp; struct ip *ip; int igmplen; int minlen; int queryver; CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off); ifp = m->m_pkthdr.rcvif; IGMPSTAT_INC(igps_rcv_total); ip = mtod(m, struct ip *); iphlen = off; igmplen = ip->ip_len; /* * Validate lengths. 
*/ if (igmplen < IGMP_MINLEN) { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return; } /* * Always pullup to the minimum size for v1/v2 or v3 * to amortize calls to m_pullup(). */ minlen = iphlen; if (igmplen >= IGMP_V3_QUERY_MINLEN) minlen += IGMP_V3_QUERY_MINLEN; else minlen += IGMP_MINLEN; if ((m->m_flags & M_EXT || m->m_len < minlen) && (m = m_pullup(m, minlen)) == 0) { IGMPSTAT_INC(igps_rcv_tooshort); return; } ip = mtod(m, struct ip *); /* * Validate checksum. */ m->m_data += iphlen; m->m_len -= iphlen; igmp = mtod(m, struct igmp *); if (in_cksum(m, igmplen)) { IGMPSTAT_INC(igps_rcv_badsum); m_freem(m); return; } m->m_data -= iphlen; m->m_len += iphlen; /* * IGMP control traffic is link-scope, and must have a TTL of 1. * DVMRP traffic (e.g. mrinfo, mtrace) is an exception; * probe packets may come from beyond the LAN. */ if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) { IGMPSTAT_INC(igps_rcv_badttl); m_freem(m); return; } switch (igmp->igmp_type) { case IGMP_HOST_MEMBERSHIP_QUERY: if (igmplen == IGMP_MINLEN) { if (igmp->igmp_code == 0) queryver = IGMP_VERSION_1; else queryver = IGMP_VERSION_2; } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { queryver = IGMP_VERSION_3; } else { IGMPSTAT_INC(igps_rcv_tooshort); m_freem(m); return; } switch (queryver) { case IGMP_VERSION_1: IGMPSTAT_INC(igps_rcv_v1v2_queries); if (!V_igmp_v1enable) break; if (igmp_input_v1_query(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_VERSION_2: IGMPSTAT_INC(igps_rcv_v1v2_queries); if (!V_igmp_v2enable) break; if (igmp_input_v2_query(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_VERSION_3: { struct igmpv3 *igmpv3; uint16_t igmpv3len; uint16_t nsrc; - int srclen; IGMPSTAT_INC(igps_rcv_v3_queries); igmpv3 = (struct igmpv3 *)igmp; /* * Validate length based on source count. 
*/ nsrc = ntohs(igmpv3->igmp_numsrc); - srclen = sizeof(struct in_addr) * nsrc; - if (nsrc * sizeof(in_addr_t) > srclen) { + if (nsrc * sizeof(in_addr_t) > + UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) { IGMPSTAT_INC(igps_rcv_tooshort); return; } /* * m_pullup() may modify m, so pullup in * this scope. */ igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN + - srclen; + sizeof(struct in_addr) * nsrc; if ((m->m_flags & M_EXT || m->m_len < igmpv3len) && (m = m_pullup(m, igmpv3len)) == NULL) { IGMPSTAT_INC(igps_rcv_tooshort); return; } igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *) + iphlen); if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) { m_freem(m); return; } } break; } break; case IGMP_v1_HOST_MEMBERSHIP_REPORT: if (!V_igmp_v1enable) break; if (igmp_input_v1_report(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_v2_HOST_MEMBERSHIP_REPORT: if (!V_igmp_v2enable) break; if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); if (igmp_input_v2_report(ifp, ip, igmp) != 0) { m_freem(m); return; } break; case IGMP_v3_HOST_MEMBERSHIP_REPORT: /* * Hosts do not need to process IGMPv3 membership reports, * as report suppression is no longer required. */ if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); break; default: break; } /* * Pass all valid IGMP packets up to any process(es) listening on a * raw IGMP socket. */ rip_input(m, off); } /* * Fast timeout handler (global). * VIMAGE: Timeout handlers are expected to service all vimages. */ void igmp_fasttimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_fasttimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Fast timeout handler (per-vnet). * Sends are shuffled off to a netisr to deal with Giant. * * VIMAGE: Assume caller has set up our curvnet. 
*/
/*
 * Returns immediately if none of the three global "timers running" flags
 * is set.  Otherwise takes IN_MULTI_LOCK and IGMP_LOCK, services the
 * per-link IGMPv3 general query timers, then walks every link's IPv4
 * multicast memberships to service the per-group timers.
 */
static void
igmp_fasttimo_vnet(void)
{
	struct ifqueue		 scq;	/* State-change packets */
	struct ifqueue		 qrq;	/* Query response packets */
	struct ifnet		*ifp;
	struct igmp_ifinfo	*igi;
	struct ifmultiaddr	*ifma;
	struct in_multi		*inm;
	int			 loop, uri_fasthz;

	loop = 0;
	uri_fasthz = 0;

	/*
	 * Quick check to see if any work needs to be done, in order to
	 * minimize the overhead of fasttimo processing.
	 * SMPng: XXX Unlocked reads.
	 */
	if (!V_current_state_timers_running &&
	    !V_interface_timers_running &&
	    !V_state_change_timers_running)
		return;

	IN_MULTI_LOCK();
	IGMP_LOCK();

	/*
	 * IGMPv3 General Query response timer processing.
	 * The global flag is cleared first and set again only if some
	 * link still has a non-zero timer after being decremented.
	 */
	if (V_interface_timers_running) {
		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);

		V_interface_timers_running = 0;
		LIST_FOREACH(igi, &V_igi_head, igi_link) {
			if (igi->igi_v3_timer == 0) {
				/* Do nothing. */
			} else if (--igi->igi_v3_timer == 0) {
				igmp_v3_dispatch_general_query(igi);
			} else {
				V_interface_timers_running = 1;
			}
		}
	}

	if (!V_current_state_timers_running &&
	    !V_state_change_timers_running)
		goto out_locked;

	/*
	 * Both flags are cleared here; the per-group handlers below set
	 * them again for any group whose timer is still counting down.
	 */
	V_current_state_timers_running = 0;
	V_state_change_timers_running = 0;

	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);

	/*
	 * IGMPv1/v2/v3 host report and state-change timer processing.
	 * Note: Processing a v3 group timer may remove a node.
	 */
	LIST_FOREACH(igi, &V_igi_head, igi_link) {
		ifp = igi->igi_ifp;

		/* The two local output queues are only set up for v3 links. */
		if (igi->igi_version == IGMP_VERSION_3) {
			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
			    PR_FASTHZ);

			memset(&qrq, 0, sizeof(struct ifqueue));
			IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);

			memset(&scq, 0, sizeof(struct ifqueue));
			IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
		}

		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			/* Skip non-IPv4 entries and those without an in_multi. */
			if (ifma->ifma_addr->sa_family != AF_INET ||
			    ifma->ifma_protospec == NULL)
				continue;
			inm = (struct in_multi *)ifma->ifma_protospec;
			switch (igi->igi_version) {
			case IGMP_VERSION_1:
			case IGMP_VERSION_2:
				igmp_v1v2_process_group_timer(inm,
				    igi->igi_version);
				break;
			case IGMP_VERSION_3:
				igmp_v3_process_group_timers(igi, &qrq,
				    &scq, inm, uri_fasthz);
				break;
			}
		}
		IF_ADDR_RUNLOCK(ifp);

		if (igi->igi_version == IGMP_VERSION_3) {
			struct in_multi		*tinm;

			igmp_dispatch_queue(&qrq, 0, loop);
			igmp_dispatch_queue(&scq, 0, loop);

			/*
			 * Free the in_multi reference(s) for this
			 * IGMP lifecycle.
			 */
			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
			    inm_nrele, tinm) {
				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
				    inm_nrele);
				inm_release_locked(inm);
			}
		}
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();
}

/*
 * Update host report group timer for IGMPv1/v2.
 * Will update the global pending timer flags.
 *
 * inm:     group whose inm_timer is serviced.
 * version: selects which report type is queued when the timer expires
 *          (v2 report for IGMP_VERSION_2, v1 report otherwise).
 */
static void
igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
{
	int report_timer_expired;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	if (inm->inm_timer == 0) {
		report_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		report_timer_expired = 1;
	} else {
		/* Still counting down: keep fasttimo running and stop here. */
		V_current_state_timers_running = 1;
		return;
	}

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		break;
	case IGMP_REPORTING_MEMBER:
		/* The state moves to IDLE before the report is queued. */
		if (report_timer_expired) {
			inm->inm_state = IGMP_IDLE_MEMBER;
			(void)igmp_v1v2_queue_report(inm,
			    (version == IGMP_VERSION_2) ?
			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
		}
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		break;
	}
}

/*
 * Update a group's timers for IGMPv3.
 * Will update the global pending timer flags.
 * Note: Unlocked read from igi.
 *
 * igi:        link info; its igi_relinmhead list receives groups whose
 *             final reference is to be released by the caller.
 * qrq:        queue that receives current-state (query response) records.
 * scq:        queue that receives merged state-change reports.
 * inm:        group being serviced.
 * uri_fasthz: value the state-change timer is reset to while
 *             retransmissions remain.
 */
static void
igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in_multi *inm, const int uri_fasthz)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from v1/v2 compatibility mode back to v3,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the slow-timeout path.
	 */
	if (inm->inm_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		V_current_state_timers_running = 1;
	}

	if (inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		V_state_change_timers_running = 1;
	}

	/* We are in fasttimo, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired)
		return;

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/* XXX Clear recorded sources for next time. */
			inm_clear_recorded(inm);
		}
		/* FALLTHROUGH */
	case IGMP_REPORTING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->inm_scrv > 0) {
				inm->inm_sctimer = uri_fasthz;
				V_state_change_timers_running = 1;
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void)igmp_v3_merge_state_changes(inm, scq);

			inm_commit(inm);
			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);

			/*
			 * If we are leaving the group for good, make sure
			 * we release IGMP's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_ifmultiaddr TAILQ.
			 */
			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
			    inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
				    inm, inm_nrele);
			}
		}
		break;
	}
}

/*
 * Suppress a group's pending response to a group or source/group query.
 *
 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
 * Do NOT update ST1/ST0 as this operation merely suppresses
 * the currently pending group record.
 * Do NOT suppress the response to a general query. It is possible but
 * it would require adding another state or flag.
*/ static void igmp_v3_suppress_group_record(struct in_multi *inm) { IN_MULTI_LOCK_ASSERT(); KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3, ("%s: not IGMPv3 mode on link", __func__)); if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER || inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) return; if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) inm_clear_recorded(inm); inm->inm_timer = 0; inm->inm_state = IGMP_REPORTING_MEMBER; } /* * Switch to a different IGMP version on the given interface, * as per Section 7.2.1. */ static void igmp_set_version(struct igmp_ifinfo *igi, const int version) { int old_version_timer; IGMP_LOCK_ASSERT(); CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__, version, igi->igi_ifp, igi->igi_ifp->if_xname); if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) { /* * Compute the "Older Version Querier Present" timer as per * Section 8.12. */ old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri; old_version_timer *= PR_SLOWHZ; if (version == IGMP_VERSION_1) { igi->igi_v1_timer = old_version_timer; igi->igi_v2_timer = 0; } else if (version == IGMP_VERSION_2) { igi->igi_v1_timer = 0; igi->igi_v2_timer = old_version_timer; } } if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { if (igi->igi_version != IGMP_VERSION_2) { igi->igi_version = IGMP_VERSION_2; igmp_v3_cancel_link_timers(igi); } } else if (igi->igi_v1_timer > 0) { if (igi->igi_version != IGMP_VERSION_1) { igi->igi_version = IGMP_VERSION_1; igmp_v3_cancel_link_timers(igi); } } } /* * Cancel pending IGMPv3 timers for the given link and all groups * joined on it; state-change, general-query, and group-query timers. * * Only ever called on a transition from v3 to Compatibility mode. Kill * the timers stone dead (this may be expensive for large N groups), they * will be restarted if Compatibility Mode deems that they must be due to * query processing. 
*/ static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) { struct ifmultiaddr *ifma; struct ifnet *ifp; struct in_multi *inm, *tinm; CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__, igi->igi_ifp, igi->igi_ifp->if_xname); IN_MULTI_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); /* * Stop the v3 General Query Response on this link stone dead. * If fasttimo is woken up due to V_interface_timers_running, * the flag will be cleared if there are no pending link timers. */ igi->igi_v3_timer = 0; /* * Now clear the current-state and state-change report timers * for all memberships scoped to this link. */ ifp = igi->igi_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; inm = (struct in_multi *)ifma->ifma_protospec; switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: /* * These states are either not relevant in v3 mode, * or are unreported. Do nothing. */ break; case IGMP_LEAVING_MEMBER: /* * If we are leaving the group and switching to * compatibility mode, we need to release the final * reference held for issuing the INCLUDE {}, and * transition to REPORTING to ensure the host leave * message is sent upstream to the old querier -- * transition to NOT would lose the leave and race. */ SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele); /* FALLTHROUGH */ case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: inm_clear_recorded(inm); /* FALLTHROUGH */ case IGMP_REPORTING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; break; } /* * Always clear state-change and group report timers. * Free any pending IGMPv3 state-change records. 
*/ inm->inm_sctimer = 0; inm->inm_timer = 0; _IF_DRAIN(&inm->inm_scq); } IF_ADDR_RUNLOCK(ifp); SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) { SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); inm_release_locked(inm); } } /* * Update the Older Version Querier Present timers for a link. * See Section 7.2.1 of RFC 3376. */ static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) { IGMP_LOCK_ASSERT(); if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) { /* * IGMPv1 and IGMPv2 Querier Present timers expired. * * Revert to IGMPv3. */ if (igi->igi_version != IGMP_VERSION_3) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_3, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_version = IGMP_VERSION_3; } } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { /* * IGMPv1 Querier Present timer expired, * IGMPv2 Querier Present timer running. * If IGMPv2 was disabled since last timeout, * revert to IGMPv3. * If IGMPv2 is enabled, revert to IGMPv2. */ if (!V_igmp_v2enable) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_3, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_v2_timer = 0; igi->igi_version = IGMP_VERSION_3; } else { --igi->igi_v2_timer; if (igi->igi_version != IGMP_VERSION_2) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_2, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_version = IGMP_VERSION_2; } } } else if (igi->igi_v1_timer > 0) { /* * IGMPv1 Querier Present timer running. * Stop IGMPv2 timer if running. * * If IGMPv1 was disabled since last timeout, * revert to IGMPv3. * If IGMPv1 is enabled, reset IGMPv2 timer if running. 
*/ if (!V_igmp_v1enable) { CTR5(KTR_IGMPV3, "%s: transition from v%d -> v%d on %p(%s)", __func__, igi->igi_version, IGMP_VERSION_3, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_v1_timer = 0; igi->igi_version = IGMP_VERSION_3; } else { --igi->igi_v1_timer; } if (igi->igi_v2_timer > 0) { CTR3(KTR_IGMPV3, "%s: cancel v2 timer on %p(%s)", __func__, igi->igi_ifp, igi->igi_ifp->if_xname); igi->igi_v2_timer = 0; } } } /* * Global slowtimo handler. * VIMAGE: Timeout handlers are expected to service all vimages. */ void igmp_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_slowtimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Per-vnet slowtimo handler. */ static void igmp_slowtimo_vnet(void) { struct igmp_ifinfo *igi; IGMP_LOCK(); LIST_FOREACH(igi, &V_igi_head, igi_link) { igmp_v1v2_process_querier_timers(igi); } IGMP_UNLOCK(); } /* * Dispatch an IGMPv1/v2 host report or leave message. * These are always small enough to fit inside a single mbuf. 
*/
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	const size_t	 iphdrlen = sizeof(struct ip);
	const size_t	 igmplen = sizeof(struct igmp);
	struct mbuf	*pkt;
	struct igmp	*msg;
	struct ip	*iph;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/* One header mbuf holds the whole IP + IGMP packet. */
	MGETHDR(pkt, M_DONTWAIT, MT_DATA);
	if (pkt == NULL)
		return (ENOMEM);
	MH_ALIGN(pkt, iphdrlen + igmplen);
	pkt->m_pkthdr.len = iphdrlen + igmplen;

	/*
	 * Temporarily step past the space reserved for the IP header so
	 * that the mbuf covers only the IGMP message while it is filled
	 * in and checksummed.
	 */
	pkt->m_data += iphdrlen;
	pkt->m_len = igmplen;

	msg = mtod(pkt, struct igmp *);
	msg->igmp_type = type;
	msg->igmp_code = 0;
	msg->igmp_group = inm->inm_addr;
	msg->igmp_cksum = 0;	/* must be zero while the sum is computed */
	msg->igmp_cksum = in_cksum(pkt, igmplen);

	/* Step back so the mbuf starts at the IP header again. */
	pkt->m_data -= iphdrlen;
	pkt->m_len += iphdrlen;

	iph = mtod(pkt, struct ip *);
	iph->ip_tos = 0;
	iph->ip_len = iphdrlen + igmplen;
	iph->ip_off = 0;
	iph->ip_p = IPPROTO_IGMP;
	iph->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to all-routers; reports go to the group. */
	if (type != IGMP_HOST_LEAVE_MESSAGE)
		iph->ip_dst = inm->inm_addr;
	else
		iph->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);

	igmp_save_context(pkt, inm->inm_ifp);

	pkt->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
		pkt->m_flags |= M_IGMP_LOOP;

	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, pkt);
	netisr_dispatch(NETISR_IGMP, pkt);

	return (0);
}

/*
 * Process a state change from the upper layer for the given IPv4 group.
 *
 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state, it is now up to IGMP to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the IGMPv3 state machine at group level. The IGMP module
 * however makes the decision as to which IGMP protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
*
 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
 * save ourselves a bunch of work; any exclusive mode groups need not
 * compute source filter lists.
 *
 * VIMAGE: curvnet should have been set by caller, as this routine
 * is called from the socket option handlers.
 */
int
igmp_change_state(struct in_multi *inm)
{
	struct igmp_ifinfo *link_info;
	struct ifnet *netif;
	int rc = 0;

	IN_MULTI_LOCK_ASSERT();

	/*
	 * Catch a request to change state on an interface that has
	 * already gone away, and check that netinet and net agree on
	 * which interface this membership belongs to.
	 */
	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
	netif = inm->inm_ifma->ifma_ifp;
	KASSERT(inm->inm_ifp == netif, ("%s: bad ifp", __func__));

	IGMP_LOCK();

	link_info = ((struct in_ifinfo *)netif->if_afdata[AF_INET])->ii_igmp;
	KASSERT(link_info != NULL,
	    ("%s: no igmp_ifinfo for ifp %p", __func__, netif));

	if (inm->inm_st[1].iss_fmode == inm->inm_st[0].iss_fmode) {
		/* Same filter mode at T0 and T1: only the filter set moved. */
		CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
		rc = igmp_handle_state_change(inm, link_info);
	} else {
		/*
		 * The filter mode changed.  A transition out of, or into,
		 * MCAST_UNDEFINED starts or ends the IGMP life cycle of the
		 * group; any other mode change is an ordinary state change.
		 */
		CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
			CTR1(KTR_IGMPV3, "%s: initial join", __func__);
			rc = igmp_initial_join(inm, link_info);
		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
			CTR1(KTR_IGMPV3, "%s: final leave", __func__);
			igmp_final_leave(inm, link_info);
		} else {
			rc = igmp_handle_state_change(inm, link_info);
		}
	}

	IGMP_UNLOCK();
	return (rc);
}

/*
 * Perform the initial join for an IGMP group.
 *
 * When joining a group:
 *  If the group should have its IGMP traffic suppressed, do nothing.
 *  IGMPv1 starts sending IGMPv1 host membership reports.
 *  IGMPv2 starts sending IGMPv2 host membership reports.
*  IGMPv3 will schedule an IGMPv3 state-change report containing the
 *  initial state of the membership.
 */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
{
	struct ifnet	*netif;
	int		 rc, nqueued, commit_now;

	CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
	    inm->inm_ifp->if_xname);

	rc = 0;
	commit_now = 1;
	netif = inm->inm_ifp;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	KASSERT(igi && igi->igi_ifp == netif,
	    ("%s: inconsistent ifp", __func__));

	/*
	 * Loopback interfaces, links flagged IGIF_SILENT and groups that
	 * are never reported (e.g. 224.0.0.1) take no part in any IGMP
	 * exchange: such a membership just becomes a SILENT member.
	 * Everything else enters the state machine of the version in use
	 * on the link.
	 */
	if ((netif->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		CTR1(KTR_IGMPV3,
"%s: not kicking state machine for silent group", __func__);
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Overlapping life cycles: a group that was still LEAVING
		 * holds an extra reference for its final INCLUDE {}
		 * report, which is dropped here.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER)
			inm_release_locked(inm);

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			/* Send a host report now and arm the group timer. */
			inm->inm_state = IGMP_IDLE_MEMBER;
			rc = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
			if (rc == 0) {
				inm->inm_timer = IGMP_RANDOM_DELAY(
				    IGMP_V1V2_MAX_RI * PR_FASTHZ);
				V_current_state_timers_running = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * T0 is not updated to T1 until the first copy of
			 * the state change has been transmitted.
			 */
			commit_now = 0;

			/*
			 * Throw away any previously queued reports and
			 * enqueue a fresh State-Change Report.  If nothing
			 * was queued, or an error occurred, the timers are
			 * left alone.
			 */
			_IF_DRAIN(&inm->inm_scq);
			nqueued = igmp_v3_enqueue_group_record(&inm->inm_scq,
			    inm, 1, 0, 0);
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, nqueued);
			if (nqueued <= 0) {
				rc = -nqueued;
			} else {
				/*
				 * Send the report up to RV times on this
				 * link (once on a loopback link).  The
				 * timer fires at the next igmp_fasttimo
				 * (~200ms), giving us an opportunity to
				 * merge the reports.
				 */
				if (igi->igi_flags & IGIF_LOOPBACK) {
					inm->inm_scrv = 1;
				} else {
					KASSERT(igi->igi_rv > 1,
				   ("%s: invalid robustness %d", __func__,
					    igi->igi_rv));
					inm->inm_scrv = igi->igi_rv;
				}
				inm->inm_sctimer = 1;
				V_state_change_timers_running = 1;

				rc = 0;
			}
			break;
		}
	}

	/*
	 * T0 is synchronised here only when the change is atomic, i.e.
	 * no timer has to fire before the state change counts as having
	 * been communicated.
	 */
	if (commit_now) {
		inm_commit(inm);
		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
	}

	return (rc);
}

/*
 * Issue an intermediate state change during the IGMP life-cycle.
*/ static int igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi) { struct ifnet *ifp; int retval; CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)", __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, inm->inm_ifp->if_xname); ifp = inm->inm_ifp; IN_MULTI_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__)); if ((ifp->if_flags & IFF_LOOPBACK) || (igi->igi_flags & IGIF_SILENT) || !igmp_isgroupreported(inm->inm_addr) || (igi->igi_version != IGMP_VERSION_3)) { if (!igmp_isgroupreported(inm->inm_addr)) { CTR1(KTR_IGMPV3, "%s: not kicking state machine for silent group", __func__); } CTR1(KTR_IGMPV3, "%s: nothing to do", __func__); inm_commit(inm); CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); return (0); } _IF_DRAIN(&inm->inm_scq); retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0); CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval); if (retval <= 0) return (-retval); /* * If record(s) were enqueued, start the state-change * report timer for this group. */ inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv); inm->inm_sctimer = 1; V_state_change_timers_running = 1; return (0); } /* * Perform the final leave for an IGMP group. * * When leaving a group: * IGMPv1 does nothing. * IGMPv2 sends a host leave message, if and only if we are the reporter. * IGMPv3 enqueues a state-change report containing a transition * to INCLUDE {} for immediate transmission. */ static void igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) { int syncstates; syncstates = 1; CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)", __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, inm->inm_ifp->if_xname); IN_MULTI_LOCK_ASSERT(); IGMP_LOCK_ASSERT(); switch (inm->inm_state) { case IGMP_NOT_MEMBER: case IGMP_SILENT_MEMBER: case IGMP_LEAVING_MEMBER: /* Already leaving or left; do nothing. 
*/ CTR1(KTR_IGMPV3, "%s: not kicking state machine for silent group", __func__); break; case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_G_QUERY_PENDING_MEMBER: case IGMP_SG_QUERY_PENDING_MEMBER: if (igi->igi_version == IGMP_VERSION_2) { #ifdef INVARIANTS if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) panic("%s: IGMPv3 state reached, not IGMPv3 mode", __func__); #endif igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE); inm->inm_state = IGMP_NOT_MEMBER; } else if (igi->igi_version == IGMP_VERSION_3) { /* * Stop group timer and all pending reports. * Immediately enqueue a state-change report * TO_IN {} to be sent on the next fast timeout, * giving us an opportunity to merge reports. */ _IF_DRAIN(&inm->inm_scq); inm->inm_timer = 0; if (igi->igi_flags & IGIF_LOOPBACK) { inm->inm_scrv = 1; } else { inm->inm_scrv = igi->igi_rv; } CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d " "pending retransmissions.", __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, inm->inm_scrv); if (inm->inm_scrv == 0) { inm->inm_state = IGMP_NOT_MEMBER; inm->inm_sctimer = 0; } else { int retval; inm_acquire_locked(inm); retval = igmp_v3_enqueue_group_record( &inm->inm_scq, inm, 1, 0, 0); KASSERT(retval != 0, ("%s: enqueue record = %d", __func__, retval)); inm->inm_state = IGMP_LEAVING_MEMBER; inm->inm_sctimer = 1; V_state_change_timers_running = 1; syncstates = 0; } break; } break; case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: /* Our reports are suppressed; do nothing. */ break; } if (syncstates) { inm_commit(inm); CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s", __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); } } /* * Enqueue an IGMPv3 group record to the given output queue. 
*
 * XXX This function could do with having the allocation code
 * split out, and the multiple-tree-walks coalesced into a single
 * routine as has been done in igmp_v3_enqueue_filter_change().
 *
 * If is_state_change is zero, a current-state record is appended.
 * If is_state_change is non-zero, a state-change report is appended.
 *
 * If is_group_query is non-zero, an mbuf packet chain is allocated.
 * If is_group_query is zero, and if there is a packet with free space
 * at the tail of the queue, it will be appended to providing there
 * is enough free space.
 * Otherwise a new mbuf packet chain is allocated.
 *
 * If is_source_query is non-zero, each source is checked to see if
 * it was recorded for a Group-Source query, and will be omitted if
 * it is not both in-mode and recorded.
 *
 * The function will attempt to allocate leading space in the packet
 * for the IP/IGMP header to be prepended without fragmenting the chain.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
 */
static int
igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
    const int is_state_change, const int is_group_query,
    const int is_source_query)
{
	struct igmp_grouprec	 ig;
	struct igmp_grouprec	*pig;
	struct ifnet		*ifp;
	struct ip_msource	*ims, *nims;
	struct mbuf		*m0, *m, *md;
	int			 error, is_filter_list_change;
	int			 minrec0len, m0srcs, msrcs, nbytes, off;
	int			 record_has_sources;
	int			 now;
	int			 type;
	in_addr_t		 naddr;
	uint8_t			 mode;

	IN_MULTI_LOCK_ASSERT();

	error = 0;
	ifp = inm->inm_ifp;
	is_filter_list_change = 0;
	m = NULL;
	m0 = NULL;
	m0srcs = 0;
	msrcs = 0;
	nbytes = 0;
	nims = NULL;
	record_has_sources = 1;
	pig = NULL;
	type = IGMP_DO_NOTHING;
	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */

	/*
	 * If we did not transition out of ASM mode during t0->t1,
	 * and there are no source nodes to process, we can skip
	 * the generation of source records.
	 */
	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
	    inm->inm_nsrc == 0)
		record_has_sources = 0;

	if (is_state_change) {
		/*
		 * Queue a state change record.
		 * If the mode did not change, and there are non-ASM
		 * listeners or source filters present,
		 * we potentially need to issue two records for the group.
		 * If we are transitioning to MCAST_UNDEFINED, we need
		 * not send any sources.
		 * If there are ASM listeners, and there was no filter
		 * mode transition of any kind, do nothing.
		 */
		if (mode != inm->inm_st[0].iss_fmode) {
			if (mode == MCAST_EXCLUDE) {
				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
				    __func__);
				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
			} else {
				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
				    __func__);
				type = IGMP_CHANGE_TO_INCLUDE_MODE;
				if (mode == MCAST_UNDEFINED)
					record_has_sources = 0;
			}
		} else {
			if (record_has_sources) {
				is_filter_list_change = 1;
			} else {
				type = IGMP_DO_NOTHING;
			}
		}
	} else {
		/*
		 * Queue a current state record.
		 */
		if (mode == MCAST_EXCLUDE) {
			type = IGMP_MODE_IS_EXCLUDE;
		} else if (mode == MCAST_INCLUDE) {
			type = IGMP_MODE_IS_INCLUDE;
			KASSERT(inm->inm_st[1].iss_asm == 0,
			    ("%s: inm %p is INCLUDE but ASM count is %d",
			     __func__, inm, inm->inm_st[1].iss_asm));
		}
	}

	/*
	 * Generate the filter list changes using a separate function.
	 */
	if (is_filter_list_change)
		return (igmp_v3_enqueue_filter_change(ifq, inm));

	if (type == IGMP_DO_NOTHING) {
		CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
		    __func__, inet_ntoa(inm->inm_addr),
		    inm->inm_ifp->if_xname);
		return (0);
	}

	/*
	 * If any sources are present, we must be able to fit at least
	 * one in the trailing space of the tail packet's mbuf,
	 * ideally more.
	 */
	minrec0len = sizeof(struct igmp_grouprec);
	if (record_has_sources)
		minrec0len += sizeof(in_addr_t);

	CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
	    inm->inm_ifp->if_xname);

	/*
	 * Check if we have a packet in the tail of the queue for this
	 * group into which the first group record for this group will fit.
	 * Otherwise allocate a new packet.
	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
	 * Note: Group records for G/GSR query responses MUST be sent
	 * in their own packet.
	 */
	m0 = ifq->ifq_tail;
	if (!is_group_query &&
	    m0 != NULL &&
	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
	    (m0->m_pkthdr.len + minrec0len) <
	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
		/* m0srcs: how many source addresses still fit in m0. */
		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		m = m0;
		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
	} else {
		if (_IF_QFULL(ifq)) {
			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
			return (-ENOMEM);
		}
		m = NULL;
		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		/* A cluster is only tried for plain current-state records. */
		if (!is_state_change && !is_group_query) {
			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
			if (m)
				m->m_data += IGMP_LEADINGSPACE;
		}
		/* Otherwise, or if that failed, use a header mbuf. */
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m)
				MH_ALIGN(m, IGMP_LEADINGSPACE);
		}
		if (m == NULL)
			return (-ENOMEM);

		igmp_save_context(m, ifp);

		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
	}

	/*
	 * Append group record.
	 * If we have sources, we don't know how many yet.
	 */
	ig.ig_type = type;
	ig.ig_datalen = 0;
	ig.ig_numsrc = 0;
	ig.ig_group = inm->inm_addr;
	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
		/* Only a packet allocated above is freed; m0 is on the queue. */
		if (m != m0)
			m_freem(m);
		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
		return (-ENOMEM);
	}
	nbytes += sizeof(struct igmp_grouprec);

	/*
	 * Append as many sources as will fit in the first packet.
	 * If we are appending to a new packet, the chain allocation
	 * may potentially use clusters; use m_getptr() in this case.
	 * If we are appending to an existing packet, we need to obtain
	 * a pointer to the group record after m_append(), in case a new
	 * mbuf was allocated.
	 * Only append sources which are in-mode at t1. If we are
	 * transitioning to MCAST_UNDEFINED state on the group, do not
	 * include source entries.
	 * Only report recorded sources in our filter set when responding
	 * to a group-source query.
	 */
	if (record_has_sources) {
		if (m == m0) {
			md = m_last(m);
			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
			    md->m_len - nbytes);
		} else {
			md = m_getptr(m, 0, &off);
			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
			    off);
		}
		msrcs = 0;
		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
			    inet_ntoa_haddr(ims->ims_haddr));
			now = ims_get_mode(inm, ims, 1);
			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
				    __func__);
				continue;
			}
			CTR1(KTR_IGMPV3, "%s: append node", __func__);
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
				    __func__);
				return (-ENOMEM);
			}
			nbytes += sizeof(in_addr_t);
			++msrcs;
			/* Packet is full; nims marks where to resume. */
			if (msrcs == m0srcs)
				break;
		}
		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
		    msrcs);
		pig->ig_numsrc = htons(msrcs);
		/*
		 * NOTE(review): nbytes was already advanced once per source
		 * inside the loop above, so this line appears to count the
		 * first packet's sources twice.  The follow-on loop below
		 * only counts them once.  Confirm whether callers rely on
		 * anything beyond the sign of the return value.
		 */
		nbytes += (msrcs * sizeof(in_addr_t));
	}

	/*
	 * NOTE(review): when m == m0 the group record (and, on the
	 * m_append() failure path above, any sources) already appended
	 * to the existing tail packet are not removed before returning.
	 */
	if (is_source_query && msrcs == 0) {
		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
		if (m != m0)
			m_freem(m);
		return (0);
	}

	/*
	 * We are good to go with first packet.
	 */
	if (m != m0) {
		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
		m->m_pkthdr.PH_vt.vt_nrecs = 1;
		_IF_ENQUEUE(ifq, m);
	} else
		m->m_pkthdr.PH_vt.vt_nrecs++;

	/*
	 * No further work needed if no source list in packet(s).
	 */
	if (!record_has_sources)
		return (nbytes);

	/*
	 * Whilst sources remain to be announced, we need to allocate
	 * a new packet and fill out as many sources as will fit.
	 * Always try for a cluster first.
	 */
	while (nims != NULL) {
		if (_IF_QFULL(ifq)) {
			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
			return (-ENOMEM);
		}
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (m)
			m->m_data += IGMP_LEADINGSPACE;
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m)
				MH_ALIGN(m, IGMP_LEADINGSPACE);
		}
		if (m == NULL)
			return (-ENOMEM);
		igmp_save_context(m, ifp);
		/* pig is taken before m_append() writes the record there. */
		md = m_getptr(m, 0, &off);
		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);

		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
			if (m != m0)
				m_freem(m);
			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
			return (-ENOMEM);
		}
		m->m_pkthdr.PH_vt.vt_nrecs = 1;
		nbytes += sizeof(struct igmp_grouprec);

		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);

		msrcs = 0;
		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
			    inet_ntoa_haddr(ims->ims_haddr));
			now = ims_get_mode(inm, ims, 1);
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
				    __func__);
				continue;
			}
			CTR1(KTR_IGMPV3, "%s: append node", __func__);
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
				    __func__);
				return (-ENOMEM);
			}
			++msrcs;
			if (msrcs == m0srcs)
				break;
		}
		pig->ig_numsrc = htons(msrcs);
		nbytes += (msrcs * sizeof(in_addr_t));

		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
		_IF_ENQUEUE(ifq, m);
	}

	return (nbytes);
}

/*
 * Type used to mark record pass completion.
* We exploit the fact we can cast to this easily from the
 * current filter modes on each ip_msource node.
 */
/*
 * The values are bit flags: REC_FULL is the OR of REC_ALLOW and REC_BLOCK,
 * so the record types that have been completed can be accumulated in a
 * mask and compared against REC_FULL.
 */
typedef enum {
	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK	/* both record types */
} rectype_t;

/*
 * Enqueue an IGMPv3 filter list change to the given output queue.
 *
 * Source list filter state is held in an RB-tree. When the filter list
 * for a group is changed without changing its mode, we need to compute
 * the deltas between T0 and T1 for each source in the filter set,
 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 *
 * As we may potentially queue two record types, and the entire R-B tree
 * needs to be walked at once, we break this out into its own function
 * so we can generate a tightly packed queue of packets.
 *
 * XXX This could be written to only use one tree walk, although that makes
 * serializing into the mbuf chains a bit harder. For now we do two walks
 * which makes things easier on us, and it may or may not be harder on
 * the L2 cache.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
*/
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	/* Smallest useful record: one group record header plus one source. */
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet		*ifp;
	struct igmp_grouprec	 ig;
	struct igmp_grouprec	*pig;
	struct ip_msource	*ims, *nims;
	struct mbuf		*m, *m0, *md;
	in_addr_t		 naddr;
	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
	int			 nallow, nblock;
	uint8_t			 mode, now, then;
	rectype_t		 crt, drt, nrt;

	IN_MULTI_LOCK_ASSERT();

	/*
	 * Nothing to queue if the group has no sources, or if iss_asm is
	 * non-zero in both the t0 and t1 snapshots.
	 */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
		return (0);

	ifp = inm->inm_ifp;			/* interface */
	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
	crt = REC_NONE;	/* current group record type */
	drt = REC_NONE;	/* mask of completed group record types */
	nrt = REC_NONE;	/* record type for current node */
	m0srcs = 0;	/* # source which will fit in current mbuf chain */
	nbytes = 0;	/* # of bytes appended to group's state-change queue */
	npbytes = 0;	/* # of bytes appended this packet */
	rsrcs = 0;	/* # sources encoded in current record */
	schanged = 0;	/* # nodes encoded in overall filter change */
	nallow = 0;	/* # of source entries in ALLOW_NEW */
	nblock = 0;	/* # of source entries in BLOCK_OLD */
	nims = NULL;	/* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			/*
			 * Reuse the packet at the tail of the queue when it
			 * has room for one more record of at least MINRECLEN
			 * bytes; otherwise allocate a fresh one.
			 */
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
			     IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
					    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				CTR1(KTR_IGMPV3,
				    "%s: use previous packet", __func__);
			} else {
				/* Try a cluster first, then a plain header mbuf. */
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m)
					m->m_data += IGMP_LEADINGSPACE;
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m)
						MH_ALIGN(m, IGMP_LEADINGSPACE);
				}
				if (m == NULL) {
					CTR1(KTR_IGMPV3,
					    "%s: m_get*() failed", __func__);
					return (-ENOMEM);
				}
				m->m_pkthdr.PH_vt.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				CTR1(KTR_IGMPV3,
				    "%s: allocated new packet", __func__);
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3,
				    "%s: m_append() failed", __func__);
				return (-ENOMEM);
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL)
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
				CTR2(KTR_IGMPV3, "%s: visit node %s",
				    __func__, inet_ntoa_haddr(ims->ims_haddr));
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
				    __func__, then, now);
				if (now == then) {
					CTR1(KTR_IGMPV3,
					    "%s: skip unchanged", __func__);
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					CTR1(KTR_IGMPV3,
					    "%s: skip IN src on EX group",
					    __func__);
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE)
					nrt = (rectype_t)(~mode & REC_FULL);
				/*
				 * The first changed node seen fixes the record
				 * type of this pass; nodes of the other type
				 * are skipped and left for the next pass.
				 */
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt)
					continue;
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0)
						m_freem(m);
					CTR1(KTR_IGMPV3,
					    "%s: m_append() failed", __func__);
					return (-ENOMEM);
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				/* Stop once this packet's source budget is used. */
				if (++rsrcs == m0srcs)
					break;
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					CTR1(KTR_IGMPV3,
					    "%s: m_free(m)", __func__);
					m_freem(m);
				} else {
					CTR1(KTR_IGMPV3,
					    "%s: m_adj(m, -ig)", __func__);
					m_adj(m, -((int)sizeof(
					    struct igmp_grouprec)));
				}
				/* Jumps to the "nims != NULL" test below. */
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW)
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			else if (crt == REC_BLOCK)
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			pig->ig_numsrc = htons(rsrcs);
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.PH_vt.vt_nrecs++;
			if (m != m0)
				_IF_ENQUEUE(ifq, m);
			nbytes += npbytes;
		} while (nims != NULL);
		/* Mark this record type done and switch to the other one. */
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
	    nallow, nblock);

	return (nbytes);
}

/*
 * Move the pending state-change packets of group inm (inm->inm_scq) onto
 * the interface state-change queue ifscq.
 *
 * While retransmissions remain (inm->inm_scrv > 0) each packet is
 * duplicated with m_dup() and the original is left on the group queue;
 * otherwise the packet itself is dequeued.  A packet is chained onto the
 * tail packet of ifscq when the combined record count and length fit,
 * and is enqueued as a separate packet otherwise.
 *
 * Returns 0, or ENOMEM if m_dup() fails.
 * NOTE(review): the error is returned as a positive errno here, whereas
 * the enqueue routine above returns -ENOMEM; confirm callers cope.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue	*gq;
	struct mbuf	*m;		/* pending state-change */
	struct mbuf	*m0;		/* copy of pending state-change */
	struct mbuf	*mt;		/* last state-change in packet */
	int		 docopy, domerge;
	u_int		 recslen;

	docopy = 0;
	domerge = 0;
	recslen = 0;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0)
		docopy = 1;

	gq = &inm->inm_scq;
#ifdef KTR
	if (gq->ifq_head == NULL) {
		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
		    __func__, inm);
	}
#endif

	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m, NULL);

			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
			    m->m_pkthdr.PH_vt.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
				domerge = 1;
		}

		/*
		 * NOTE(review): this tests whether the group queue gq is
		 * full, although a non-merged packet is enqueued on ifscq
		 * below; confirm which queue was intended.
		 * NOTE(review): in the !docopy case m is freed here without
		 * being removed from gq in this branch; confirm gq is not
		 * left referencing the freed packet.
		 */
		if (!domerge && _IF_QFULL(gq)) {
			CTR2(KTR_IGMPV3,
			    "%s: outbound queue full, skipping whole packet %p",
			    __func__, m);
			mt = m->m_nextpkt;
			if (!docopy)
				m_freem(m);
			m = mt;
			continue;
		}

		if (!docopy) {
			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
			_IF_DEQUEUE(gq, m0);
			/*
			 * NOTE(review): if _IF_DEQUEUE() clears
			 * m0->m_nextpkt, m becomes NULL here and the loop
			 * ends after a single packet; confirm against the
			 * macro definition.
			 */
			m = m0->m_nextpkt;
		} else {
			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL)
				return (ENOMEM);
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
			    __func__, m0, ifscq);
			_IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;	/* last mbuf of packet mt */

			CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
			    __func__, m0, mt);

			/*
			 * Chain m0 behind the tail packet: m0 stops being a
			 * packet header and mt absorbs its length and record
			 * count.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.PH_vt.vt_nrecs +=
			    m0->m_pkthdr.PH_vt.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return (0);
}

/*
 * Respond to a pending IGMPv3 General Query.
*/
static void
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifmultiaddr	*ifma;
	struct ifnet		*ifp;
	struct in_multi		*inm;
	int			 nqueued, use_loopback;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	KASSERT(igi->igi_version == IGMP_VERSION_3,
	    ("%s: called when version %d", __func__, igi->igi_version));

	ifp = igi->igi_ifp;

	/*
	 * Queue a current-state record on igi_gq for every IPv4 group on
	 * this interface that is in a reporting-capable state.
	 */
	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_INET)
			continue;
		if (ifma->ifma_protospec == NULL)
			continue;

		inm = (struct in_multi *)ifma->ifma_protospec;
		KASSERT(ifp == inm->inm_ifp,
		    ("%s: inconsistent ifp", __func__));

		switch (inm->inm_state) {
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			nqueued = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, nqueued);
			break;
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			/* These states contribute nothing to the response. */
			break;
		}
	}
	IF_ADDR_RUNLOCK(ifp);

	/* Send at most one burst now. */
	use_loopback = (igi->igi_flags & IGIF_LOOPBACK) != 0;
	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    use_loopback);

	/* Queue drained: no further bursts to schedule. */
	if (igi->igi_gq.ifq_head == NULL)
		return;

	/*
	 * Slew transmission of bursts over 500ms intervals.
	 */
	igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(IGMP_RESPONSE_BURST_INTERVAL);
	V_interface_timers_running = 1;
}

/*
 * Transmit the next pending IGMP message in the output queue.
 *
 * We get called from netisr_processqueue(). A mutex private to igmpoq
 * will be acquired and released around this routine.
 *
 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
 * MRT: Nothing needs to be done, as IGMP traffic is always local to
 *      a link and uses a link-scope multicast address.
*/ static void igmp_intr(struct mbuf *m) { struct ip_moptions imo; struct ifnet *ifp; struct mbuf *ipopts, *m0; int error; uint32_t ifindex; CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m); /* * Set VNET image pointer from enqueued mbuf chain * before doing anything else. Whilst we use interface * indexes to guard against interface detach, they are * unique to each VIMAGE and must be retrieved. */ CURVNET_SET((struct vnet *)(m->m_pkthdr.header)); ifindex = igmp_restore_context(m); /* * Check if the ifnet still exists. This limits the scope of * any race in the absence of a global ifp lock for low cost * (an array lookup). */ ifp = ifnet_byindex(ifindex); if (ifp == NULL) { CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.", __func__, m, ifindex); m_freem(m); IPSTAT_INC(ips_noroute); goto out; } ipopts = V_igmp_sendra ? m_raopt : NULL; imo.imo_multicast_ttl = 1; imo.imo_multicast_vif = -1; imo.imo_multicast_loop = (V_ip_mrouter != NULL); /* * If the user requested that IGMP traffic be explicitly * redirected to the loopback interface (e.g. they are running a * MANET interface and the routing protocol needs to see the * updates), handle this now. */ if (m->m_flags & M_IGMP_LOOP) imo.imo_multicast_ifp = V_loif; else imo.imo_multicast_ifp = ifp; if (m->m_flags & M_IGMPV2) { m0 = m; } else { m0 = igmp_v3_encap_report(ifp, m); if (m0 == NULL) { CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m); m_freem(m); IPSTAT_INC(ips_odropped); goto out; } } igmp_scrub_context(m0); m->m_flags &= ~(M_PROTOFLAGS); m0->m_pkthdr.rcvif = V_loif; #ifdef MAC mac_netinet_igmp_send(ifp, m0); #endif error = ip_output(m0, ipopts, NULL, 0, &imo, NULL); if (error) { CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error); goto out; } IGMPSTAT_INC(igps_snd_reports); out: /* * We must restore the existing vnet pointer before * continuing as we are run from netisr context. */ CURVNET_RESTORE(); } /* * Encapsulate an IGMPv3 report. 
*
 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
 * chain has already had its IP/IGMPv3 header prepended. In this case
 * the function will not attempt to prepend; the lengths and checksums
 * will however be re-computed.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed.
 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report	*igmp;
	struct ip		*ip;
	int			 hdrlen, igmpreclen;

	KASSERT((m->m_flags & M_PKTHDR),
	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));

	/* Length of the group records alone, excluding any IP/IGMP header. */
	igmpreclen = m_length(m, NULL);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		/* Header already present: it was counted by m_length(). */
		igmpreclen -= hdrlen;
	} else {
		/*
		 * NOTE(review): mbuf(9) documents that M_PREPEND() frees the
		 * chain and sets m to NULL on failure, so a caller seeing the
		 * NULL return below should not free the chain again; confirm.
		 */
		M_PREPEND(m, hdrlen, M_DONTWAIT);
		if (m == NULL)
			return (NULL);
		m->m_flags |= M_IGMPV3_HDR;
	}

	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);

	/* Temporarily step over the IP header to fill in the IGMP header. */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
	/* The checksum field must be zero while the checksum is computed. */
	igmp->ir_cksum = 0;
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.PH_vt.vt_nrecs = 0;

	/* Step back so the IP header is at the front of the data again. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	/*
	 * ip_len is stored without htons(); presumably the output path on
	 * this branch expects host byte order -- TODO confirm.
	 */
	ip->ip_len = hdrlen + igmpreclen;
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	/* For looped-back reports, use an address of ifp when it has one. */
	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			ip->ip_src = ia->ia_addr.sin_addr;
			ifa_free(&ia->ia_ifa);
		}
	}

	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return (m);
}

#ifdef KTR
/*
 * Map an IGMPv3 group record type to a short name for trace output.
 * Returns "unknown" for any value not listed below.
 */
static char *
igmp_rec_type_to_str(const int type)
{

	switch (type) {
		case IGMP_CHANGE_TO_EXCLUDE_MODE:
			return "TO_EX";
			break;
		case IGMP_CHANGE_TO_INCLUDE_MODE:
			return "TO_IN";
			break;
		case IGMP_MODE_IS_EXCLUDE:
			return "MODE_EX";
			break;
		case IGMP_MODE_IS_INCLUDE:
			return "MODE_IN";
			break;
		case IGMP_ALLOW_NEW_SOURCES:
			return "ALLOW_NEW";
			break;
		case IGMP_BLOCK_OLD_SOURCES:
			return "BLOCK_OLD";
			break;
		default:
			break;
	}
	return "unknown";
}
#endif

/*
 * Global initialisation: set up the IGMP lock, allocate m_raopt via
 * igmp_ra_alloc() and register the igmp_nh netisr handler.
 */
static void
igmp_init(void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: initializing", __func__);

	IGMP_LOCK_INIT();

	m_raopt = igmp_ra_alloc();

	netisr_register(&igmp_nh);
}
SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL);

/*
 * Global teardown: undo igmp_init() in reverse order.
 */
static void
igmp_uninit(void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	netisr_unregister(&igmp_nh);

	m_free(m_raopt);
	m_raopt = NULL;

	IGMP_LOCK_DESTROY();
}
SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL);

/*
 * Per-vnet initialisation: start with an empty V_igi_head list.
 */
static void
vnet_igmp_init(const void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: initializing", __func__);

	LIST_INIT(&V_igi_head);
}
VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init,
    NULL);

/*
 * Per-vnet teardown: only asserts that V_igi_head is already empty.
 */
static void
vnet_igmp_uninit(const void *unused __unused)
{

	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	KASSERT(LIST_EMPTY(&V_igi_head),
	    ("%s: igi list not empty; ifnets not detached?", __func__));
}
VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_igmp_uninit, NULL);

/*
 * Module event handler: MOD_LOAD and MOD_UNLOAD are accepted without any
 * work here; every other event type yields EOPNOTSUPP.
 */
static int
igmp_modevent(module_t mod, int type, void *unused __unused)
{

    switch (type) {
    case MOD_LOAD:
    case MOD_UNLOAD:
	break;
    default:
	return (EOPNOTSUPP);
    }
    return (0);
}

static moduledata_t igmp_mod = {
    "igmp",
    igmp_modevent,
    0
};
DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: stable/9/sys/netinet6/nd6_rtr.c
===================================================================
--- stable/9/sys/netinet6/nd6_rtr.c (revision 281230)
+++ stable/9/sys/netinet6/nd6_rtr.c (revision 281231)
@@ -1,2196 +1,2204 @@
/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int rtpref(struct nd_defrouter *); static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *); static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *, struct mbuf *, int); static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int); static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *, struct nd_defrouter *); static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *); static void pfxrtr_del(struct nd_pfxrouter *); static struct nd_pfxrouter *find_pfxlist_reachable_router (struct nd_prefix *); static void defrouter_delreq(struct nd_defrouter *); static void nd6_rtmsg(int, struct rtentry *); static int in6_init_prefix_ltimes(struct nd_prefix *); static void in6_init_address_ltimes(struct nd_prefix *, struct in6_addrlifetime *); static int nd6_prefix_onlink(struct nd_prefix *); static int nd6_prefix_offlink(struct nd_prefix *); static int rt6_deleteroute(struct radix_node *, void *); VNET_DECLARE(int, nd6_recalc_reachtm_interval); #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) static VNET_DEFINE(struct ifnet *, nd6_defifp); VNET_DEFINE(int, nd6_defifindex); #define V_nd6_defifp VNET(nd6_defifp) VNET_DEFINE(int, ip6_use_tempaddr) = 0; VNET_DEFINE(int, ip6_desync_factor); VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME; VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME; VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE; /* RTPREF_MEDIUM has to be 0! 
*/ #define RTPREF_HIGH 1 #define RTPREF_MEDIUM 0 #define RTPREF_LOW (-1) #define RTPREF_RESERVED (-2) #define RTPREF_INVALID (-3) /* internal */ /* * Receive Router Solicitation Message - just for routers. * Router solicitation/advertisement is mostly managed by userland program * (rtadvd) so here we have no function like nd6_ra_output(). * * Based on RFC 2461 */ void nd6_rs_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_solicit *nd_rs; struct in6_addr saddr6 = ip6->ip6_src; char *lladdr = NULL; int lladdrlen = 0; union nd_opts ndopts; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * Accept RS only when V_ip6_forwarding=1 and the interface has * no ND6_IFF_ACCEPT_RTADV. */ if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) goto freeit; /* Sanity checks */ if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } /* * Don't update the neighbor cache, if src = ::. * This indicates that the src has no IP address assigned yet. 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len); if (nd_rs == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif icmp6len -= sizeof(*nd_rs); nd6_option_init(nd_rs + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_rs_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_rs_input: lladdrlen mismatch for %s " "(if %d, RS packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0); freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badrs); m_freem(m); } /* * Receive Router Advertisement Message. * * Based on RFC 2461 * TODO: on-link bit on prefix information * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing */ void nd6_ra_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct nd_ifinfo *ndi = ND_IFINFO(ifp); struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_advert *nd_ra; struct in6_addr saddr6 = ip6->ip6_src; int mcast = 0; union nd_opts ndopts; struct nd_defrouter *dr; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * We only accept RAs only when the per-interface flag * ND6_IFF_ACCEPT_RTADV is on the receiving interface. 
*/ if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV)) goto freeit; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) { nd6log((LOG_ERR, "nd6_ra_input: src %s is not link-local\n", ip6_sprintf(ip6bufs, &saddr6))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len); if (nd_ra == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif icmp6len -= sizeof(*nd_ra); nd6_option_init(nd_ra + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ra_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } { struct nd_defrouter dr0; u_int32_t advreachable = nd_ra->nd_ra_reachable; /* remember if this is a multicasted advertisement */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) mcast = 1; bzero(&dr0, sizeof(dr0)); dr0.rtaddr = saddr6; dr0.flags = nd_ra->nd_ra_flags_reserved; /* * Effectively-disable routes from RA messages when * ND6_IFF_NO_RADR enabled on the receiving interface or * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1). */ if (ndi->flags & ND6_IFF_NO_RADR) dr0.rtlifetime = 0; else if (V_ip6_forwarding && !V_ip6_rfc6204w3) dr0.rtlifetime = 0; else dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); dr0.expire = time_second + dr0.rtlifetime; dr0.ifp = ifp; /* unspecified or not? 
(RFC 2461 6.3.4) */ if (advreachable) { advreachable = ntohl(advreachable); if (advreachable <= MAX_REACHABLE_TIME && ndi->basereachable != advreachable) { ndi->basereachable = advreachable; ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */ } } if (nd_ra->nd_ra_retransmit) ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); - if (nd_ra->nd_ra_curhoplimit) - ndi->chlim = nd_ra->nd_ra_curhoplimit; + if (nd_ra->nd_ra_curhoplimit) { + if (ndi->chlim < nd_ra->nd_ra_curhoplimit) + ndi->chlim = nd_ra->nd_ra_curhoplimit; + else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) { + log(LOG_ERR, "RA with a lower CurHopLimit sent from " + "%s on %s (current = %d, received = %d). " + "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), + if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit); + } + } dr = defrtrlist_update(&dr0); } /* * prefix */ if (ndopts.nd_opts_pi) { struct nd_opt_hdr *pt; struct nd_opt_prefix_info *pi = NULL; struct nd_prefixctl pr; for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi; pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; pt = (struct nd_opt_hdr *)((caddr_t)pt + (pt->nd_opt_len << 3))) { if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION) continue; pi = (struct nd_opt_prefix_info *)pt; if (pi->nd_opt_pi_len != 4) { nd6log((LOG_INFO, "nd6_ra_input: invalid option " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_len)); continue; } if (128 < pi->nd_opt_pi_prefix_len) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_prefix_len)); continue; } if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "%s, ignored\n", ip6_sprintf(ip6bufs, &pi->nd_opt_pi_prefix))); continue; } bzero(&pr, sizeof(pr)); pr.ndpr_prefix.sin6_family = AF_INET6; pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix); pr.ndpr_prefix.sin6_addr = 
pi->nd_opt_pi_prefix; pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif; pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_AUTO) ? 1 : 0; pr.ndpr_plen = pi->nd_opt_pi_prefix_len; pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time); pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time); (void)prelist_update(&pr, dr, m, mcast); } } /* * MTU */ if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) { u_long mtu; u_long maxmtu; mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); /* lower bound */ if (mtu < IPV6_MMTU) { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option " "mtu=%lu sent from %s, ignoring\n", mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src))); goto skip; } /* upper bound */ maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu) ? ndi->maxmtu : ifp->if_mtu; if (mtu <= maxmtu) { int change = (ndi->linkmtu != mtu); ndi->linkmtu = mtu; if (change) /* in6_maxmtu may change */ in6_setmaxmtu(); } else { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " "mtu=%lu sent from %s; " "exceeds maxmtu %lu, ignoring\n", mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu)); } } skip: /* * Source link layer address */ { char *lladdr = NULL; int lladdrlen = 0; if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ra_input: lladdrlen mismatch for %s " "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_ADVERT, 0); /* * Installing a link-layer address might change the state of the * router's neighbor cache, which might also affect our on-link * detection of adveritsed prefixes. 
*/ pfxlist_onlink_check(); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badra); m_freem(m); } /* * default router list proccessing sub routines */ /* tell the change to user processes watching the routing socket. */ static void nd6_rtmsg(int cmd, struct rtentry *rt) { struct rt_addrinfo info; struct ifnet *ifp; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); ifp = rt->rt_ifp; if (ifp != NULL) { IF_ADDR_RLOCK(ifp); ifa = TAILQ_FIRST(&ifp->if_addrhead); info.rti_info[RTAX_IFP] = ifa->ifa_addr; ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; } else ifa = NULL; rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum); if (ifa != NULL) ifa_free(ifa); } static void defrouter_addreq(struct nd_defrouter *new) { struct sockaddr_in6 def, mask, gate; struct rtentry *newrt = NULL; int s; int error; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; s = splnet(); error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RTFREE(newrt); } if (error == 0) new->installed = 1; splx(s); return; } struct nd_defrouter * defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) { struct nd_defrouter *dr; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) return (dr); } return (NULL); /* search failed */ } /* * Remove the default route for a given router. * This is just a subroutine function for defrouter_select(), and should * not be called from anywhere else. 
*/ static void defrouter_delreq(struct nd_defrouter *dr) { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); } dr->installed = 0; } /* * remove all default routes from default router list */ void defrouter_reset(void) { struct nd_defrouter *dr; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) defrouter_delreq(dr); /* * XXX should we also nuke any default routers in the kernel, by * going through them by rtalloc1()? */ } void defrtrlist_del(struct nd_defrouter *dr) { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; /* * Flush all the routing table entries that use the router * as a next hop. */ if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV) rt6_flush(&dr->rtaddr, dr->ifp); if (dr->installed) { deldr = dr; defrouter_delreq(dr); } TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); /* * Also delete all the pointers to the router in each prefix lists. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { struct nd_pfxrouter *pfxrtr; if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) pfxrtr_del(pfxrtr); } pfxlist_onlink_check(); /* * If the router is the primary one, choose a new one. * Note that defrouter_select() will remove the current gateway * from the routing table. */ if (deldr) defrouter_select(); free(dr, M_IP6NDP); } /* * Default Router Selection according to Section 6.3.6 of RFC 2461 and * draft-ietf-ipngwg-router-selection: * 1) Routers that are reachable or probably reachable should be preferred. * If we have more than one (probably) reachable router, prefer ones * with the highest router preference. 
* 2) When no routers on the list are known to be reachable or * probably reachable, routers SHOULD be selected in a round-robin * fashion, regardless of router preference values. * 3) If the Default Router List is empty, assume that all * destinations are on-link. * * We assume nd_defrouter is sorted by router preference value. * Since the code below covers both with and without router preference cases, * we do not need to classify the cases by ifdef. * * At this moment, we do not try to install more than one default router, * even when the multipath routing is available, because we're not sure about * the benefits for stub hosts comparing to the risk of making the code * complicated and the possibility of introducing bugs. */ void defrouter_select(void) { int s = splnet(); struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; struct llentry *ln = NULL; /* * Let's handle easy case (3) first: * If default router list is empty, there's nothing to be done. */ if (TAILQ_EMPTY(&V_nd_defrouter)) { splx(s); return; } /* * Search for a (probably) reachable router from the list. * We just pick up the first reachable one (if any), assuming that * the ordering rule of the list described in defrtrlist_update(). */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { IF_AFDATA_RLOCK(dr->ifp); if (selected_dr == NULL && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; } IF_AFDATA_RUNLOCK(dr->ifp); if (ln != NULL) { LLE_RUNLOCK(ln); ln = NULL; } if (dr->installed && installed_dr == NULL) installed_dr = dr; else if (dr->installed && installed_dr) { /* this should not happen. warn for diagnosis. */ log(LOG_ERR, "defrouter_select: more than one router" " is installed\n"); } } /* * If none of the default routers was found to be reachable, * round-robin the list regardless of preference. * Otherwise, if we have an installed router, check if the selected * (reachable) router should really be preferred to the installed one. 
* We only prefer the new router when the old one is not reachable * or when the new one has a really higher preference value. */ if (selected_dr == NULL) { if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry)) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); } else if (installed_dr) { IF_AFDATA_RLOCK(installed_dr->ifp); if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && rtpref(selected_dr) <= rtpref(installed_dr)) { selected_dr = installed_dr; } IF_AFDATA_RUNLOCK(installed_dr->ifp); if (ln != NULL) LLE_RUNLOCK(ln); } /* * If the selected router is different than the installed one, * remove the installed router and install the selected one. * Note that the selected router is never NULL here. */ if (installed_dr != selected_dr) { if (installed_dr) defrouter_delreq(installed_dr); defrouter_addreq(selected_dr); } splx(s); return; } /* * for default router selection * regards router-preference field as a 2-bit signed integer */ static int rtpref(struct nd_defrouter *dr) { switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) { case ND_RA_FLAG_RTPREF_HIGH: return (RTPREF_HIGH); case ND_RA_FLAG_RTPREF_MEDIUM: case ND_RA_FLAG_RTPREF_RSV: return (RTPREF_MEDIUM); case ND_RA_FLAG_RTPREF_LOW: return (RTPREF_LOW); default: /* * This case should never happen. If it did, it would mean a * serious bug of kernel internal. We thus always bark here. * Or, can we even panic? 
*/ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags); return (RTPREF_INVALID); } /* NOTREACHED */ } static struct nd_defrouter * defrtrlist_update(struct nd_defrouter *new) { struct nd_defrouter *dr, *n; int s = splnet(); if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { /* entry exists */ if (new->rtlifetime == 0) { defrtrlist_del(dr); dr = NULL; } else { int oldpref = rtpref(dr); /* override */ dr->flags = new->flags; /* xxx flag check */ dr->rtlifetime = new->rtlifetime; dr->expire = new->expire; /* * If the preference does not change, there's no need * to sort the entries. Also make sure the selected * router is still installed in the kernel. */ if (dr->installed && rtpref(new) == oldpref) { splx(s); return (dr); } /* * preferred router may be changed, so relocate * this router. * XXX: calling TAILQ_REMOVE directly is a bad manner. * However, since defrtrlist_del() has many side * effects, we intentionally do so here. * defrouter_select() below will handle routing * changes later. */ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); n = dr; goto insert; } splx(s); return (dr); } /* entry does not exist */ if (new->rtlifetime == 0) { splx(s); return (NULL); } n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT); if (n == NULL) { splx(s); return (NULL); } bzero(n, sizeof(*n)); *n = *new; insert: /* * Insert the new router in the Default Router List; * The Default Router List should be in the descending order * of router-preferece. Routers with the same preference are * sorted in the arriving time order. 
*/ /* insert at the end of the group */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { if (rtpref(n) > rtpref(dr)) break; } if (dr) TAILQ_INSERT_BEFORE(dr, n, dr_entry); else TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); defrouter_select(); splx(s); return (n); } static struct nd_pfxrouter * pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *search; LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) { if (search->router == dr) break; } return (search); } static void pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *new; new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return; bzero(new, sizeof(*new)); new->router = dr; LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); pfxlist_onlink_check(); } static void pfxrtr_del(struct nd_pfxrouter *pfr) { LIST_REMOVE(pfr, pfr_entry); free(pfr, M_IP6NDP); } struct nd_prefix * nd6_prefix_lookup(struct nd_prefixctl *key) { struct nd_prefix *search; LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) { if (key->ndpr_ifp == search->ndpr_ifp && key->ndpr_plen == search->ndpr_plen && in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr, &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) { break; } } return (search); } int nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, struct nd_prefix **newp) { struct nd_prefix *new = NULL; int error = 0; int i, s; char ip6buf[INET6_ADDRSTRLEN]; new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return(ENOMEM); bzero(new, sizeof(*new)); new->ndpr_ifp = pr->ndpr_ifp; new->ndpr_prefix = pr->ndpr_prefix; new->ndpr_plen = pr->ndpr_plen; new->ndpr_vltime = pr->ndpr_vltime; new->ndpr_pltime = pr->ndpr_pltime; new->ndpr_flags = pr->ndpr_flags; if ((error = in6_init_prefix_ltimes(new)) != 0) { free(new, M_IP6NDP); return(error); } new->ndpr_lastupdate = time_second; if (newp != NULL) *newp = new; /* initialization */ LIST_INIT(&new->ndpr_advrtrs); 
in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen); /* make prefix in the canonical form */ for (i = 0; i < 4; i++) new->ndpr_prefix.sin6_addr.s6_addr32[i] &= new->ndpr_mask.s6_addr32[i]; s = splnet(); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); splx(s); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { int e; if ((e = nd6_prefix_onlink(new)) != 0) { nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? */ } } if (dr) pfxrtr_add(new, dr); return 0; } void prelist_remove(struct nd_prefix *pr) { struct nd_pfxrouter *pfr, *next; int e, s; char ip6buf[INET6_ADDRSTRLEN]; /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; pr->ndpr_pltime = 0; /* * Though these flags are now meaningless, we'd rather keep the value * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users * when executing "ndp -p". */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 && (e = nd6_prefix_offlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " "on %s, errno=%d\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* what should we do? */ } if (pr->ndpr_refcnt > 0) return; /* notice here? 
*/ s = splnet(); /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); /* free list of routers that adversed the prefix */ LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) { free(pfr, M_IP6NDP); } splx(s); free(pr, M_IP6NDP); pfxlist_onlink_check(); } /* * dr - may be NULL */ static int prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, struct mbuf *m, int mcast) { struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL; struct ifaddr *ifa; struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; int s = splnet(); int error = 0; int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; char ip6buf[INET6_ADDRSTRLEN]; auth = 0; if (m) { /* * Authenticity for NA consists authentication for * both IP header and IP datagrams, doesn't it ? */ #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM) auth = ((m->m_flags & M_AUTHIPHDR) && (m->m_flags & M_AUTHIPDGM)); #endif } if ((pr = nd6_prefix_lookup(new)) != NULL) { /* * nd6_prefix_lookup() ensures that pr and new have the same * prefix on a same interface. */ /* * Update prefix information. Note that the on-link (L) bit * and the autonomous (A) bit should NOT be changed from 1 * to 0. */ if (new->ndpr_raf_onlink == 1) pr->ndpr_raf_onlink = 1; if (new->ndpr_raf_auto == 1) pr->ndpr_raf_auto = 1; if (new->ndpr_raf_onlink) { pr->ndpr_vltime = new->ndpr_vltime; pr->ndpr_pltime = new->ndpr_pltime; (void)in6_init_prefix_ltimes(pr); /* XXX error case? */ pr->ndpr_lastupdate = time_second; } if (new->ndpr_raf_onlink && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { int e; if ((e = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_update: failed to make " "the prefix %s/%d on-link on %s " "(errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? 
*/ } } if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); } else { struct nd_prefix *newpr = NULL; newprefix = 1; if (new->ndpr_vltime == 0) goto end; if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; error = nd6_prelist_add(new, dr, &newpr); if (error != 0 || newpr == NULL) { nd6log((LOG_NOTICE, "prelist_update: " "nd6_prelist_add failed for %s/%d on %s " "errno=%d, returnpr=%p\n", ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr), new->ndpr_plen, if_name(new->ndpr_ifp), error, newpr)); goto end; /* we should just give up in this case. */ } /* * XXX: from the ND point of view, we can ignore a prefix * with the on-link bit being zero. However, we need a * prefix structure for references from autoconfigured * addresses. Thus, we explicitly make sure that the prefix * itself expires now. */ if (newpr->ndpr_raf_onlink == 0) { newpr->ndpr_vltime = 0; newpr->ndpr_pltime = 0; in6_init_prefix_ltimes(newpr); } pr = newpr; } /* * Address autoconfiguration based on Section 5.5.3 of RFC 2462. * Note that pr must be non NULL at this point. */ /* 5.5.3 (a). Ignore the prefix without the A bit set. */ if (!new->ndpr_raf_auto) goto end; /* * 5.5.3 (b). the link-local prefix should have been ignored in * nd6_ra_input. */ /* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */ if (new->ndpr_pltime > new->ndpr_vltime) { error = EINVAL; /* XXX: won't be used */ goto end; } /* * 5.5.3 (d). If the prefix advertised is not equal to the prefix of * an address configured by stateless autoconfiguration already in the * list of addresses associated with the interface, and the Valid * Lifetime is not 0, form an address. We first check if we have * a matching prefix. * Note: we apply a clarification in rfc2462bis-02 here. We only * consider autoconfigured addresses while RFC2462 simply said * "address". 
*/ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *ifa6; u_int32_t remaininglifetime; if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; /* * We only consider autoconfigured addresses as per rfc2462bis. */ if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF)) continue; /* * Spec is not clear here, but I believe we should concentrate * on unicast (i.e. not anycast) addresses. * XXX: other ia6_flags? detached or duplicated? */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0) continue; /* * Ignore the address if it is not associated with a prefix * or is associated with a prefix that is different from this * one. (pr is never NULL here) */ if (ifa6->ia6_ndpr != pr) continue; if (ia6_match == NULL) /* remember the first one */ ia6_match = ifa6; /* * An already autoconfigured address matched. Now that we * are sure there is at least one matched address, we can * proceed to 5.5.3. (e): update the lifetimes according to the * "two hours" rule and the privacy extension. * We apply some clarifications in rfc2462bis: * - use remaininglifetime instead of storedlifetime as a * variable name * - remove the dead code in the "two-hour" rule */ #define TWOHOUR (120*60) lt6_tmp = ifa6->ia6_lifetime; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME) remaininglifetime = ND6_INFINITE_LIFETIME; else if (time_second - ifa6->ia6_updatetime > lt6_tmp.ia6t_vltime) { /* * The case of "invalid" address. We should usually * not see this case. */ remaininglifetime = 0; } else remaininglifetime = lt6_tmp.ia6t_vltime - (time_second - ifa6->ia6_updatetime); /* when not updating, keep the current stored lifetime. 
*/ lt6_tmp.ia6t_vltime = remaininglifetime; if (TWOHOUR < new->ndpr_vltime || remaininglifetime < new->ndpr_vltime) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } else if (remaininglifetime <= TWOHOUR) { if (auth) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } } else { /* * new->ndpr_vltime <= TWOHOUR && * TWOHOUR < remaininglifetime */ lt6_tmp.ia6t_vltime = TWOHOUR; } /* The 2 hour rule is not imposed for preferred lifetime. */ lt6_tmp.ia6t_pltime = new->ndpr_pltime; in6_init_address_ltimes(pr, <6_tmp); /* * We need to treat lifetimes for temporary addresses * differently, according to * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1); * we only update the lifetimes when they are in the maximum * intervals. */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { u_int32_t maxvltime, maxpltime; if (V_ip6_temp_valid_lifetime > (u_int32_t)((time_second - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxvltime = V_ip6_temp_valid_lifetime - (time_second - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxvltime = 0; if (V_ip6_temp_preferred_lifetime > (u_int32_t)((time_second - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxpltime = V_ip6_temp_preferred_lifetime - (time_second - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxpltime = 0; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME || lt6_tmp.ia6t_vltime > maxvltime) { lt6_tmp.ia6t_vltime = maxvltime; } if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME || lt6_tmp.ia6t_pltime > maxpltime) { lt6_tmp.ia6t_pltime = maxpltime; } } ifa6->ia6_lifetime = lt6_tmp; ifa6->ia6_updatetime = time_second; } IF_ADDR_RUNLOCK(ifp); if (ia6_match == NULL && new->ndpr_vltime) { int ifidlen; /* * 5.5.3 (d) (continued) * No address matched and the valid lifetime is non-zero. * Create a new address. */ /* * Prefix Length check: * If the sum of the prefix length and interface identifier * length does not equal 128 bits, the Prefix Information * option MUST be ignored. 
The length of the interface * identifier is defined in a separate link-type specific * document. */ ifidlen = in6_if2idlen(ifp); if (ifidlen < 0) { /* this should not happen, so we always log it. */ log(LOG_ERR, "prelist_update: IFID undefined (%s)\n", if_name(ifp)); goto end; } if (ifidlen + pr->ndpr_plen != 128) { nd6log((LOG_INFO, "prelist_update: invalid prefixlen " "%d for %s, ignored\n", pr->ndpr_plen, if_name(ifp))); goto end; } if ((ia6 = in6_ifadd(new, mcast)) != NULL) { /* * note that we should use pr (not new) for reference. */ pr->ndpr_refcnt++; ia6->ia6_ndpr = pr; /* * RFC 3041 3.3 (2). * When a new public address is created as described * in RFC2462, also create a new temporary address. * * RFC 3041 3.5. * When an interface connects to a new link, a new * randomized interface identifier should be generated * immediately together with a new set of temporary * addresses. Thus, we specifiy 1 as the 2nd arg of * in6_tmpifadd(). */ if (V_ip6_use_tempaddr) { int e; if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) { nd6log((LOG_NOTICE, "prelist_update: " "failed to create a temporary " "address, errno=%d\n", e)); } } ifa_free(&ia6->ia_ifa); /* * A newly added address might affect the status * of other addresses, so we check and update it. * XXX: what if address duplication happens? */ pfxlist_onlink_check(); } else { /* just set an error. do not bark here. */ error = EADDRNOTAVAIL; /* XXX: might be unused. */ } } end: splx(s); return error; } /* * A supplement function used in the on-link detection below; * detect if a given prefix has a (probably) reachable advertising router. * XXX: lengthy function name... 
*/
static struct nd_pfxrouter *
find_pfxlist_reachable_router(struct nd_prefix *pr)
{
	struct nd_pfxrouter *pfxrtr;
	struct llentry *ln;
	int canreach;

	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
		IF_AFDATA_RUNLOCK(pfxrtr->router->ifp);
		if (ln == NULL)
			continue;
		/* Sample the state before releasing the entry. */
		canreach = ND6_IS_LLINFO_PROBREACH(ln);
		LLE_RUNLOCK(ln);
		if (canreach)
			break;
	}
	/* The loop cursor: the entry we stopped at, if any. */
	return (pfxrtr);
}

/*
 * Check if each prefix in the prefix list has at least one available router
 * that advertised the prefix (a router is "available" if its neighbor cache
 * entry is reachable or probably reachable).
 * If the check fails, the prefix may be off-link, because, for example,
 * we have moved from the network but the lifetime of the prefix has not
 * expired yet.  So we should not use the prefix if there is another prefix
 * that has an available router.
 * But, if there is no prefix that has an available router, we still regards
 * all the prefixes as on-link.  This is because we can't tell if all the
 * routers are simply dead or if we really moved from the network and there
 * is no router around us.
 */
void
pfxlist_onlink_check()
{
	struct nd_prefix *pr;
	struct in6_ifaddr *ifa;
	struct nd_defrouter *dr;
	struct nd_pfxrouter *pfxrtr = NULL;

	/*
	 * Check if there is a prefix that has a reachable advertising
	 * router.
	 */
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
			break;
	}

	/*
	 * If we have no such prefix, check whether we still have a router
	 * that does not advertise any prefixes.
	 */
	if (pr == NULL) {
		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
			struct nd_prefix *pr0;

			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
					break;
			}
			if (pfxrtr != NULL)
				break;
		}
	}
	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
		/*
		 * There is at least one prefix that has a reachable router,
		 * or at least a router which probably does not advertise
		 * any prefixes.  The latter would be the case when we move
		 * to a new link where we have a router that does not provide
		 * prefixes and we configure an address by hand.
		 * Detach prefixes which have no reachable advertising
		 * router, and attach other prefixes.
		 */
		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
			/* XXX: a link-local prefix should never be detached */
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			/*
			 * we aren't interested in prefixes without the L bit
			 * set.
			 */
			if (pr->ndpr_raf_onlink == 0)
				continue;

			/* Prefixes without the A bit are skipped as well. */
			if (pr->ndpr_raf_auto == 0)
				continue;

			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
			    find_pfxlist_reachable_router(pr) == NULL)
				pr->ndpr_stateflags |= NDPRF_DETACHED;
			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
			    find_pfxlist_reachable_router(pr) != 0)
				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
		}
	} else {
		/* there is no prefix that has a reachable router */
		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			if (pr->ndpr_raf_onlink == 0)
				continue;

			if (pr->ndpr_raf_auto == 0)
				continue;

			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
		}
	}

	/*
	 * Remove each interface route associated with a (just) detached
	 * prefix, and reinstall the interface route for a (just) attached
	 * prefix.  Note that all attempt of reinstallation does not
	 * necessarily success, when a same prefix is shared among multiple
	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
	 * so we don't have to care about them.
	 */
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		int e;
		char ip6buf[INET6_ADDRSTRLEN];

		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
			continue;

		if (pr->ndpr_raf_onlink == 0)
			continue;

		if (pr->ndpr_raf_auto == 0)
			continue;

		/* Detached but still on-link: take the interface route away. */
		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
			if ((e = nd6_prefix_offlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d offlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
					    &pr->ndpr_prefix.sin6_addr),
					    pr->ndpr_plen, e));
			}
		}
		/* Attached but not on-link: put the interface route back. */
		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
		    pr->ndpr_raf_onlink) {
			if ((e = nd6_prefix_onlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d onlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
					    &pr->ndpr_prefix.sin6_addr),
					    pr->ndpr_plen, e));
			}
		}
	}

	/*
	 * Changes on the prefix status might affect address status as well.
	 * Make sure that all addresses derived from an attached prefix are
	 * attached, and that all addresses derived from a detached prefix are
	 * detached.  Note, however, that a manually configured address should
	 * always be attached.
	 * The precise detection logic is same as the one for prefixes.
	 *
	 * XXXRW: in6_ifaddrhead locking.
	 */
	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
			continue;

		if (ifa->ia6_ndpr == NULL) {
			/*
			 * This can happen when we first configure the address
			 * (i.e. the address exists, but the prefix does not).
			 * XXX: complicated relationships...
			 */
			continue;
		}

		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
			break;
	}
	if (ifa) {
		/* At least one autoconfigured address has a reachable router. */
		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
				continue;

			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
					nd6_dad_start((struct ifaddr *)ifa, 0);
				}
			} else {
				ifa->ia6_flags |= IN6_IFF_DETACHED;
			}
		}
	} else {
		/* No such address: re-attach every detached autoconf address. */
		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
				/* Do we need a delay in this case? */
				nd6_dad_start((struct ifaddr *)ifa, 0);
			}
		}
	}
}

/*
 * Add the interface route for prefix pr, using ifa as the gateway
 * address, to every FIB (0 .. rt_numfibs - 1).
 *
 * NDPRF_ONLINK is set on pr as soon as the route is added in at least one
 * FIB.  Returns 0 if every request succeeded, otherwise the error of the
 * last request that failed.
 */
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
	struct radix_node_head *rnh;
	struct rtentry *rt;
	struct sockaddr_in6 mask6;
	u_long rtflags;
	int error, a_failure, fibnum;

	/*
	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
	 * ifa->ifa_rtrequest = nd6_rtrequest;
	 */
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_len = sizeof(mask6);
	mask6.sin6_addr = pr->ndpr_mask;
	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;

	a_failure = 0;
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {

		rt = NULL;
		error = in6_rtrequest(RTM_ADD,
		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
		if (error == 0) {
			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
			    error, pr, ifa));

			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
			/* XXX what if rhn == NULL? */
			RADIX_NODE_HEAD_LOCK(rnh);
			RT_LOCK(rt);
			/*
			 * Replace the gateway with an empty link-level
			 * address and fill in the interface type and index.
			 */
			if (rt_setgate(rt, rt_key(rt),
			    (struct sockaddr *)&null_sdl) == 0) {
				struct sockaddr_dl *dl;

				dl = (struct sockaddr_dl *)rt->rt_gateway;
				dl->sdl_type = rt->rt_ifp->if_type;
				dl->sdl_index = rt->rt_ifp->if_index;
			}
			RADIX_NODE_HEAD_UNLOCK(rnh);
			nd6_rtmsg(RTM_ADD, rt);
			RT_UNLOCK(rt);
			pr->ndpr_stateflags |= NDPRF_ONLINK;
		} else {
			char ip6buf[INET6_ADDRSTRLEN];
			char ip6bufg[INET6_ADDRSTRLEN];
			char ip6bufm[INET6_ADDRSTRLEN];
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
			    "flags=%lx errno = %d\n",
			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
			    rtflags, error));

			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}

		/* Drop the reference handed back by in6_rtrequest(). */
		if (rt != NULL) {
			RT_LOCK(rt);
			RT_REMREF(rt);
			RT_UNLOCK(rt);
		}
	}

	/* Return the last error we got. */
	return (a_failure);
}

/*
 * Make prefix pr on-link by installing its interface route.
 *
 * Returns EEXIST if pr is already marked on-link; 0 without installing
 * anything when the same prefix is already on-link on another entry or
 * when the interface has no usable IPv6 address; otherwise the result of
 * nd6_prefix_onlink_rtrequest().
 */
static int
nd6_prefix_onlink(struct nd_prefix *pr)
{
	struct ifaddr *ifa;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	int error = 0;
	char ip6buf[INET6_ADDRSTRLEN];

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
		nd6log((LOG_ERR,
		    "nd6_prefix_onlink: %s/%d is already on-link\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen));
		return (EEXIST);
	}

	/*
	 * Add the interface route associated with the prefix.  Before
	 * installing the route, check if there's the same prefix on another
	 * interface, and the prefix has already installed the interface route.
	 * Although such a configuration is expected to be rare, we explicitly
	 * allow it.
	 */
	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
		if (opr == pr)
			continue;

		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
			continue;

		if (opr->ndpr_plen == pr->ndpr_plen &&
		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
			return (0);
	}

	/*
	 * We prefer link-local addresses as the associated interface address.
	 */
	/* search for a link-local addr */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
	if (ifa == NULL) {
		/* XXX: freebsd does not have ifa_ifwithaf */
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family == AF_INET6)
				break;
		}
		/* Reference the fallback address before dropping the lock. */
		if (ifa != NULL)
			ifa_ref(ifa);
		IF_ADDR_RUNLOCK(ifp);
		/* should we care about ia6_flags? */
	}
	if (ifa == NULL) {
		/*
		 * This can still happen, when, for example, we receive an RA
		 * containing a prefix with the L bit set and the A bit clear,
		 * after removing all IPv6 addresses on the receiving
		 * interface.  This should, of course, be rare though.
		 */
		nd6log((LOG_NOTICE,
		    "nd6_prefix_onlink: failed to find any ifaddr"
		    " to add route for a prefix(%s/%d) on %s\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen, if_name(ifp)));
		return (0);
	}

	error = nd6_prefix_onlink_rtrequest(pr, ifa);

	if (ifa != NULL)
		ifa_free(ifa);

	return (error);
}

/*
 * Take prefix pr off-link by deleting its interface route from every FIB
 * (0 .. rt_numfibs - 1).
 *
 * Returns EEXIST if pr is not marked on-link, 0 if every delete request
 * succeeded, otherwise the error of the last request that failed.  Only
 * on full success is NDPRF_ONLINK cleared and an attempt made to bring an
 * identical, attached prefix on another list entry on-link instead.
 */
static int
nd6_prefix_offlink(struct nd_prefix *pr)
{
	int error = 0;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	struct sockaddr_in6 sa6, mask6;
	struct rtentry *rt;
	char ip6buf[INET6_ADDRSTRLEN];
	int fibnum, a_failure;

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
		nd6log((LOG_ERR,
		    "nd6_prefix_offlink: %s/%d is already off-link\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen));
		return (EEXIST);
	}

	bzero(&sa6, sizeof(sa6));
	sa6.sin6_family = AF_INET6;
	sa6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
	    sizeof(struct in6_addr));
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_family = AF_INET6;
	mask6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));

	a_failure = 0;
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		rt = NULL;
		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
		if (error == 0) {
			/* report the route deletion to the routing socket. */
			if (rt != NULL)
				nd6_rtmsg(RTM_DELETE, rt);
		} else {
			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}
		if (rt != NULL) {
			RTFREE(rt);
		}
	}
	error = a_failure;
	if (error == 0) {
		pr->ndpr_stateflags &= ~NDPRF_ONLINK;

		/*
		 * There might be the same prefix on another interface,
		 * the prefix which could not be on-link just because we have
		 * the interface route (see comments in nd6_prefix_onlink).
		 * If there's one, try to make the prefix on-link on the
		 * interface.
		 */
		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
			if (opr == pr)
				continue;

			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
				continue;

			/*
			 * KAME specific: detached prefixes should not be
			 * on-link.
			 */
			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
				continue;

			if (opr->ndpr_plen == pr->ndpr_plen &&
			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
				int e;

				if ((e = nd6_prefix_onlink(opr)) != 0) {
					nd6log((LOG_ERR,
					    "nd6_prefix_offlink: failed to "
					    "recover a prefix %s/%d from %s "
					    "to %s (errno = %d)\n",
					    ip6_sprintf(ip6buf,
						&opr->ndpr_prefix.sin6_addr),
					    opr->ndpr_plen, if_name(ifp),
					    if_name(opr->ndpr_ifp), e));
				}
			}
		}
	} else {
		/* XXX: can we still set the NDPRF_ONLINK flag? */
		nd6log((LOG_ERR,
		    "nd6_prefix_offlink: failed to delete route: "
		    "%s/%d on %s (errno = %d)\n",
		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
		    if_name(ifp), error));
	}

	return (error);
}

/*
 * Form and configure an autoconfigured address on pr's interface: the
 * advertised prefix bits are combined with the interface-identifier bits
 * of the interface's link-local address.
 *
 * pr    - advertised prefix (prefix, length and lifetimes are used)
 * mcast - non-zero requests IN6_IFAUPDATE_DADDELAY on the update
 *
 * Returns the address looked up after in6_update_ifa() succeeds, or NULL
 * if there is no link-local address, the prefix length does not match
 * that of the link-local address, the address already exists, or the
 * update fails.
 */
static struct in6_ifaddr *
in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
	struct ifnet *ifp = pr->ndpr_ifp;
	struct ifaddr *ifa;
	struct in6_aliasreq ifra;
	struct in6_ifaddr *ia, *ib;
	int error, plen0;
	struct in6_addr mask;
	int prefixlen = pr->ndpr_plen;
	int updateflags;
	char ip6buf[INET6_ADDRSTRLEN];

	in6_prefixlen2mask(&mask, prefixlen);

	/*
	 * find a link-local address (will be interface ID).
	 * Is it really mandatory? Theoretically, a global or a site-local
	 * address can be configured without a link-local address, if we
	 * have a unique interface identifier...
	 *
	 * it is not mandatory to have a link-local address, we can generate
	 * interface identifier on the fly.  we do this because:
	 * (1) it should be the easiest way to find interface identifier.
	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
	 * for multiple addresses on a single interface, and possible shortcut
	 * of DAD.  we omitted DAD for this reason in the past.
	 * (3) a user can prevent autoconfiguration of global address
	 * by removing link-local address by hand (this is partly because we
	 * don't have other way to control the use of IPv6 on an interface.
	 * this has been our design choice - cf. NRL's "ifconfig auto").
	 * (4) it is easier to manage when an interface has addresses
	 * with the same interface identifier, than to have multiple addresses
	 * with different interface identifiers.
	 */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
	if (ifa)
		ib = (struct in6_ifaddr *)ifa;
	else
		return NULL;

	/* prefixlen + ifidlen must be equal to 128 */
	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
	if (prefixlen != plen0) {
		ifa_free(ifa);
		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
		    "(prefix=%d ifid=%d)\n",
		    if_name(ifp), prefixlen, 128 - plen0));
		return NULL;
	}

	/* make ifaddr */

	bzero(&ifra, sizeof(ifra));
	/*
	 * in6_update_ifa() does not use ifra_name, but we accurately set it
	 * for safety.
	 */
	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
	ifra.ifra_addr.sin6_family = AF_INET6;
	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
	/* prefix */
	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];

	/* interface ID */
	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
	/* The link-local address is no longer needed past this point. */
	ifa_free(ifa);

	/* new prefix mask. */
	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
	ifra.ifra_prefixmask.sin6_family = AF_INET6;
	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
	    sizeof(ifra.ifra_prefixmask.sin6_addr));

	/* lifetimes. */
	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;

	/* XXX: scope zone ID? */

	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */

	/*
	 * Make sure that we do not have this address already.  This should
	 * usually not happen, but we can still see this case, e.g., if we
	 * have manually configured the exact address to be configured.
	 */
	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
	    &ifra.ifra_addr.sin6_addr);
	if (ifa != NULL) {
		ifa_free(ifa);
		/* this should be rare enough to make an explicit log */
		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
		return (NULL);
	}

	/*
	 * Allocate ifaddr structure, link into chain, etc.
	 * If we are going to create a new address upon receiving a multicasted
	 * RA, we need to impose a random delay before starting DAD.
	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
	 */
	updateflags = 0;
	if (mcast)
		updateflags |= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
		nd6log((LOG_ERR,
		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
		    if_name(ifp), error));
		return (NULL);	/* ifaddr must not have been allocated. */
	}

	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
	/*
	 * XXXRW: Assumption of non-NULLness here might not be true with
	 * fine-grained locking -- should we validate it?  Or just return
	 * earlier ifa rather than looking it up again?
	 */
	return (ia);		/* this is always non-NULL  and referenced. */
}

/*
 * ia0 - corresponding public address
 */
int
in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
	struct in6_ifaddr *newia, *ia;
	struct in6_aliasreq ifra;
	int i, error;
	int trylimit = 3;	/* XXX: adhoc value */
	int updateflags;
	u_int32_t randid[2];
	time_t vltime0, pltime0;

	bzero(&ifra, sizeof(ifra));
	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
	ifra.ifra_addr = ia0->ia_addr;
	/* copy prefix mask */
	ifra.ifra_prefixmask = ia0->ia_prefixmask;
	/* clear the old IFID */
	for (i = 0; i < 4; i++) {
		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
	}

  again:
	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
		    "random IFID\n"));
		return (EINVAL);
	}
	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));

	/*
	 * in6_get_tmpifid() quite likely provided a unique interface ID.
	 * However, we may still have a chance to see collision, because
	 * there may be a time lag between generation of the ID and generation
	 * of the address.  So, we'll do one more sanity check.
	 */
	IN6_IFADDR_RLOCK();
	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
		    &ifra.ifra_addr.sin6_addr)) {
			if (trylimit-- == 0) {
				IN6_IFADDR_RUNLOCK();
				/*
				 * Give up.  Something strange should have
				 * happened.
				 */
				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
				    "find a unique random IFID\n"));
				return (EEXIST);
			}
			IN6_IFADDR_RUNLOCK();
			forcegen = 1;
			goto again;
		}
	}
	IN6_IFADDR_RUNLOCK();

	/*
	 * The Valid Lifetime is the lower of the Valid Lifetime of the
	 * public address or TEMP_VALID_LIFETIME.
* The Preferred Lifetime is the lower of the Preferred Lifetime * of the public address or TEMP_PREFERRED_LIFETIME - * DESYNC_FACTOR. */ if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { vltime0 = IFA6_IS_INVALID(ia0) ? 0 : (ia0->ia6_lifetime.ia6t_vltime - (time_second - ia0->ia6_updatetime)); if (vltime0 > V_ip6_temp_valid_lifetime) vltime0 = V_ip6_temp_valid_lifetime; } else vltime0 = V_ip6_temp_valid_lifetime; if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 : (ia0->ia6_lifetime.ia6t_pltime - (time_second - ia0->ia6_updatetime)); if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){ pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor; } } else pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor; ifra.ifra_lifetime.ia6t_vltime = vltime0; ifra.ifra_lifetime.ia6t_pltime = pltime0; /* * A temporary address is created only if this calculated Preferred * Lifetime is greater than REGEN_ADVANCE time units. */ if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance) return (0); /* XXX: scope zone ID? */ ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY); /* allocate ifaddr structure, link into chain, etc. */ updateflags = 0; if (delay) updateflags |= IN6_IFAUPDATE_DADDELAY; if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) return (error); newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); if (newia == NULL) { /* XXX: can it happen? */ nd6log((LOG_ERR, "in6_tmpifadd: ifa update succeeded, but we got " "no ifaddr\n")); return (EINVAL); /* XXX */ } newia->ia6_ndpr = ia0->ia6_ndpr; newia->ia6_ndpr->ndpr_refcnt++; ifa_free(&newia->ia_ifa); /* * A newly added address might affect the status of other addresses. * XXX: when the temporary address is generated with a new public * address, the onlink check is redundant. 
However, it would be safe * to do the check explicitly everywhere a new address is generated, * and, in fact, we surely need the check when we create a new * temporary address due to deprecation of an old temporary address. */ pfxlist_onlink_check(); return (0); } static int in6_init_prefix_ltimes(struct nd_prefix *ndpr) { if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_preferred = 0; else ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime; if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_expire = 0; else ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime; return 0; } static void in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) { /* init ia6t_expire */ if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME) lt6->ia6t_expire = 0; else { lt6->ia6t_expire = time_second; lt6->ia6t_expire += lt6->ia6t_vltime; } /* init ia6t_preferred */ if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME) lt6->ia6t_preferred = 0; else { lt6->ia6t_preferred = time_second; lt6->ia6t_preferred += lt6->ia6t_pltime; } } /* * Delete all the routing table entries that use the specified gateway. * XXX: this function causes search through all entries of routing table, so * it shouldn't be called when acting as a router. */ void rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { struct radix_node_head *rnh; u_int fibnum; int s = splnet(); /* We'll care only link-local addresses */ if (!IN6_IS_ADDR_LINKLOCAL(gateway)) { splx(s); return; } /* XXX Do we really need to walk any but the default FIB? 
*/ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { rnh = rt_tables_get_rnh(fibnum, AF_INET6); if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); RADIX_NODE_HEAD_UNLOCK(rnh); } splx(s); } static int rt6_deleteroute(struct radix_node *rn, void *arg) { #define SIN6(s) ((struct sockaddr_in6 *)s) struct rtentry *rt = (struct rtentry *)rn; struct in6_addr *gate = (struct in6_addr *)arg; if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) return (0); if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) { return (0); } /* * Do not delete a static route. * XXX: this seems to be a bit ad-hoc. Should we consider the * 'cloned' bit instead? */ if ((rt->rt_flags & RTF_STATIC) != 0) return (0); /* * We delete only host route. This means, in particular, we don't * delete default route. */ if ((rt->rt_flags & RTF_HOST) == 0) return (0); return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum)); #undef SIN6 } int nd6_setdefaultiface(int ifindex) { int error = 0; if (ifindex < 0 || V_if_index < ifindex) return (EINVAL); if (ifindex != 0 && !ifnet_byindex(ifindex)) return (EINVAL); if (V_nd6_defifindex != ifindex) { V_nd6_defifindex = ifindex; if (V_nd6_defifindex > 0) V_nd6_defifp = ifnet_byindex(V_nd6_defifindex); else V_nd6_defifp = NULL; /* * Our current implementation assumes one-to-one maping between * interfaces and links, so it would be natural to use the * default interface as the default link. */ scope6_setdefault(V_nd6_defifp); } return (error); }