diff --git a/contrib/netcat/netcat.c b/contrib/netcat/netcat.c index 04e31e8a5c0c..efbe9bb9bbd7 100644 --- a/contrib/netcat/netcat.c +++ b/contrib/netcat/netcat.c @@ -1,1397 +1,1396 @@ /* $OpenBSD: netcat.c,v 1.130 2015/07/26 19:12:28 chl Exp $ */ /* * Copyright (c) 2001 Eric Jackson * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Re-written nc(1) for OpenBSD. Original implementation by * *Hobbit* . */ #include #include #include #include #include #include #include #include #ifdef IPSEC #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "atomicio.h" #ifndef SUN_LEN #define SUN_LEN(su) \ (sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path)) #endif #define PORT_MAX 65535 #define PORT_MAX_LEN 6 #define UNIX_DG_TMP_SOCKET_SIZE 19 #define POLL_STDIN 0 #define POLL_NETOUT 1 #define POLL_NETIN 2 #define POLL_STDOUT 3 #define BUFSIZE 16384 /* Command Line Options */ int dflag; /* detached, no stdin */ int Fflag; /* fdpass sock to stdout */ unsigned int iflag; /* Interval Flag */ int kflag; /* More than one connect */ int lflag; /* Bind to local port */ int Nflag; /* shutdown() network socket */ int nflag; /* Don't do name look up */ int FreeBSD_Oflag; /* Do not use TCP options */ char *Pflag; /* Proxy username */ char *pflag; /* Localport flag */ int rflag; /* Random ports flag */ char *sflag; /* Source Address */ int tflag; /* Telnet Emulation */ int uflag; /* UDP - Default to TCP */ int vflag; /* Verbosity */ int xflag; /* Socks proxy */ int zflag; /* Port Scan Flag */ int Dflag; /* sodebug */ int Iflag; /* TCP receive buffer size */ int Oflag; /* TCP send buffer size */ int Sflag; /* TCP MD5 signature option */ int Tflag = -1; /* IP Type of Service */ int rtableid = -1; int timeout = -1; int family = AF_UNSPEC; char *portlist[PORT_MAX+1]; char *unix_dg_tmp_socket; void atelnet(int, unsigned char *, unsigned int); void build_ports(char *); void help(void); int local_listen(char *, char *, struct addrinfo); void readwrite(int); void fdpass(int nfd) __attribute__((noreturn)); int remote_connect(const char *, const char *, struct addrinfo); int timeout_connect(int, const struct sockaddr *, socklen_t); int socks_connect(const char *, const char *, struct addrinfo, const char *, const char *, struct addrinfo, int, const char *); int udptest(int); int unix_bind(char *); int unix_connect(char *); int unix_listen(char *); void set_common_sockopts(int, int); int map_tos(char *, int *); void report_connect(const struct sockaddr *, socklen_t); void usage(int); ssize_t drainbuf(int, unsigned char *, size_t *); ssize_t fillbuf(int, unsigned char *, size_t *); #ifdef IPSEC -void add_ipsec_policy(int, char *); +void add_ipsec_policy(int, int, char *); char *ipsec_policy[2]; #endif int main(int argc, char *argv[]) { int ch, s, ret, socksv, ipsec_count; int numfibs; size_t intsize = sizeof(int); char *host, *uport; struct addrinfo hints; struct servent *sv; socklen_t len; struct sockaddr_storage cliaddr; char *proxy; const char *errstr, *proxyhost = "", *proxyport = NULL; struct addrinfo proxyhints; char unix_dg_tmp_socket_buf[UNIX_DG_TMP_SOCKET_SIZE]; struct option longopts[] = { { "no-tcpopt", no_argument, &FreeBSD_Oflag, 1 }, { NULL, 0, NULL, 0 } }; ret = 1; ipsec_count = 0; s = 0; socksv = 5; host = NULL; uport = NULL; sv = NULL; signal(SIGPIPE, SIG_IGN); while ((ch = getopt_long(argc, argv, "46DdEe:FhI:i:klNnoO:P:p:rSs:tT:UuV:vw:X:x:z", longopts, NULL)) != -1) { switch (ch) { case '4': family = AF_INET; break; case '6': family = AF_INET6; break; case 'U': family = AF_UNIX; break; case 'X': if (strcasecmp(optarg, "connect") == 0) socksv = -1; /* HTTP proxy CONNECT */ else if (strcmp(optarg, "4") == 0) socksv = 4; /* SOCKS v.4 */ else if (strcmp(optarg, "5") == 0) socksv = 5; /* SOCKS v.5 */ else errx(1, "unsupported proxy protocol"); break; case 'd': dflag = 1; break; case 'e': #ifdef IPSEC ipsec_policy[ipsec_count++ % 2] = optarg; #else errx(1, "IPsec support unavailable."); #endif break; case 'E': #ifdef IPSEC ipsec_policy[0] = "in ipsec esp/transport//require"; ipsec_policy[1] = "out ipsec esp/transport//require"; #else errx(1, "IPsec support unavailable."); #endif break; case 'F': Fflag = 1; break; case 'h': help(); break; case 'i': iflag = strtonum(optarg, 0, UINT_MAX, &errstr); if (errstr) errx(1, "interval %s: %s", errstr, optarg); break; case 'k': kflag = 1; break; case 'l': lflag = 1; break; case 'N': Nflag = 1; break; case 'n': nflag = 1; break; case 'o': fprintf(stderr, "option -o is deprecated.\n"); break; case 'P': Pflag = optarg; break; case 'p': pflag = optarg; break; case 'r': rflag = 1; break; case 's': sflag = optarg; break; case 't': tflag = 1; break; case 'u': uflag = 1; break; case 'V': if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) errx(1, "Multiple FIBS not supported"); rtableid = (int)strtonum(optarg, 0, numfibs - 1, &errstr); if (errstr) errx(1, "rtable %s: %s", errstr, optarg); break; case 'v': vflag = 1; break; case 'w': timeout = strtonum(optarg, 0, INT_MAX / 1000, &errstr); if (errstr) errx(1, "timeout %s: %s", errstr, optarg); timeout *= 1000; break; case 'x': xflag = 1; if ((proxy = strdup(optarg)) == NULL) err(1, NULL); break; case 'z': zflag = 1; break; case 'D': Dflag = 1; break; case 'I': Iflag = strtonum(optarg, 1, 65536 << 14, &errstr); if (errstr != NULL) errx(1, "TCP receive window %s: %s", errstr, optarg); break; case 'O': Oflag = strtonum(optarg, 1, 65536 << 14, &errstr); if (errstr != NULL) { if (strcmp(errstr, "invalid") != 0) errx(1, "TCP send window %s: %s", errstr, optarg); } break; case 'S': Sflag = 1; break; case 'T': errstr = NULL; errno = 0; if (map_tos(optarg, &Tflag)) break; if (strlen(optarg) > 1 && optarg[0] == '0' && optarg[1] == 'x') Tflag = (int)strtol(optarg, NULL, 16); else Tflag = (int)strtonum(optarg, 0, 255, &errstr); if (Tflag < 0 || Tflag > 255 || errstr || errno) errx(1, "illegal tos value %s", optarg); break; default: usage(1); } } argc -= optind; argv += optind; /* Cruft to make sure options are clean, and used properly. */ if (argv[0] && !argv[1] && family == AF_UNIX) { host = argv[0]; uport = NULL; } else if (argv[0] && !argv[1]) { if (!lflag) usage(1); uport = argv[0]; host = NULL; } else if (argv[0] && argv[1]) { host = argv[0]; uport = argv[1]; } else usage(1); if (lflag && sflag) errx(1, "cannot use -s and -l"); if (lflag && pflag) errx(1, "cannot use -p and -l"); if (lflag && zflag) errx(1, "cannot use -z and -l"); if (!lflag && kflag) errx(1, "must use -l with -k"); /* Get name of temporary socket for unix datagram client */ if ((family == AF_UNIX) && uflag && !lflag) { if (sflag) { unix_dg_tmp_socket = sflag; } else { strlcpy(unix_dg_tmp_socket_buf, "/tmp/nc.XXXXXXXXXX", UNIX_DG_TMP_SOCKET_SIZE); if (mktemp(unix_dg_tmp_socket_buf) == NULL) err(1, "mktemp"); unix_dg_tmp_socket = unix_dg_tmp_socket_buf; } } /* Initialize addrinfo structure. */ if (family != AF_UNIX) { memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = family; hints.ai_socktype = uflag ? SOCK_DGRAM : SOCK_STREAM; hints.ai_protocol = uflag ? IPPROTO_UDP : IPPROTO_TCP; if (nflag) hints.ai_flags |= AI_NUMERICHOST; } if (xflag) { if (uflag) errx(1, "no proxy support for UDP mode"); if (lflag) errx(1, "no proxy support for listen"); if (family == AF_UNIX) errx(1, "no proxy support for unix sockets"); /* XXX IPv6 transport to proxy would probably work */ if (family == AF_INET6) errx(1, "no proxy support for IPv6"); if (sflag) errx(1, "no proxy support for local source address"); proxyhost = strsep(&proxy, ":"); proxyport = proxy; memset(&proxyhints, 0, sizeof(struct addrinfo)); proxyhints.ai_family = family; proxyhints.ai_socktype = SOCK_STREAM; proxyhints.ai_protocol = IPPROTO_TCP; if (nflag) proxyhints.ai_flags |= AI_NUMERICHOST; } if (lflag) { int connfd; ret = 0; if (family == AF_UNIX) { if (uflag) s = unix_bind(host); else s = unix_listen(host); } /* Allow only one connection at a time, but stay alive. */ for (;;) { if (family != AF_UNIX) s = local_listen(host, uport, hints); if (s < 0) err(1, NULL); /* * For UDP and -k, don't connect the socket, let it * receive datagrams from multiple socket pairs. */ if (uflag && kflag) readwrite(s); /* * For UDP and not -k, we will use recvfrom() initially * to wait for a caller, then use the regular functions * to talk to the caller. */ else if (uflag && !kflag) { int rv, plen; char buf[16384]; struct sockaddr_storage z; len = sizeof(z); plen = 2048; rv = recvfrom(s, buf, plen, MSG_PEEK, (struct sockaddr *)&z, &len); if (rv < 0) err(1, "recvfrom"); rv = connect(s, (struct sockaddr *)&z, len); if (rv < 0) err(1, "connect"); if (vflag) report_connect((struct sockaddr *)&z, len); readwrite(s); } else { len = sizeof(cliaddr); connfd = accept(s, (struct sockaddr *)&cliaddr, &len); if (connfd == -1) { /* For now, all errnos are fatal */ err(1, "accept"); } if (vflag) report_connect((struct sockaddr *)&cliaddr, len); readwrite(connfd); close(connfd); } if (family != AF_UNIX) close(s); else if (uflag) { if (connect(s, NULL, 0) < 0) err(1, "connect"); } if (!kflag) break; } } else if (family == AF_UNIX) { ret = 0; if ((s = unix_connect(host)) > 0 && !zflag) { readwrite(s); close(s); } else ret = 1; if (uflag) unlink(unix_dg_tmp_socket); exit(ret); } else { int i = 0; /* Construct the portlist[] array. */ build_ports(uport); /* Cycle through portlist, connecting to each port. */ for (i = 0; portlist[i] != NULL; i++) { if (s) close(s); if (xflag) s = socks_connect(host, portlist[i], hints, proxyhost, proxyport, proxyhints, socksv, Pflag); else s = remote_connect(host, portlist[i], hints); if (s < 0) continue; ret = 0; if (vflag || zflag) { /* For UDP, make sure we are connected. */ if (uflag) { if (udptest(s) == -1) { ret = 1; continue; } } /* Don't look up port if -n. */ if (nflag) sv = NULL; else { sv = getservbyport( ntohs(atoi(portlist[i])), uflag ? "udp" : "tcp"); } fprintf(stderr, "Connection to %s %s port [%s/%s] " "succeeded!\n", host, portlist[i], uflag ? "udp" : "tcp", sv ? sv->s_name : "*"); } if (Fflag) fdpass(s); else if (!zflag) readwrite(s); } } if (s) close(s); exit(ret); } /* * unix_bind() * Returns a unix socket bound to the given path */ int unix_bind(char *path) { struct sockaddr_un sun; int s; /* Create unix domain socket. */ if ((s = socket(AF_UNIX, uflag ? SOCK_DGRAM : SOCK_STREAM, 0)) < 0) return (-1); memset(&sun, 0, sizeof(struct sockaddr_un)); sun.sun_family = AF_UNIX; if (strlcpy(sun.sun_path, path, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) { close(s); errno = ENAMETOOLONG; return (-1); } if (bind(s, (struct sockaddr *)&sun, SUN_LEN(&sun)) < 0) { close(s); return (-1); } return (s); } /* * unix_connect() * Returns a socket connected to a local unix socket. Returns -1 on failure. */ int unix_connect(char *path) { struct sockaddr_un sun; int s; if (uflag) { if ((s = unix_bind(unix_dg_tmp_socket)) < 0) return (-1); } else { if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) return (-1); } (void)fcntl(s, F_SETFD, FD_CLOEXEC); memset(&sun, 0, sizeof(struct sockaddr_un)); sun.sun_family = AF_UNIX; if (strlcpy(sun.sun_path, path, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) { close(s); errno = ENAMETOOLONG; return (-1); } if (connect(s, (struct sockaddr *)&sun, SUN_LEN(&sun)) < 0) { close(s); return (-1); } return (s); } /* * unix_listen() * Create a unix domain socket, and listen on it. */ int unix_listen(char *path) { int s; if ((s = unix_bind(path)) < 0) return (-1); if (listen(s, 5) < 0) { close(s); return (-1); } return (s); } /* * remote_connect() * Returns a socket connected to a remote host. Properly binds to a local * port or source address if needed. Returns -1 on failure. */ int remote_connect(const char *host, const char *port, struct addrinfo hints) { struct addrinfo *res, *res0; int s, error, on = 1; if ((error = getaddrinfo(host, port, &hints, &res))) errx(1, "getaddrinfo: %s", gai_strerror(error)); res0 = res; do { if ((s = socket(res0->ai_family, res0->ai_socktype, res0->ai_protocol)) < 0) continue; -#ifdef IPSEC - if (ipsec_policy[0] != NULL) - add_ipsec_policy(s, ipsec_policy[0]); - if (ipsec_policy[1] != NULL) - add_ipsec_policy(s, ipsec_policy[1]); -#endif if (rtableid >= 0 && (setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, sizeof(rtableid)) == -1)) err(1, "setsockopt SO_SETFIB"); /* Bind to a local port or source address if specified. */ if (sflag || pflag) { struct addrinfo ahints, *ares; /* try IP_BINDANY, but don't insist */ setsockopt(s, IPPROTO_IP, IP_BINDANY, &on, sizeof(on)); memset(&ahints, 0, sizeof(struct addrinfo)); ahints.ai_family = res0->ai_family; ahints.ai_socktype = uflag ? SOCK_DGRAM : SOCK_STREAM; ahints.ai_protocol = uflag ? IPPROTO_UDP : IPPROTO_TCP; ahints.ai_flags = AI_PASSIVE; if ((error = getaddrinfo(sflag, pflag, &ahints, &ares))) errx(1, "getaddrinfo: %s", gai_strerror(error)); if (bind(s, (struct sockaddr *)ares->ai_addr, ares->ai_addrlen) < 0) err(1, "bind failed"); freeaddrinfo(ares); } set_common_sockopts(s, res0->ai_family); if (timeout_connect(s, res0->ai_addr, res0->ai_addrlen) == 0) break; else if (vflag) warn("connect to %s port %s (%s) failed", host, port, uflag ? "udp" : "tcp"); close(s); s = -1; } while ((res0 = res0->ai_next) != NULL); freeaddrinfo(res); return (s); } int timeout_connect(int s, const struct sockaddr *name, socklen_t namelen) { struct pollfd pfd; socklen_t optlen; int flags, optval; int ret; if (timeout != -1) { flags = fcntl(s, F_GETFL, 0); if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) err(1, "set non-blocking mode"); } if ((ret = connect(s, name, namelen)) != 0 && errno == EINPROGRESS) { pfd.fd = s; pfd.events = POLLOUT; if ((ret = poll(&pfd, 1, timeout)) == 1) { optlen = sizeof(optval); if ((ret = getsockopt(s, SOL_SOCKET, SO_ERROR, &optval, &optlen)) == 0) { errno = optval; ret = optval == 0 ? 0 : -1; } } else if (ret == 0) { errno = ETIMEDOUT; ret = -1; } else err(1, "poll failed"); } if (timeout != -1 && fcntl(s, F_SETFL, flags) == -1) err(1, "restoring flags"); return (ret); } /* * local_listen() * Returns a socket listening on a local port, binds to specified source * address. Returns -1 on failure. */ int local_listen(char *host, char *port, struct addrinfo hints) { struct addrinfo *res, *res0; int s, ret, x = 1; int error; /* Allow nodename to be null. */ hints.ai_flags |= AI_PASSIVE; /* * In the case of binding to a wildcard address * default to binding to an ipv4 address. */ if (host == NULL && hints.ai_family == AF_UNSPEC) hints.ai_family = AF_INET; if ((error = getaddrinfo(host, port, &hints, &res))) errx(1, "getaddrinfo: %s", gai_strerror(error)); res0 = res; do { if ((s = socket(res0->ai_family, res0->ai_socktype, res0->ai_protocol)) < 0) continue; if (rtableid >= 0 && (setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, sizeof(rtableid)) == -1)) err(1, "setsockopt SO_SETFIB"); ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &x, sizeof(x)); if (ret == -1) err(1, NULL); -#ifdef IPSEC - if (ipsec_policy[0] != NULL) - add_ipsec_policy(s, ipsec_policy[0]); - if (ipsec_policy[1] != NULL) - add_ipsec_policy(s, ipsec_policy[1]); -#endif + if (FreeBSD_Oflag) { if (setsockopt(s, IPPROTO_TCP, TCP_NOOPT, &FreeBSD_Oflag, sizeof(FreeBSD_Oflag)) == -1) err(1, "disable TCP options"); } set_common_sockopts(s, res0->ai_family); if (bind(s, (struct sockaddr *)res0->ai_addr, res0->ai_addrlen) == 0) break; close(s); s = -1; } while ((res0 = res0->ai_next) != NULL); if (!uflag && s != -1) { if (listen(s, 1) < 0) err(1, "listen"); } freeaddrinfo(res); return (s); } /* * readwrite() * Loop that polls on the network file descriptor and stdin. */ void readwrite(int net_fd) { struct pollfd pfd[4]; int stdin_fd = STDIN_FILENO; int stdout_fd = STDOUT_FILENO; unsigned char netinbuf[BUFSIZE]; size_t netinbufpos = 0; unsigned char stdinbuf[BUFSIZE]; size_t stdinbufpos = 0; int n, num_fds; ssize_t ret; /* don't read from stdin if requested */ if (dflag) stdin_fd = -1; /* stdin */ pfd[POLL_STDIN].fd = stdin_fd; pfd[POLL_STDIN].events = POLLIN; /* network out */ pfd[POLL_NETOUT].fd = net_fd; pfd[POLL_NETOUT].events = 0; /* network in */ pfd[POLL_NETIN].fd = net_fd; pfd[POLL_NETIN].events = POLLIN; /* stdout */ pfd[POLL_STDOUT].fd = stdout_fd; pfd[POLL_STDOUT].events = 0; while (1) { /* both inputs are gone, buffers are empty, we are done */ if (pfd[POLL_STDIN].fd == -1 && pfd[POLL_NETIN].fd == -1 && stdinbufpos == 0 && netinbufpos == 0) { close(net_fd); return; } /* both outputs are gone, we can't continue */ if (pfd[POLL_NETOUT].fd == -1 && pfd[POLL_STDOUT].fd == -1) { close(net_fd); return; } /* listen and net in gone, queues empty, done */ if (lflag && pfd[POLL_NETIN].fd == -1 && stdinbufpos == 0 && netinbufpos == 0) { close(net_fd); return; } /* help says -i is for "wait between lines sent". We read and * write arbitrary amounts of data, and we don't want to start * scanning for newlines, so this is as good as it gets */ if (iflag) sleep(iflag); /* poll */ num_fds = poll(pfd, 4, timeout); /* treat poll errors */ if (num_fds == -1) { close(net_fd); err(1, "polling error"); } /* timeout happened */ if (num_fds == 0) return; /* treat socket error conditions */ for (n = 0; n < 4; n++) { if (pfd[n].revents & (POLLERR|POLLNVAL)) { pfd[n].fd = -1; } } /* reading is possible after HUP */ if (pfd[POLL_STDIN].events & POLLIN && pfd[POLL_STDIN].revents & POLLHUP && ! (pfd[POLL_STDIN].revents & POLLIN)) pfd[POLL_STDIN].fd = -1; if (pfd[POLL_NETIN].events & POLLIN && pfd[POLL_NETIN].revents & POLLHUP && ! (pfd[POLL_NETIN].revents & POLLIN)) pfd[POLL_NETIN].fd = -1; if (pfd[POLL_NETOUT].revents & POLLHUP) { if (Nflag) shutdown(pfd[POLL_NETOUT].fd, SHUT_WR); pfd[POLL_NETOUT].fd = -1; } /* if HUP, stop watching stdout */ if (pfd[POLL_STDOUT].revents & POLLHUP) pfd[POLL_STDOUT].fd = -1; /* if no net out, stop watching stdin */ if (pfd[POLL_NETOUT].fd == -1) pfd[POLL_STDIN].fd = -1; /* if no stdout, stop watching net in */ if (pfd[POLL_STDOUT].fd == -1) { if (pfd[POLL_NETIN].fd != -1) shutdown(pfd[POLL_NETIN].fd, SHUT_RD); pfd[POLL_NETIN].fd = -1; } /* try to read from stdin */ if (pfd[POLL_STDIN].revents & POLLIN && stdinbufpos < BUFSIZE) { ret = fillbuf(pfd[POLL_STDIN].fd, stdinbuf, &stdinbufpos); /* error or eof on stdin - remove from pfd */ if (ret == 0 || ret == -1) pfd[POLL_STDIN].fd = -1; /* read something - poll net out */ if (stdinbufpos > 0) pfd[POLL_NETOUT].events = POLLOUT; /* filled buffer - remove self from polling */ if (stdinbufpos == BUFSIZE) pfd[POLL_STDIN].events = 0; } /* try to write to network */ if (pfd[POLL_NETOUT].revents & POLLOUT && stdinbufpos > 0) { ret = drainbuf(pfd[POLL_NETOUT].fd, stdinbuf, &stdinbufpos); if (ret == -1) pfd[POLL_NETOUT].fd = -1; /* buffer empty - remove self from polling */ if (stdinbufpos == 0) pfd[POLL_NETOUT].events = 0; /* buffer no longer full - poll stdin again */ if (stdinbufpos < BUFSIZE) pfd[POLL_STDIN].events = POLLIN; } /* try to read from network */ if (pfd[POLL_NETIN].revents & POLLIN && netinbufpos < BUFSIZE) { ret = fillbuf(pfd[POLL_NETIN].fd, netinbuf, &netinbufpos); if (ret == -1) pfd[POLL_NETIN].fd = -1; /* eof on net in - remove from pfd */ if (ret == 0) { shutdown(pfd[POLL_NETIN].fd, SHUT_RD); pfd[POLL_NETIN].fd = -1; } /* read something - poll stdout */ if (netinbufpos > 0) pfd[POLL_STDOUT].events = POLLOUT; /* filled buffer - remove self from polling */ if (netinbufpos == BUFSIZE) pfd[POLL_NETIN].events = 0; /* handle telnet */ if (tflag) atelnet(pfd[POLL_NETIN].fd, netinbuf, netinbufpos); } /* try to write to stdout */ if (pfd[POLL_STDOUT].revents & POLLOUT && netinbufpos > 0) { ret = drainbuf(pfd[POLL_STDOUT].fd, netinbuf, &netinbufpos); if (ret == -1) pfd[POLL_STDOUT].fd = -1; /* buffer empty - remove self from polling */ if (netinbufpos == 0) pfd[POLL_STDOUT].events = 0; /* buffer no longer full - poll net in again */ if (netinbufpos < BUFSIZE) pfd[POLL_NETIN].events = POLLIN; } /* stdin gone and queue empty? */ if (pfd[POLL_STDIN].fd == -1 && stdinbufpos == 0) { if (pfd[POLL_NETOUT].fd != -1 && Nflag) shutdown(pfd[POLL_NETOUT].fd, SHUT_WR); pfd[POLL_NETOUT].fd = -1; } /* net in gone and queue empty? */ if (pfd[POLL_NETIN].fd == -1 && netinbufpos == 0) { pfd[POLL_STDOUT].fd = -1; } } } ssize_t drainbuf(int fd, unsigned char *buf, size_t *bufpos) { ssize_t n; ssize_t adjust; n = write(fd, buf, *bufpos); /* don't treat EAGAIN, EINTR as error */ if (n == -1 && (errno == EAGAIN || errno == EINTR)) n = -2; if (n <= 0) return n; /* adjust buffer */ adjust = *bufpos - n; if (adjust > 0) memmove(buf, buf + n, adjust); *bufpos -= n; return n; } ssize_t fillbuf(int fd, unsigned char *buf, size_t *bufpos) { size_t num = BUFSIZE - *bufpos; ssize_t n; n = read(fd, buf + *bufpos, num); /* don't treat EAGAIN, EINTR as error */ if (n == -1 && (errno == EAGAIN || errno == EINTR)) n = -2; if (n <= 0) return n; *bufpos += n; return n; } /* * fdpass() * Pass the connected file descriptor to stdout and exit. */ void fdpass(int nfd) { struct msghdr mh; union { struct cmsghdr hdr; char buf[CMSG_SPACE(sizeof(int))]; } cmsgbuf; struct cmsghdr *cmsg; struct iovec iov; char c = '\0'; ssize_t r; struct pollfd pfd; /* Avoid obvious stupidity */ if (isatty(STDOUT_FILENO)) errx(1, "Cannot pass file descriptor to tty"); bzero(&mh, sizeof(mh)); bzero(&cmsgbuf, sizeof(cmsgbuf)); bzero(&iov, sizeof(iov)); mh.msg_control = (caddr_t)&cmsgbuf.buf; mh.msg_controllen = sizeof(cmsgbuf.buf); cmsg = CMSG_FIRSTHDR(&mh); cmsg->cmsg_len = CMSG_LEN(sizeof(int)); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; *(int *)CMSG_DATA(cmsg) = nfd; iov.iov_base = &c; iov.iov_len = 1; mh.msg_iov = &iov; mh.msg_iovlen = 1; bzero(&pfd, sizeof(pfd)); pfd.fd = STDOUT_FILENO; pfd.events = POLLOUT; for (;;) { r = sendmsg(STDOUT_FILENO, &mh, 0); if (r == -1) { if (errno == EAGAIN || errno == EINTR) { if (poll(&pfd, 1, -1) == -1) err(1, "poll"); continue; } err(1, "sendmsg"); } else if (r != 1) errx(1, "sendmsg: unexpected return value %zd", r); else break; } exit(0); } /* Deal with RFC 854 WILL/WONT DO/DONT negotiation. */ void atelnet(int nfd, unsigned char *buf, unsigned int size) { unsigned char *p, *end; unsigned char obuf[4]; if (size < 3) return; end = buf + size - 2; for (p = buf; p < end; p++) { if (*p != IAC) continue; obuf[0] = IAC; p++; if ((*p == WILL) || (*p == WONT)) obuf[1] = DONT; else if ((*p == DO) || (*p == DONT)) obuf[1] = WONT; else continue; p++; obuf[2] = *p; if (atomicio(vwrite, nfd, obuf, 3) != 3) warn("Write Error!"); } } /* * build_ports() * Build an array of ports in portlist[], listing each port * that we should try to connect to. */ void build_ports(char *p) { const char *errstr; char *n; int hi, lo, cp; int x = 0; if ((n = strchr(p, '-')) != NULL) { *n = '\0'; n++; /* Make sure the ports are in order: lowest->highest. */ hi = strtonum(n, 1, PORT_MAX, &errstr); if (errstr) errx(1, "port number %s: %s", errstr, n); lo = strtonum(p, 1, PORT_MAX, &errstr); if (errstr) errx(1, "port number %s: %s", errstr, p); if (lo > hi) { cp = hi; hi = lo; lo = cp; } /* Load ports sequentially. */ for (cp = lo; cp <= hi; cp++) { portlist[x] = calloc(1, PORT_MAX_LEN); if (portlist[x] == NULL) err(1, NULL); snprintf(portlist[x], PORT_MAX_LEN, "%d", cp); x++; } /* Randomly swap ports. */ if (rflag) { int y; char *c; for (x = 0; x <= (hi - lo); x++) { y = (arc4random() & 0xFFFF) % (hi - lo); c = portlist[x]; portlist[x] = portlist[y]; portlist[y] = c; } } } else { hi = strtonum(p, 1, PORT_MAX, &errstr); if (errstr) errx(1, "port number %s: %s", errstr, p); portlist[0] = strdup(p); if (portlist[0] == NULL) err(1, NULL); } } /* * udptest() * Do a few writes to see if the UDP port is there. * Fails once PF state table is full. */ int udptest(int s) { int i, ret; for (i = 0; i <= 3; i++) { if (write(s, "X", 1) == 1) ret = 1; else ret = -1; } return (ret); } void set_common_sockopts(int s, int af) { int x = 1; if (Sflag) { if (setsockopt(s, IPPROTO_TCP, TCP_MD5SIG, &x, sizeof(x)) == -1) err(1, NULL); } if (Dflag) { if (setsockopt(s, SOL_SOCKET, SO_DEBUG, &x, sizeof(x)) == -1) err(1, NULL); } if (Tflag != -1) { int proto, option; if (af == AF_INET6) { proto = IPPROTO_IPV6; option = IPV6_TCLASS; } else { proto = IPPROTO_IP; option = IP_TOS; } if (setsockopt(s, proto, option, &Tflag, sizeof(Tflag)) == -1) err(1, "set IP ToS"); } if (Iflag) { if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &Iflag, sizeof(Iflag)) == -1) err(1, "set TCP receive buffer size"); } if (Oflag) { if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &Oflag, sizeof(Oflag)) == -1) err(1, "set TCP send buffer size"); } if (FreeBSD_Oflag) { if (setsockopt(s, IPPROTO_TCP, TCP_NOOPT, &FreeBSD_Oflag, sizeof(FreeBSD_Oflag)) == -1) err(1, "disable TCP options"); } +#ifdef IPSEC + if (ipsec_policy[0] != NULL) + add_ipsec_policy(s, af, ipsec_policy[0]); + if (ipsec_policy[1] != NULL) + add_ipsec_policy(s, af, ipsec_policy[1]); +#endif } int map_tos(char *s, int *val) { /* DiffServ Codepoints and other TOS mappings */ const struct toskeywords { const char *keyword; int val; } *t, toskeywords[] = { { "af11", IPTOS_DSCP_AF11 }, { "af12", IPTOS_DSCP_AF12 }, { "af13", IPTOS_DSCP_AF13 }, { "af21", IPTOS_DSCP_AF21 }, { "af22", IPTOS_DSCP_AF22 }, { "af23", IPTOS_DSCP_AF23 }, { "af31", IPTOS_DSCP_AF31 }, { "af32", IPTOS_DSCP_AF32 }, { "af33", IPTOS_DSCP_AF33 }, { "af41", IPTOS_DSCP_AF41 }, { "af42", IPTOS_DSCP_AF42 }, { "af43", IPTOS_DSCP_AF43 }, { "critical", IPTOS_PREC_CRITIC_ECP }, { "cs0", IPTOS_DSCP_CS0 }, { "cs1", IPTOS_DSCP_CS1 }, { "cs2", IPTOS_DSCP_CS2 }, { "cs3", IPTOS_DSCP_CS3 }, { "cs4", IPTOS_DSCP_CS4 }, { "cs5", IPTOS_DSCP_CS5 }, { "cs6", IPTOS_DSCP_CS6 }, { "cs7", IPTOS_DSCP_CS7 }, { "ef", IPTOS_DSCP_EF }, { "inetcontrol", IPTOS_PREC_INTERNETCONTROL }, { "lowdelay", IPTOS_LOWDELAY }, { "netcontrol", IPTOS_PREC_NETCONTROL }, { "reliability", IPTOS_RELIABILITY }, { "throughput", IPTOS_THROUGHPUT }, { NULL, -1 }, }; for (t = toskeywords; t->keyword != NULL; t++) { if (strcmp(s, t->keyword) == 0) { *val = t->val; return (1); } } return (0); } void report_connect(const struct sockaddr *sa, socklen_t salen) { char remote_host[NI_MAXHOST]; char remote_port[NI_MAXSERV]; int herr; int flags = NI_NUMERICSERV; if (nflag) flags |= NI_NUMERICHOST; if ((herr = getnameinfo(sa, salen, remote_host, sizeof(remote_host), remote_port, sizeof(remote_port), flags)) != 0) { if (herr == EAI_SYSTEM) err(1, "getnameinfo"); else errx(1, "getnameinfo: %s", gai_strerror(herr)); } fprintf(stderr, "Connection from %s %s " "received!\n", remote_host, remote_port); } void help(void) { usage(0); fprintf(stderr, "\tCommand Summary:\n\ \t-4 Use IPv4\n\ \t-6 Use IPv6\n\ \t-D Enable the debug socket option\n\ \t-d Detach from stdin\n"); #ifdef IPSEC fprintf(stderr, "\ \t-E Use IPsec ESP\n\ \t-e policy Use specified IPsec policy\n"); #endif fprintf(stderr, "\ \t-F Pass socket fd\n\ \t-h This help text\n\ \t-I length TCP receive buffer length\n\ \t-i secs\t Delay interval for lines sent, ports scanned\n\ \t-k Keep inbound sockets open for multiple connects\n\ \t-l Listen mode, for inbound connects\n\ \t-N Shutdown the network socket after EOF on stdin\n\ \t-n Suppress name/port resolutions\n\ \t--no-tcpopt Disable TCP options\n\ \t-O length TCP send buffer length\n\ \t-P proxyuser\tUsername for proxy authentication\n\ \t-p port\t Specify local port for remote connects\n\ \t-r Randomize remote ports\n\ \t-S Enable the TCP MD5 signature option\n\ \t-s addr\t Local source address\n\ \t-T toskeyword\tSet IP Type of Service\n\ \t-t Answer TELNET negotiation\n\ \t-U Use UNIX domain socket\n\ \t-u UDP mode\n\ \t-V rtable Specify alternate routing table\n\ \t-v Verbose\n\ \t-w secs\t Timeout for connects and final net reads\n\ \t-X proto Proxy protocol: \"4\", \"5\" (SOCKS) or \"connect\"\n\ \t-x addr[:port]\tSpecify proxy address and port\n\ \t-z Zero-I/O mode [used for scanning]\n\ Port numbers can be individual or ranges: lo-hi [inclusive]\n"); #ifdef IPSEC fprintf(stderr, "See ipsec_set_policy(3) for -e argument format\n"); #endif exit(1); } #ifdef IPSEC void -add_ipsec_policy(int s, char *policy) +add_ipsec_policy(int s, int af, char *policy) { char *raw; int e; raw = ipsec_set_policy(policy, strlen(policy)); if (raw == NULL) errx(1, "ipsec_set_policy `%s': %s", policy, ipsec_strerror()); - e = setsockopt(s, IPPROTO_IP, IP_IPSEC_POLICY, raw, - ipsec_get_policylen(raw)); + if (af == AF_INET) + e = setsockopt(s, IPPROTO_IP, IP_IPSEC_POLICY, raw, + ipsec_get_policylen(raw)); + if (af == AF_INET6) + e = setsockopt(s, IPPROTO_IPV6, IPV6_IPSEC_POLICY, raw, + ipsec_get_policylen(raw)); if (e < 0) err(1, "ipsec policy cannot be configured"); free(raw); if (vflag) fprintf(stderr, "ipsec policy configured: `%s'\n", policy); return; } #endif /* IPSEC */ void usage(int ret) { fprintf(stderr, #ifdef IPSEC "usage: nc [-46DdEFhklNnrStUuvz] [-e policy] [-I length] [-i interval] [-O length]\n" #else "usage: nc [-46DdFhklNnrStUuvz] [-I length] [-i interval] [-O length]\n" #endif "\t [-P proxy_username] [-p source_port] [-s source] [-T ToS]\n" "\t [-V rtable] [-w timeout] [-X proxy_protocol]\n" "\t [-x proxy_address[:port]] [destination] [port]\n"); if (ret) exit(1); } diff --git a/lib/libipsec/pfkey.c b/lib/libipsec/pfkey.c index fe907b394960..34a3ac300989 100644 --- a/lib/libipsec/pfkey.c +++ b/lib/libipsec/pfkey.c @@ -1,2138 +1,2134 @@ /* $KAME: pfkey.c,v 1.46 2003/08/26 03:37:06 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "ipsec_strerror.h" #include "libpfkey.h" #define CALLOC(size, cast) (cast)calloc(1, (size)) static int findsupportedmap(int); static int setsupportedmap(struct sadb_supported *); static struct sadb_alg *findsupportedalg(u_int, u_int); static int pfkey_send_x1(int, u_int, u_int, u_int, struct sockaddr *, struct sockaddr *, u_int32_t, u_int32_t, u_int, caddr_t, u_int, u_int, u_int, u_int, u_int, u_int32_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t); static int pfkey_send_x2(int, u_int, u_int, u_int, struct sockaddr *, struct sockaddr *, u_int32_t); static int pfkey_send_x3(int, u_int, u_int); static int pfkey_send_x4(int, u_int, struct sockaddr *, u_int, struct sockaddr *, u_int, u_int, u_int64_t, u_int64_t, char *, int, u_int32_t); static int pfkey_send_x5(int, u_int, u_int32_t); static caddr_t pfkey_setsadbmsg(caddr_t, caddr_t, u_int, u_int, u_int, u_int32_t, pid_t); static caddr_t pfkey_setsadbsa(caddr_t, caddr_t, u_int32_t, u_int, u_int, u_int, u_int32_t); static caddr_t pfkey_setsadbaddr(caddr_t, caddr_t, u_int, struct sockaddr *, u_int, u_int); static caddr_t pfkey_setsadbkey(caddr_t, caddr_t, u_int, caddr_t, u_int); static caddr_t pfkey_setsadblifetime(caddr_t, caddr_t, u_int, u_int32_t, u_int32_t, u_int32_t, u_int32_t); static caddr_t pfkey_setsadbxsa2(caddr_t, caddr_t, u_int32_t, u_int32_t); /* * make and search supported algorithm structure. */ static struct sadb_supported *ipsec_supported[] = { NULL, NULL, NULL, NULL }; static int supported_map[] = { SADB_SATYPE_AH, SADB_SATYPE_ESP, SADB_X_SATYPE_IPCOMP, SADB_X_SATYPE_TCPSIGNATURE }; static int findsupportedmap(satype) int satype; { int i; for (i = 0; i < sizeof(supported_map)/sizeof(supported_map[0]); i++) if (supported_map[i] == satype) return i; return -1; } static struct sadb_alg * findsupportedalg(satype, alg_id) u_int satype, alg_id; { int algno; int tlen; caddr_t p; /* validity check */ algno = findsupportedmap(satype); if (algno == -1) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return NULL; } if (ipsec_supported[algno] == NULL) { __ipsec_errcode = EIPSEC_DO_GET_SUPP_LIST; return NULL; } tlen = ipsec_supported[algno]->sadb_supported_len - sizeof(struct sadb_supported); p = (caddr_t)(ipsec_supported[algno] + 1); while (tlen > 0) { if (tlen < sizeof(struct sadb_alg)) { /* invalid format */ break; } if (((struct sadb_alg *)p)->sadb_alg_id == alg_id) return (struct sadb_alg *)p; tlen -= sizeof(struct sadb_alg); p += sizeof(struct sadb_alg); } __ipsec_errcode = EIPSEC_NOT_SUPPORTED; return NULL; } static int setsupportedmap(sup) struct sadb_supported *sup; { struct sadb_supported **ipsup; switch (sup->sadb_supported_exttype) { case SADB_EXT_SUPPORTED_AUTH: ipsup = &ipsec_supported[findsupportedmap(SADB_SATYPE_AH)]; break; case SADB_EXT_SUPPORTED_ENCRYPT: ipsup = &ipsec_supported[findsupportedmap(SADB_SATYPE_ESP)]; break; default: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } if (*ipsup) free(*ipsup); *ipsup = malloc(sup->sadb_supported_len); if (!*ipsup) { __ipsec_set_strerror(strerror(errno)); return -1; } memcpy(*ipsup, sup, sup->sadb_supported_len); return 0; } /* * check key length against algorithm specified. * This function is called with SADB_EXT_SUPPORTED_{AUTH,ENCRYPT} as the * augument, and only calls to ipsec_check_keylen2(); * keylen is the unit of bit. * OUT: * -1: invalid. * 0: valid. */ int ipsec_check_keylen(supported, alg_id, keylen) u_int supported; u_int alg_id; u_int keylen; { int satype; /* validity check */ switch (supported) { case SADB_EXT_SUPPORTED_AUTH: satype = SADB_SATYPE_AH; break; case SADB_EXT_SUPPORTED_ENCRYPT: satype = SADB_SATYPE_ESP; break; default: __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } return ipsec_check_keylen2(satype, alg_id, keylen); } /* * check key length against algorithm specified. * satype is one of satype defined at pfkeyv2.h. * keylen is the unit of bit. * OUT: * -1: invalid. * 0: valid. */ int ipsec_check_keylen2(satype, alg_id, keylen) u_int satype; u_int alg_id; u_int keylen; { struct sadb_alg *alg; alg = findsupportedalg(satype, alg_id); if (!alg) return -1; if (keylen < alg->sadb_alg_minbits || keylen > alg->sadb_alg_maxbits) { __ipsec_errcode = EIPSEC_INVAL_KEYLEN; return -1; } __ipsec_errcode = EIPSEC_NO_ERROR; return 0; } /* * get max/min key length against algorithm specified. * satype is one of satype defined at pfkeyv2.h. * keylen is the unit of bit. * OUT: * -1: invalid. * 0: valid. */ int ipsec_get_keylen(supported, alg_id, alg0) u_int supported, alg_id; struct sadb_alg *alg0; { struct sadb_alg *alg; u_int satype; /* validity check */ if (!alg0) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } switch (supported) { case SADB_EXT_SUPPORTED_AUTH: satype = SADB_SATYPE_AH; break; case SADB_EXT_SUPPORTED_ENCRYPT: satype = SADB_SATYPE_ESP; break; default: __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } alg = findsupportedalg(satype, alg_id); if (!alg) return -1; memcpy(alg0, alg, sizeof(*alg0)); __ipsec_errcode = EIPSEC_NO_ERROR; return 0; } /* * set the rate for SOFT lifetime against HARD one. * If rate is more than 100 or equal to zero, then set to 100. */ static u_int soft_lifetime_allocations_rate = PFKEY_SOFT_LIFETIME_RATE; static u_int soft_lifetime_bytes_rate = PFKEY_SOFT_LIFETIME_RATE; static u_int soft_lifetime_addtime_rate = PFKEY_SOFT_LIFETIME_RATE; static u_int soft_lifetime_usetime_rate = PFKEY_SOFT_LIFETIME_RATE; u_int pfkey_set_softrate(type, rate) u_int type, rate; { __ipsec_errcode = EIPSEC_NO_ERROR; if (rate > 100 || rate == 0) rate = 100; switch (type) { case SADB_X_LIFETIME_ALLOCATIONS: soft_lifetime_allocations_rate = rate; return 0; case SADB_X_LIFETIME_BYTES: soft_lifetime_bytes_rate = rate; return 0; case SADB_X_LIFETIME_ADDTIME: soft_lifetime_addtime_rate = rate; return 0; case SADB_X_LIFETIME_USETIME: soft_lifetime_usetime_rate = rate; return 0; } __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return 1; } /* * get current rate for SOFT lifetime against HARD one. * ATTENTION: ~0 is returned if invalid type was passed. */ u_int pfkey_get_softrate(type) u_int type; { switch (type) { case SADB_X_LIFETIME_ALLOCATIONS: return soft_lifetime_allocations_rate; case SADB_X_LIFETIME_BYTES: return soft_lifetime_bytes_rate; case SADB_X_LIFETIME_ADDTIME: return soft_lifetime_addtime_rate; case SADB_X_LIFETIME_USETIME: return soft_lifetime_usetime_rate; } return ~0; } /* * sending SADB_GETSPI message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_getspi(so, satype, mode, src, dst, min, max, reqid, seq) int so; u_int satype, mode; struct sockaddr *src, *dst; u_int32_t min, max, reqid, seq; { struct sadb_msg *newmsg; caddr_t ep; int len; int need_spirange = 0; caddr_t p; int plen; /* validity check */ if (src == NULL || dst == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if (src->sa_family != dst->sa_family) { __ipsec_errcode = EIPSEC_FAMILY_MISMATCH; return -1; } if (min > max || (min > 0 && min <= 255)) { __ipsec_errcode = EIPSEC_INVAL_SPI; return -1; } switch (src->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: __ipsec_errcode = EIPSEC_INVAL_FAMILY; return -1; } /* create new sadb_msg to send. */ len = sizeof(struct sadb_msg) + sizeof(struct sadb_x_sa2) + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa_len) + sizeof(struct sadb_address) + PFKEY_ALIGN8(dst->sa_len); if (min > 255 && max < ~0) { need_spirange++; len += sizeof(struct sadb_spirange); } if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, SADB_GETSPI, len, satype, seq, getpid()); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbxsa2(p, ep, mode, reqid); if (!p) { free(newmsg); return -1; } /* set sadb_address for source */ p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_SRC, src, plen, IPSEC_ULPROTO_ANY); if (!p) { free(newmsg); return -1; } /* set sadb_address for destination */ p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_DST, dst, plen, IPSEC_ULPROTO_ANY); if (!p) { free(newmsg); return -1; } /* proccessing spi range */ if (need_spirange) { struct sadb_spirange spirange; if (p + sizeof(spirange) > ep) { free(newmsg); return -1; } memset(&spirange, 0, sizeof(spirange)); spirange.sadb_spirange_len = PFKEY_UNIT64(sizeof(spirange)); spirange.sadb_spirange_exttype = SADB_EXT_SPIRANGE; spirange.sadb_spirange_min = min; spirange.sadb_spirange_max = max; memcpy(p, &spirange, sizeof(spirange)); p += sizeof(spirange); } if (p != ep) { free(newmsg); return -1; } /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* * sending SADB_UPDATE message to the kernel. * The length of key material is a_keylen + e_keylen. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_update(so, satype, mode, src, dst, spi, reqid, wsize, keymat, e_type, e_keylen, a_type, a_keylen, flags, l_alloc, l_bytes, l_addtime, l_usetime, seq) int so; u_int satype, mode, wsize; struct sockaddr *src, *dst; u_int32_t spi, reqid; caddr_t keymat; u_int e_type, e_keylen, a_type, a_keylen, flags; u_int32_t l_alloc; u_int64_t l_bytes, l_addtime, l_usetime; u_int32_t seq; { int len; if ((len = pfkey_send_x1(so, SADB_UPDATE, satype, mode, src, dst, spi, reqid, wsize, keymat, e_type, e_keylen, a_type, a_keylen, flags, l_alloc, l_bytes, l_addtime, l_usetime, seq)) < 0) return -1; return len; } /* * sending SADB_ADD message to the kernel. * The length of key material is a_keylen + e_keylen. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_add(so, satype, mode, src, dst, spi, reqid, wsize, keymat, e_type, e_keylen, a_type, a_keylen, flags, l_alloc, l_bytes, l_addtime, l_usetime, seq) int so; u_int satype, mode, wsize; struct sockaddr *src, *dst; u_int32_t spi, reqid; caddr_t keymat; u_int e_type, e_keylen, a_type, a_keylen, flags; u_int32_t l_alloc; u_int64_t l_bytes, l_addtime, l_usetime; u_int32_t seq; { int len; if ((len = pfkey_send_x1(so, SADB_ADD, satype, mode, src, dst, spi, reqid, wsize, keymat, e_type, e_keylen, a_type, a_keylen, flags, l_alloc, l_bytes, l_addtime, l_usetime, seq)) < 0) return -1; return len; } /* * sending SADB_DELETE message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_delete(so, satype, mode, src, dst, spi) int so; u_int satype, mode; struct sockaddr *src, *dst; u_int32_t spi; { int len; if ((len = pfkey_send_x2(so, SADB_DELETE, satype, mode, src, dst, spi)) < 0) return -1; return len; } /* * sending SADB_DELETE without spi to the kernel. This is * the "delete all" request (an extension also present in * Solaris). * * OUT: * positive: success and return length sent * -1 : error occured, and set errno */ int pfkey_send_delete_all(so, satype, mode, src, dst) int so; u_int satype, mode; struct sockaddr *src, *dst; { struct sadb_msg *newmsg; int len; caddr_t p; int plen; caddr_t ep; /* validity check */ if (src == NULL || dst == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if (src->sa_family != dst->sa_family) { __ipsec_errcode = EIPSEC_FAMILY_MISMATCH; return -1; } switch (src->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: __ipsec_errcode = EIPSEC_INVAL_FAMILY; return -1; } /* create new sadb_msg to reply. */ len = sizeof(struct sadb_msg) + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa_len) + sizeof(struct sadb_address) + PFKEY_ALIGN8(dst->sa_len); if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, SADB_DELETE, len, satype, 0, getpid()); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_SRC, src, plen, IPSEC_ULPROTO_ANY); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_DST, dst, plen, IPSEC_ULPROTO_ANY); if (!p || p != ep) { free(newmsg); return -1; } /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* * sending SADB_GET message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_get(so, satype, mode, src, dst, spi) int so; u_int satype, mode; struct sockaddr *src, *dst; u_int32_t spi; { int len; if ((len = pfkey_send_x2(so, SADB_GET, satype, mode, src, dst, spi)) < 0) return -1; return len; } /* * sending SADB_REGISTER message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_register(so, satype) int so; u_int satype; { int len, algno; if (satype == SADB_SATYPE_UNSPEC) { for (algno = 0; algno < sizeof(supported_map)/sizeof(supported_map[0]); algno++) { if (ipsec_supported[algno]) { free(ipsec_supported[algno]); ipsec_supported[algno] = NULL; } } } else { algno = findsupportedmap(satype); if (algno == -1) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if (ipsec_supported[algno]) { free(ipsec_supported[algno]); ipsec_supported[algno] = NULL; } } if ((len = pfkey_send_x3(so, SADB_REGISTER, satype)) < 0) return -1; return len; } /* * receiving SADB_REGISTER message from the kernel, and copy buffer for * sadb_supported returned into ipsec_supported. * OUT: * 0: success and return length sent. * -1: error occured, and set errno. */ int pfkey_recv_register(so) int so; { pid_t pid = getpid(); struct sadb_msg *newmsg; int error = -1; /* receive message */ for (;;) { if ((newmsg = pfkey_recv(so)) == NULL) return -1; if (newmsg->sadb_msg_type == SADB_REGISTER && newmsg->sadb_msg_pid == pid) break; free(newmsg); } /* check and fix */ newmsg->sadb_msg_len = PFKEY_UNUNIT64(newmsg->sadb_msg_len); error = pfkey_set_supported(newmsg, newmsg->sadb_msg_len); free(newmsg); if (error == 0) __ipsec_errcode = EIPSEC_NO_ERROR; return error; } /* * receiving SADB_REGISTER message from the kernel, and copy buffer for * sadb_supported returned into ipsec_supported. * NOTE: sadb_msg_len must be host order. * IN: * tlen: msg length, it's to makeing sure. * OUT: * 0: success and return length sent. * -1: error occured, and set errno. */ int pfkey_set_supported(msg, tlen) struct sadb_msg *msg; int tlen; { struct sadb_supported *sup; caddr_t p; caddr_t ep; /* validity */ if (msg->sadb_msg_len != tlen) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } p = (caddr_t)msg; ep = p + tlen; p += sizeof(struct sadb_msg); while (p < ep) { sup = (struct sadb_supported *)p; if (ep < p + sizeof(*sup) || PFKEY_EXTLEN(sup) < sizeof(*sup) || ep < p + sup->sadb_supported_len) { /* invalid format */ break; } switch (sup->sadb_supported_exttype) { case SADB_EXT_SUPPORTED_AUTH: case SADB_EXT_SUPPORTED_ENCRYPT: break; default: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } /* fixed length */ sup->sadb_supported_len = PFKEY_EXTLEN(sup); /* set supported map */ if (setsupportedmap(sup) != 0) return -1; p += sup->sadb_supported_len; } if (p != ep) { __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } __ipsec_errcode = EIPSEC_NO_ERROR; return 0; } /* * sending SADB_FLUSH message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_flush(so, satype) int so; u_int satype; { int len; if ((len = pfkey_send_x3(so, SADB_FLUSH, satype)) < 0) return -1; return len; } /* * sending SADB_DUMP message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_dump(so, satype) int so; u_int satype; { int len; if ((len = pfkey_send_x3(so, SADB_DUMP, satype)) < 0) return -1; return len; } /* * sending SADB_X_PROMISC message to the kernel. * NOTE that this function handles promisc mode toggle only. * IN: * flag: set promisc off if zero, set promisc on if non-zero. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. * 0 : error occured, and set errno. * others: a pointer to new allocated buffer in which supported * algorithms is. */ int pfkey_send_promisc_toggle(so, flag) int so; int flag; { int len; if ((len = pfkey_send_x3(so, SADB_X_PROMISC, (flag ? 1 : 0))) < 0) return -1; return len; } /* * sending SADB_X_SPDADD message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdadd(so, src, prefs, dst, prefd, proto, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int prefs, prefd, proto; caddr_t policy; int policylen; u_int32_t seq; { int len; if ((len = pfkey_send_x4(so, SADB_X_SPDADD, src, prefs, dst, prefd, proto, 0, 0, policy, policylen, seq)) < 0) return -1; return len; } /* * sending SADB_X_SPDADD message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdadd2(so, src, prefs, dst, prefd, proto, ltime, vtime, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int prefs, prefd, proto; u_int64_t ltime, vtime; caddr_t policy; int policylen; u_int32_t seq; { int len; if ((len = pfkey_send_x4(so, SADB_X_SPDADD, src, prefs, dst, prefd, proto, ltime, vtime, policy, policylen, seq)) < 0) return -1; return len; } /* * sending SADB_X_SPDUPDATE message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdupdate(so, src, prefs, dst, prefd, proto, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int prefs, prefd, proto; caddr_t policy; int policylen; u_int32_t seq; { int len; if ((len = pfkey_send_x4(so, SADB_X_SPDUPDATE, src, prefs, dst, prefd, proto, 0, 0, policy, policylen, seq)) < 0) return -1; return len; } /* * sending SADB_X_SPDUPDATE message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdupdate2(so, src, prefs, dst, prefd, proto, ltime, vtime, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int prefs, prefd, proto; u_int64_t ltime, vtime; caddr_t policy; int policylen; u_int32_t seq; { int len; if ((len = pfkey_send_x4(so, SADB_X_SPDUPDATE, src, prefs, dst, prefd, proto, ltime, vtime, policy, policylen, seq)) < 0) return -1; return len; } /* * sending SADB_X_SPDDELETE message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spddelete(so, src, prefs, dst, prefd, proto, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int prefs, prefd, proto; caddr_t policy; int policylen; u_int32_t seq; { int len; if (policylen != sizeof(struct sadb_x_policy)) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if ((len = pfkey_send_x4(so, SADB_X_SPDDELETE, src, prefs, dst, prefd, proto, 0, 0, policy, policylen, seq)) < 0) return -1; return len; } /* * sending SADB_X_SPDDELETE message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spddelete2(so, spid) int so; u_int32_t spid; { int len; if ((len = pfkey_send_x5(so, SADB_X_SPDDELETE2, spid)) < 0) return -1; return len; } /* * sending SADB_X_SPDGET message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdget(so, spid) int so; u_int32_t spid; { int len; if ((len = pfkey_send_x5(so, SADB_X_SPDGET, spid)) < 0) return -1; return len; } /* * sending SADB_X_SPDSETIDX message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdsetidx(so, src, prefs, dst, prefd, proto, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int prefs, prefd, proto; caddr_t policy; int policylen; u_int32_t seq; { int len; if (policylen != sizeof(struct sadb_x_policy)) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if ((len = pfkey_send_x4(so, SADB_X_SPDSETIDX, src, prefs, dst, prefd, proto, 0, 0, policy, policylen, seq)) < 0) return -1; return len; } /* * sending SADB_SPDFLUSH message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spdflush(so) int so; { int len; if ((len = pfkey_send_x3(so, SADB_X_SPDFLUSH, SADB_SATYPE_UNSPEC)) < 0) return -1; return len; } /* * sending SADB_SPDDUMP message to the kernel. * OUT: * positive: success and return length sent. * -1 : error occured, and set errno. */ int pfkey_send_spddump(so) int so; { int len; if ((len = pfkey_send_x3(so, SADB_X_SPDDUMP, SADB_SATYPE_UNSPEC)) < 0) return -1; return len; } /* sending SADB_ADD or SADB_UPDATE message to the kernel */ static int pfkey_send_x1(so, type, satype, mode, src, dst, spi, reqid, wsize, keymat, e_type, e_keylen, a_type, a_keylen, flags, l_alloc, l_bytes, l_addtime, l_usetime, seq) int so; u_int type, satype, mode; struct sockaddr *src, *dst; u_int32_t spi, reqid; u_int wsize; caddr_t keymat; u_int e_type, e_keylen, a_type, a_keylen, flags; u_int32_t l_alloc, l_bytes, l_addtime, l_usetime, seq; { struct sadb_msg *newmsg; int len; caddr_t p; int plen; caddr_t ep; /* validity check */ if (src == NULL || dst == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if (src->sa_family != dst->sa_family) { __ipsec_errcode = EIPSEC_FAMILY_MISMATCH; return -1; } switch (src->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: __ipsec_errcode = EIPSEC_INVAL_FAMILY; return -1; } switch (satype) { case SADB_SATYPE_ESP: if (e_type == SADB_EALG_NONE) { __ipsec_errcode = EIPSEC_NO_ALGS; return -1; } break; case SADB_SATYPE_AH: if (e_type != SADB_EALG_NONE) { __ipsec_errcode = EIPSEC_INVAL_ALGS; return -1; } if (a_type == SADB_AALG_NONE) { __ipsec_errcode = EIPSEC_NO_ALGS; return -1; } break; case SADB_X_SATYPE_IPCOMP: if (e_type == SADB_X_CALG_NONE) { __ipsec_errcode = EIPSEC_INVAL_ALGS; return -1; } if (a_type != SADB_AALG_NONE) { __ipsec_errcode = EIPSEC_NO_ALGS; return -1; } break; case SADB_X_SATYPE_TCPSIGNATURE: if (e_type != SADB_EALG_NONE) { __ipsec_errcode = EIPSEC_INVAL_ALGS; return -1; } if (a_type != SADB_X_AALG_TCP_MD5) { __ipsec_errcode = EIPSEC_INVAL_ALGS; return -1; } break; default: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } /* create new sadb_msg to reply. */ len = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) + sizeof(struct sadb_x_sa2) + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa_len) + sizeof(struct sadb_address) + PFKEY_ALIGN8(dst->sa_len) + sizeof(struct sadb_lifetime) + sizeof(struct sadb_lifetime); if (e_type != SADB_EALG_NONE) len += (sizeof(struct sadb_key) + PFKEY_ALIGN8(e_keylen)); if (a_type != SADB_AALG_NONE) len += (sizeof(struct sadb_key) + PFKEY_ALIGN8(a_keylen)); if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, type, len, satype, seq, getpid()); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbsa(p, ep, spi, wsize, a_type, e_type, flags); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbxsa2(p, ep, mode, reqid); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_SRC, src, plen, IPSEC_ULPROTO_ANY); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_DST, dst, plen, IPSEC_ULPROTO_ANY); if (!p) { free(newmsg); return -1; } if (e_type != SADB_EALG_NONE) { p = pfkey_setsadbkey(p, ep, SADB_EXT_KEY_ENCRYPT, keymat, e_keylen); if (!p) { free(newmsg); return -1; } } if (a_type != SADB_AALG_NONE) { p = pfkey_setsadbkey(p, ep, SADB_EXT_KEY_AUTH, keymat + e_keylen, a_keylen); if (!p) { free(newmsg); return -1; } } /* set sadb_lifetime for destination */ p = pfkey_setsadblifetime(p, ep, SADB_EXT_LIFETIME_HARD, l_alloc, l_bytes, l_addtime, l_usetime); if (!p) { free(newmsg); return -1; } p = pfkey_setsadblifetime(p, ep, SADB_EXT_LIFETIME_SOFT, l_alloc, l_bytes, l_addtime, l_usetime); if (!p || p != ep) { free(newmsg); return -1; } /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* sending SADB_DELETE or SADB_GET message to the kernel */ static int pfkey_send_x2(so, type, satype, mode, src, dst, spi) int so; u_int type, satype, mode; struct sockaddr *src, *dst; u_int32_t spi; { struct sadb_msg *newmsg; int len; caddr_t p; int plen; caddr_t ep; /* validity check */ if (src == NULL || dst == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if (src->sa_family != dst->sa_family) { __ipsec_errcode = EIPSEC_FAMILY_MISMATCH; return -1; } switch (src->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: __ipsec_errcode = EIPSEC_INVAL_FAMILY; return -1; } /* create new sadb_msg to reply. */ len = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa_len) + sizeof(struct sadb_address) + PFKEY_ALIGN8(dst->sa_len); if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, type, len, satype, 0, getpid()); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbsa(p, ep, spi, 0, 0, 0, 0); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_SRC, src, plen, IPSEC_ULPROTO_ANY); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_DST, dst, plen, IPSEC_ULPROTO_ANY); if (!p || p != ep) { free(newmsg); return -1; } /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* * sending SADB_REGISTER, SADB_FLUSH, SADB_DUMP or SADB_X_PROMISC message * to the kernel */ static int pfkey_send_x3(so, type, satype) int so; u_int type, satype; { struct sadb_msg *newmsg; int len; caddr_t p; caddr_t ep; /* validity check */ switch (type) { case SADB_X_PROMISC: if (satype != 0 && satype != 1) { __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } break; default: switch (satype) { case SADB_SATYPE_UNSPEC: case SADB_SATYPE_AH: case SADB_SATYPE_ESP: case SADB_X_SATYPE_IPCOMP: case SADB_X_SATYPE_TCPSIGNATURE: break; default: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } } /* create new sadb_msg to send. */ len = sizeof(struct sadb_msg); if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, type, len, satype, 0, getpid()); if (!p || p != ep) { free(newmsg); return -1; } /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* sending SADB_X_SPDADD message to the kernel */ static int pfkey_send_x4(so, type, src, prefs, dst, prefd, proto, ltime, vtime, policy, policylen, seq) int so; struct sockaddr *src, *dst; u_int type, prefs, prefd, proto; u_int64_t ltime, vtime; char *policy; int policylen; u_int32_t seq; { struct sadb_msg *newmsg; int len; caddr_t p; int plen; caddr_t ep; /* validity check */ if (src == NULL || dst == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } if (src->sa_family != dst->sa_family) { __ipsec_errcode = EIPSEC_FAMILY_MISMATCH; return -1; } switch (src->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: __ipsec_errcode = EIPSEC_INVAL_FAMILY; return -1; } if (prefs > plen || prefd > plen) { __ipsec_errcode = EIPSEC_INVAL_PREFIXLEN; return -1; } /* create new sadb_msg to reply. */ len = sizeof(struct sadb_msg) + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa_len) + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa_len) + sizeof(struct sadb_lifetime) + policylen; if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, type, len, SADB_SATYPE_UNSPEC, seq, getpid()); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_SRC, src, prefs, proto); if (!p) { free(newmsg); return -1; } p = pfkey_setsadbaddr(p, ep, SADB_EXT_ADDRESS_DST, dst, prefd, proto); if (!p) { free(newmsg); return -1; } p = pfkey_setsadblifetime(p, ep, SADB_EXT_LIFETIME_HARD, 0, 0, ltime, vtime); if (!p || p + policylen != ep) { free(newmsg); return -1; } memcpy(p, policy, policylen); /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* sending SADB_X_SPDGET or SADB_X_SPDDELETE message to the kernel */ static int pfkey_send_x5(so, type, spid) int so; u_int type; u_int32_t spid; { struct sadb_msg *newmsg; struct sadb_x_policy xpl; int len; caddr_t p; caddr_t ep; /* create new sadb_msg to reply. */ len = sizeof(struct sadb_msg) + sizeof(xpl); if ((newmsg = CALLOC(len, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return -1; } ep = ((caddr_t)newmsg) + len; p = pfkey_setsadbmsg((caddr_t)newmsg, ep, type, len, SADB_SATYPE_UNSPEC, 0, getpid()); if (!p) { free(newmsg); return -1; } if (p + sizeof(xpl) != ep) { free(newmsg); return -1; } memset(&xpl, 0, sizeof(xpl)); xpl.sadb_x_policy_len = PFKEY_UNIT64(sizeof(xpl)); xpl.sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl.sadb_x_policy_id = spid; memcpy(p, &xpl, sizeof(xpl)); /* send message */ len = pfkey_send(so, newmsg, len); free(newmsg); if (len < 0) return -1; __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* * open a socket. * OUT: * -1: fail. * others : success and return value of socket. */ int pfkey_open() { int so; const int bufsiz = 128 * 1024; /*is 128K enough?*/ if ((so = socket(PF_KEY, SOCK_RAW, PF_KEY_V2)) < 0) { __ipsec_set_strerror(strerror(errno)); return -1; } /* * This is a temporary workaround for KAME PR 154. * Don't really care even if it fails. */ (void)setsockopt(so, SOL_SOCKET, SO_SNDBUF, &bufsiz, sizeof(bufsiz)); (void)setsockopt(so, SOL_SOCKET, SO_RCVBUF, &bufsiz, sizeof(bufsiz)); __ipsec_errcode = EIPSEC_NO_ERROR; return so; } /* * close a socket. * OUT: * 0: success. * -1: fail. */ void pfkey_close(so) int so; { (void)close(so); __ipsec_errcode = EIPSEC_NO_ERROR; return; } /* * receive sadb_msg data, and return pointer to new buffer allocated. * Must free this buffer later. * OUT: * NULL : error occured. * others : a pointer to sadb_msg structure. * * XXX should be rewritten to pass length explicitly */ struct sadb_msg * pfkey_recv(so) int so; { struct sadb_msg buf, *newmsg; int len, reallen; while ((len = recv(so, (caddr_t)&buf, sizeof(buf), MSG_PEEK)) < 0) { if (errno == EINTR) continue; __ipsec_set_strerror(strerror(errno)); return NULL; } if (len < sizeof(buf)) { recv(so, (caddr_t)&buf, sizeof(buf), 0); __ipsec_errcode = EIPSEC_MAX; return NULL; } /* read real message */ reallen = PFKEY_UNUNIT64(buf.sadb_msg_len); if ((newmsg = CALLOC(reallen, struct sadb_msg *)) == NULL) { __ipsec_set_strerror(strerror(errno)); return NULL; } while ((len = recv(so, (caddr_t)newmsg, reallen, 0)) < 0) { if (errno == EINTR) continue; __ipsec_set_strerror(strerror(errno)); free(newmsg); return NULL; } if (len != reallen) { __ipsec_errcode = EIPSEC_SYSTEM_ERROR; free(newmsg); return NULL; } /* don't trust what the kernel says, validate! */ if (PFKEY_UNUNIT64(newmsg->sadb_msg_len) != len) { __ipsec_errcode = EIPSEC_SYSTEM_ERROR; free(newmsg); return NULL; } __ipsec_errcode = EIPSEC_NO_ERROR; return newmsg; } /* * send message to a socket. * OUT: * others: success and return length sent. * -1 : fail. */ int pfkey_send(so, msg, len) int so; struct sadb_msg *msg; int len; { if ((len = send(so, (caddr_t)msg, len, 0)) < 0) { __ipsec_set_strerror(strerror(errno)); return -1; } __ipsec_errcode = EIPSEC_NO_ERROR; return len; } /* * %%% Utilities * NOTE: These functions are derived from netkey/key.c in KAME. */ /* * set the pointer to each header in this message buffer. * IN: msg: pointer to message buffer. * mhp: pointer to the buffer initialized like below: * caddr_t mhp[SADB_EXT_MAX + 1]; * OUT: -1: invalid. * 0: valid. * * XXX should be rewritten to obtain length explicitly */ int pfkey_align(msg, mhp) struct sadb_msg *msg; caddr_t *mhp; { struct sadb_ext *ext; int i; caddr_t p; caddr_t ep; /* XXX should be passed from upper layer */ /* validity check */ if (msg == NULL || mhp == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } /* initialize */ for (i = 0; i < SADB_EXT_MAX + 1; i++) mhp[i] = NULL; mhp[0] = (caddr_t)msg; /* initialize */ p = (caddr_t) msg; ep = p + PFKEY_UNUNIT64(msg->sadb_msg_len); /* skip base header */ p += sizeof(struct sadb_msg); while (p < ep) { ext = (struct sadb_ext *)p; if (ep < p + sizeof(*ext) || PFKEY_EXTLEN(ext) < sizeof(*ext) || ep < p + PFKEY_EXTLEN(ext)) { /* invalid format */ break; } /* duplicate check */ /* XXX Are there duplication either KEY_AUTH or KEY_ENCRYPT ?*/ if (mhp[ext->sadb_ext_type] != NULL) { __ipsec_errcode = EIPSEC_INVAL_EXTTYPE; return -1; } /* set pointer */ switch (ext->sadb_ext_type) { case SADB_EXT_SA: case SADB_EXT_LIFETIME_CURRENT: case SADB_EXT_LIFETIME_HARD: case SADB_EXT_LIFETIME_SOFT: case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_KEY_AUTH: /* XXX should to be check weak keys. */ case SADB_EXT_KEY_ENCRYPT: /* XXX should to be check weak keys. */ case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: case SADB_EXT_SENSITIVITY: case SADB_EXT_PROPOSAL: case SADB_EXT_SUPPORTED_AUTH: case SADB_EXT_SUPPORTED_ENCRYPT: case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: - case SADB_X_EXT_SA_REPLAY: - mhp[ext->sadb_ext_type] = (caddr_t)ext; - break; case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: - /* case SADB_X_EXT_NAT_T_OA: is OAI */ case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: - if (feature_present("ipsec_natt")) { - mhp[ext->sadb_ext_type] = (caddr_t)ext; - break; - } - /* FALLTHROUGH */ + case SADB_X_EXT_SA_REPLAY: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: + mhp[ext->sadb_ext_type] = (caddr_t)ext; + break; default: __ipsec_errcode = EIPSEC_INVAL_EXTTYPE; return -1; } p += PFKEY_EXTLEN(ext); } if (p != ep) { __ipsec_errcode = EIPSEC_INVAL_SADBMSG; return -1; } __ipsec_errcode = EIPSEC_NO_ERROR; return 0; } /* * check basic usage for sadb_msg, * NOTE: This routine is derived from netkey/key.c in KAME. * IN: msg: pointer to message buffer. * mhp: pointer to the buffer initialized like below: * * caddr_t mhp[SADB_EXT_MAX + 1]; * * OUT: -1: invalid. * 0: valid. */ int pfkey_check(mhp) caddr_t *mhp; { struct sadb_msg *msg; /* validity check */ if (mhp == NULL || mhp[0] == NULL) { __ipsec_errcode = EIPSEC_INVAL_ARGUMENT; return -1; } msg = (struct sadb_msg *)mhp[0]; /* check version */ if (msg->sadb_msg_version != PF_KEY_V2) { __ipsec_errcode = EIPSEC_INVAL_VERSION; return -1; } /* check type */ if (msg->sadb_msg_type > SADB_MAX) { __ipsec_errcode = EIPSEC_INVAL_MSGTYPE; return -1; } /* check SA type */ switch (msg->sadb_msg_satype) { case SADB_SATYPE_UNSPEC: switch (msg->sadb_msg_type) { case SADB_GETSPI: case SADB_UPDATE: case SADB_ADD: case SADB_DELETE: case SADB_GET: case SADB_ACQUIRE: case SADB_EXPIRE: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } break; case SADB_SATYPE_ESP: case SADB_SATYPE_AH: case SADB_X_SATYPE_IPCOMP: case SADB_X_SATYPE_TCPSIGNATURE: switch (msg->sadb_msg_type) { case SADB_X_SPDADD: case SADB_X_SPDDELETE: case SADB_X_SPDGET: case SADB_X_SPDDUMP: case SADB_X_SPDFLUSH: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } break; case SADB_SATYPE_RSVP: case SADB_SATYPE_OSPFV2: case SADB_SATYPE_RIPV2: case SADB_SATYPE_MIP: __ipsec_errcode = EIPSEC_NOT_SUPPORTED; return -1; case 1: /* XXX: What does it do ? */ if (msg->sadb_msg_type == SADB_X_PROMISC) break; /*FALLTHROUGH*/ default: __ipsec_errcode = EIPSEC_INVAL_SATYPE; return -1; } /* check field of upper layer protocol and address family */ if (mhp[SADB_EXT_ADDRESS_SRC] != NULL && mhp[SADB_EXT_ADDRESS_DST] != NULL) { struct sadb_address *src0, *dst0; src0 = (struct sadb_address *)(mhp[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp[SADB_EXT_ADDRESS_DST]); if (src0->sadb_address_proto != dst0->sadb_address_proto) { __ipsec_errcode = EIPSEC_PROTO_MISMATCH; return -1; } if (PFKEY_ADDR_SADDR(src0)->sa_family != PFKEY_ADDR_SADDR(dst0)->sa_family) { __ipsec_errcode = EIPSEC_FAMILY_MISMATCH; return -1; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: case AF_INET6: break; default: __ipsec_errcode = EIPSEC_INVAL_FAMILY; return -1; } /* * prefixlen == 0 is valid because there must be the case * all addresses are matched. */ } __ipsec_errcode = EIPSEC_NO_ERROR; return 0; } /* * set data into sadb_msg. * `buf' must has been allocated sufficiently. */ static caddr_t pfkey_setsadbmsg(buf, lim, type, tlen, satype, seq, pid) caddr_t buf; caddr_t lim; u_int type, satype; u_int tlen; u_int32_t seq; pid_t pid; { struct sadb_msg *p; u_int len; p = (struct sadb_msg *)buf; len = sizeof(struct sadb_msg); if (buf + len > lim) return NULL; memset(p, 0, len); p->sadb_msg_version = PF_KEY_V2; p->sadb_msg_type = type; p->sadb_msg_errno = 0; p->sadb_msg_satype = satype; p->sadb_msg_len = PFKEY_UNIT64(tlen); p->sadb_msg_reserved = 0; p->sadb_msg_seq = seq; p->sadb_msg_pid = (u_int32_t)pid; return(buf + len); } /* * copy secasvar data into sadb_address. * `buf' must has been allocated sufficiently. */ static caddr_t pfkey_setsadbsa(buf, lim, spi, wsize, auth, enc, flags) caddr_t buf; caddr_t lim; u_int32_t spi, flags; u_int wsize, auth, enc; { struct sadb_sa *p; u_int len; p = (struct sadb_sa *)buf; len = sizeof(struct sadb_sa); if (buf + len > lim) return NULL; memset(p, 0, len); p->sadb_sa_len = PFKEY_UNIT64(len); p->sadb_sa_exttype = SADB_EXT_SA; p->sadb_sa_spi = spi; p->sadb_sa_replay = wsize; p->sadb_sa_state = SADB_SASTATE_LARVAL; p->sadb_sa_auth = auth; p->sadb_sa_encrypt = enc; p->sadb_sa_flags = flags; return(buf + len); } /* * set data into sadb_address. * `buf' must has been allocated sufficiently. * prefixlen is in bits. */ static caddr_t pfkey_setsadbaddr(buf, lim, exttype, saddr, prefixlen, ul_proto) caddr_t buf; caddr_t lim; u_int exttype; struct sockaddr *saddr; u_int prefixlen; u_int ul_proto; { struct sadb_address *p; u_int len; p = (struct sadb_address *)buf; len = sizeof(struct sadb_address) + PFKEY_ALIGN8(saddr->sa_len); if (buf + len > lim) return NULL; memset(p, 0, len); p->sadb_address_len = PFKEY_UNIT64(len); p->sadb_address_exttype = exttype & 0xffff; p->sadb_address_proto = ul_proto & 0xff; p->sadb_address_prefixlen = prefixlen; p->sadb_address_reserved = 0; memcpy(p + 1, saddr, saddr->sa_len); return(buf + len); } /* * set sadb_key structure after clearing buffer with zero. * OUT: the pointer of buf + len. */ static caddr_t pfkey_setsadbkey(buf, lim, type, key, keylen) caddr_t buf; caddr_t lim; caddr_t key; u_int type, keylen; { struct sadb_key *p; u_int len; p = (struct sadb_key *)buf; len = sizeof(struct sadb_key) + PFKEY_ALIGN8(keylen); if (buf + len > lim) return NULL; memset(p, 0, len); p->sadb_key_len = PFKEY_UNIT64(len); p->sadb_key_exttype = type; p->sadb_key_bits = keylen << 3; p->sadb_key_reserved = 0; memcpy(p + 1, key, keylen); return buf + len; } /* * set sadb_lifetime structure after clearing buffer with zero. * OUT: the pointer of buf + len. */ static caddr_t pfkey_setsadblifetime(buf, lim, type, l_alloc, l_bytes, l_addtime, l_usetime) caddr_t buf; caddr_t lim; u_int type; u_int32_t l_alloc, l_bytes, l_addtime, l_usetime; { struct sadb_lifetime *p; u_int len; p = (struct sadb_lifetime *)buf; len = sizeof(struct sadb_lifetime); if (buf + len > lim) return NULL; memset(p, 0, len); p->sadb_lifetime_len = PFKEY_UNIT64(len); p->sadb_lifetime_exttype = type; switch (type) { case SADB_EXT_LIFETIME_SOFT: p->sadb_lifetime_allocations = (l_alloc * soft_lifetime_allocations_rate) /100; p->sadb_lifetime_bytes = (l_bytes * soft_lifetime_bytes_rate) /100; p->sadb_lifetime_addtime = (l_addtime * soft_lifetime_addtime_rate) /100; p->sadb_lifetime_usetime = (l_usetime * soft_lifetime_usetime_rate) /100; break; case SADB_EXT_LIFETIME_HARD: p->sadb_lifetime_allocations = l_alloc; p->sadb_lifetime_bytes = l_bytes; p->sadb_lifetime_addtime = l_addtime; p->sadb_lifetime_usetime = l_usetime; break; } return buf + len; } /* * copy secasvar data into sadb_address. * `buf' must has been allocated sufficiently. */ static caddr_t pfkey_setsadbxsa2(buf, lim, mode0, reqid) caddr_t buf; caddr_t lim; u_int32_t mode0; u_int32_t reqid; { struct sadb_x_sa2 *p; u_int8_t mode = mode0 & 0xff; u_int len; p = (struct sadb_x_sa2 *)buf; len = sizeof(struct sadb_x_sa2); if (buf + len > lim) return NULL; memset(p, 0, len); p->sadb_x_sa2_len = PFKEY_UNIT64(len); p->sadb_x_sa2_exttype = SADB_X_EXT_SA2; p->sadb_x_sa2_mode = mode; p->sadb_x_sa2_reqid = reqid; return(buf + len); } diff --git a/lib/libipsec/pfkey_dump.c b/lib/libipsec/pfkey_dump.c index 04df27ea07b1..83a003405ca3 100644 --- a/lib/libipsec/pfkey_dump.c +++ b/lib/libipsec/pfkey_dump.c @@ -1,640 +1,668 @@ /* $KAME: pfkey_dump.c,v 1.45 2003/09/08 10:14:56 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ipsec_strerror.h" #include "libpfkey.h" /* cope with old kame headers - ugly */ #ifndef SADB_X_AALG_MD5 #define SADB_X_AALG_MD5 SADB_AALG_MD5 #endif #ifndef SADB_X_AALG_SHA #define SADB_X_AALG_SHA SADB_AALG_SHA #endif #ifndef SADB_X_AALG_NULL #define SADB_X_AALG_NULL SADB_AALG_NULL #endif #ifndef SADB_X_EALG_BLOWFISHCBC #define SADB_X_EALG_BLOWFISHCBC SADB_EALG_BLOWFISHCBC #endif #ifndef SADB_X_EALG_CAST128CBC #define SADB_X_EALG_CAST128CBC SADB_EALG_CAST128CBC #endif #ifndef SADB_X_EALG_RC5CBC #ifdef SADB_EALG_RC5CBC #define SADB_X_EALG_RC5CBC SADB_EALG_RC5CBC #endif #endif #define GETMSGSTR(str, num) \ do { \ if (sizeof((str)[0]) == 0 \ || num >= sizeof(str)/sizeof((str)[0])) \ printf("%u ", (num)); \ else if (strlen((str)[(num)]) == 0) \ printf("%u ", (num)); \ else \ printf("%s ", (str)[(num)]); \ } while (0) #define GETMSGV2S(v2s, num) \ do { \ struct val2str *p; \ for (p = (v2s); p && p->str; p++) { \ if (p->val == (num)) \ break; \ } \ if (p && p->str) \ printf("%s ", p->str); \ else \ printf("%u ", (num)); \ } while (0) static char *str_ipaddr(struct sockaddr *); static char *str_prefport(u_int, u_int, u_int, u_int); static void str_upperspec(u_int, u_int, u_int); static char *str_time(time_t); static void str_lifetime_byte(struct sadb_lifetime *, char *); struct val2str { int val; const char *str; }; /* * Must to be re-written about following strings. */ static char *str_satype[] = { "unspec", "unknown", "ah", "esp", "unknown", "rsvp", "ospfv2", "ripv2", "mip", "ipcomp", "policy", "tcp" }; static char *str_mode[] = { "any", "transport", "tunnel", }; static char *str_state[] = { "larval", "mature", "dying", "dead", }; static struct val2str str_alg_auth[] = { { SADB_AALG_NONE, "none", }, { SADB_AALG_MD5HMAC, "hmac-md5", }, { SADB_AALG_SHA1HMAC, "hmac-sha1", }, { SADB_X_AALG_MD5, "md5", }, { SADB_X_AALG_SHA, "sha", }, { SADB_X_AALG_NULL, "null", }, { SADB_X_AALG_TCP_MD5, "tcp-md5", }, #ifdef SADB_X_AALG_SHA2_256 { SADB_X_AALG_SHA2_256, "hmac-sha2-256", }, #endif #ifdef SADB_X_AALG_SHA2_384 { SADB_X_AALG_SHA2_384, "hmac-sha2-384", }, #endif #ifdef SADB_X_AALG_SHA2_512 { SADB_X_AALG_SHA2_512, "hmac-sha2-512", }, #endif #ifdef SADB_X_AALG_RIPEMD160HMAC { SADB_X_AALG_RIPEMD160HMAC, "hmac-ripemd160", }, #endif #ifdef SADB_X_AALG_AES_XCBC_MAC { SADB_X_AALG_AES_XCBC_MAC, "aes-xcbc-mac", }, #endif { -1, NULL, }, }; static struct val2str str_alg_enc[] = { { SADB_EALG_NONE, "none", }, { SADB_EALG_DESCBC, "des-cbc", }, { SADB_EALG_3DESCBC, "3des-cbc", }, { SADB_EALG_NULL, "null", }, #ifdef SADB_X_EALG_RC5CBC { SADB_X_EALG_RC5CBC, "rc5-cbc", }, #endif { SADB_X_EALG_CAST128CBC, "cast128-cbc", }, { SADB_X_EALG_BLOWFISHCBC, "blowfish-cbc", }, #ifdef SADB_X_EALG_RIJNDAELCBC { SADB_X_EALG_RIJNDAELCBC, "rijndael-cbc", }, #endif #ifdef SADB_X_EALG_TWOFISHCBC { SADB_X_EALG_TWOFISHCBC, "twofish-cbc", }, #endif #ifdef SADB_X_EALG_AESCTR { SADB_X_EALG_AESCTR, "aes-ctr", }, #endif #ifdef SADB_X_EALG_AESGCM16 { SADB_X_EALG_AESGCM16, "aes-gcm-16", }, #endif #ifdef SADB_X_EALG_CAMELLIACBC { SADB_X_EALG_CAMELLIACBC, "camellia-cbc", }, #endif { -1, NULL, }, }; static struct val2str str_alg_comp[] = { { SADB_X_CALG_NONE, "none", }, { SADB_X_CALG_OUI, "oui", }, { SADB_X_CALG_DEFLATE, "deflate", }, { SADB_X_CALG_LZS, "lzs", }, { -1, NULL, }, }; /* * dump SADB_MSG formated. For debugging, you should use kdebug_sadb(). */ void pfkey_sadump(m) struct sadb_msg *m; { caddr_t mhp[SADB_EXT_MAX + 1]; struct sadb_sa *m_sa; struct sadb_x_sa2 *m_sa2; struct sadb_lifetime *m_lftc, *m_lfth, *m_lfts; struct sadb_address *m_saddr, *m_daddr, *m_paddr; struct sadb_key *m_auth, *m_enc; struct sadb_ident *m_sid, *m_did; struct sadb_sens *m_sens; struct sadb_x_sa_replay *m_sa_replay; + struct sadb_x_nat_t_type *natt_type; + struct sadb_x_nat_t_port *natt_sport, *natt_dport; + struct sadb_address *natt_oai, *natt_oar; /* check pfkey message. */ if (pfkey_align(m, mhp)) { printf("%s\n", ipsec_strerror()); return; } if (pfkey_check(mhp)) { printf("%s\n", ipsec_strerror()); return; } m_sa = (struct sadb_sa *)mhp[SADB_EXT_SA]; m_sa2 = (struct sadb_x_sa2 *)mhp[SADB_X_EXT_SA2]; m_lftc = (struct sadb_lifetime *)mhp[SADB_EXT_LIFETIME_CURRENT]; m_lfth = (struct sadb_lifetime *)mhp[SADB_EXT_LIFETIME_HARD]; m_lfts = (struct sadb_lifetime *)mhp[SADB_EXT_LIFETIME_SOFT]; m_saddr = (struct sadb_address *)mhp[SADB_EXT_ADDRESS_SRC]; m_daddr = (struct sadb_address *)mhp[SADB_EXT_ADDRESS_DST]; m_paddr = (struct sadb_address *)mhp[SADB_EXT_ADDRESS_PROXY]; m_auth = (struct sadb_key *)mhp[SADB_EXT_KEY_AUTH]; m_enc = (struct sadb_key *)mhp[SADB_EXT_KEY_ENCRYPT]; m_sid = (struct sadb_ident *)mhp[SADB_EXT_IDENTITY_SRC]; m_did = (struct sadb_ident *)mhp[SADB_EXT_IDENTITY_DST]; m_sens = (struct sadb_sens *)mhp[SADB_EXT_SENSITIVITY]; m_sa_replay = (struct sadb_x_sa_replay *)mhp[SADB_X_EXT_SA_REPLAY]; + natt_type = (struct sadb_x_nat_t_type *)mhp[SADB_X_EXT_NAT_T_TYPE]; + natt_sport = (struct sadb_x_nat_t_port *)mhp[SADB_X_EXT_NAT_T_SPORT]; + natt_dport = (struct sadb_x_nat_t_port *)mhp[SADB_X_EXT_NAT_T_DPORT]; + natt_oai = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAI]; + natt_oar = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAR]; + /* source address */ if (m_saddr == NULL) { printf("no ADDRESS_SRC extension.\n"); return; } - printf("%s ", str_ipaddr((struct sockaddr *)(m_saddr + 1))); + printf("%s", str_ipaddr((struct sockaddr *)(m_saddr + 1))); + if (natt_type != NULL && natt_sport != NULL) + printf("[%u]", ntohs(natt_sport->sadb_x_nat_t_port_port)); /* destination address */ if (m_daddr == NULL) { - printf("no ADDRESS_DST extension.\n"); + printf("\nno ADDRESS_DST extension.\n"); return; } - printf("%s ", str_ipaddr((struct sockaddr *)(m_daddr + 1))); + printf(" %s", str_ipaddr((struct sockaddr *)(m_daddr + 1))); + if (natt_type != NULL && natt_dport != NULL) + printf("[%u]", ntohs(natt_dport->sadb_x_nat_t_port_port)); /* SA type */ if (m_sa == NULL) { - printf("no SA extension.\n"); + printf("\nno SA extension.\n"); return; } if (m_sa2 == NULL) { - printf("no SA2 extension.\n"); + printf("\nno SA2 extension.\n"); return; } printf("\n\t"); - GETMSGSTR(str_satype, m->sadb_msg_satype); + if (m->sadb_msg_satype == SADB_SATYPE_ESP && natt_type != NULL) + printf("esp-udp "); + else + GETMSGSTR(str_satype, m->sadb_msg_satype); printf("mode="); GETMSGSTR(str_mode, m_sa2->sadb_x_sa2_mode); printf("spi=%u(0x%08x) reqid=%u(0x%08x)\n", (u_int32_t)ntohl(m_sa->sadb_sa_spi), (u_int32_t)ntohl(m_sa->sadb_sa_spi), (u_int32_t)m_sa2->sadb_x_sa2_reqid, (u_int32_t)m_sa2->sadb_x_sa2_reqid); + /* other NAT-T information */ + if (natt_type != NULL && (natt_oai != NULL || natt_oar != NULL)) { + printf("\tNAT:"); + if (natt_oai != NULL) + printf(" OAI=%s", + str_ipaddr((struct sockaddr *)(natt_oai + 1))); + if (natt_oar != NULL) + printf(" OAR=%s", + str_ipaddr((struct sockaddr *)(natt_oar + 1))); + printf("\n"); + } + /* encryption key */ if (m->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { printf("\tC: "); GETMSGV2S(str_alg_comp, m_sa->sadb_sa_encrypt); } else if (m->sadb_msg_satype == SADB_SATYPE_ESP) { if (m_enc != NULL) { printf("\tE: "); GETMSGV2S(str_alg_enc, m_sa->sadb_sa_encrypt); ipsec_hexdump((caddr_t)m_enc + sizeof(*m_enc), m_enc->sadb_key_bits / 8); printf("\n"); } } /* authentication key */ if (m_auth != NULL) { printf("\tA: "); GETMSGV2S(str_alg_auth, m_sa->sadb_sa_auth); ipsec_hexdump((caddr_t)m_auth + sizeof(*m_auth), m_auth->sadb_key_bits / 8); printf("\n"); } /* replay windoe size & flags */ printf("\tseq=0x%08x replay=%u flags=0x%08x ", m_sa2->sadb_x_sa2_sequence, m_sa_replay ? (m_sa_replay->sadb_x_sa_replay_replay >> 3) : m_sa->sadb_sa_replay, m_sa->sadb_sa_flags); /* state */ printf("state="); GETMSGSTR(str_state, m_sa->sadb_sa_state); printf("\n"); /* lifetime */ if (m_lftc != NULL) { time_t tmp_time = time(0); printf("\tcreated: %s", str_time(m_lftc->sadb_lifetime_addtime)); printf("\tcurrent: %s\n", str_time(tmp_time)); printf("\tdiff: %lu(s)", (u_long)(m_lftc->sadb_lifetime_addtime == 0 ? 0 : (tmp_time - m_lftc->sadb_lifetime_addtime))); printf("\thard: %lu(s)", (u_long)(m_lfth == NULL ? 0 : m_lfth->sadb_lifetime_addtime)); printf("\tsoft: %lu(s)\n", (u_long)(m_lfts == NULL ? 0 : m_lfts->sadb_lifetime_addtime)); printf("\tlast: %s", str_time(m_lftc->sadb_lifetime_usetime)); printf("\thard: %lu(s)", (u_long)(m_lfth == NULL ? 0 : m_lfth->sadb_lifetime_usetime)); printf("\tsoft: %lu(s)\n", (u_long)(m_lfts == NULL ? 0 : m_lfts->sadb_lifetime_usetime)); str_lifetime_byte(m_lftc, "current"); str_lifetime_byte(m_lfth, "hard"); str_lifetime_byte(m_lfts, "soft"); printf("\n"); printf("\tallocated: %lu", (unsigned long)m_lftc->sadb_lifetime_allocations); printf("\thard: %lu", (u_long)(m_lfth == NULL ? 0 : m_lfth->sadb_lifetime_allocations)); printf("\tsoft: %lu\n", (u_long)(m_lfts == NULL ? 0 : m_lfts->sadb_lifetime_allocations)); } printf("\tsadb_seq=%lu pid=%lu ", (u_long)m->sadb_msg_seq, (u_long)m->sadb_msg_pid); /* XXX DEBUG */ printf("refcnt=%u\n", m->sadb_msg_reserved); return; } void pfkey_spdump(m) struct sadb_msg *m; { char pbuf[NI_MAXSERV]; caddr_t mhp[SADB_EXT_MAX + 1]; struct sadb_address *m_saddr, *m_daddr; struct sadb_x_policy *m_xpl; struct sadb_lifetime *m_lftc = NULL, *m_lfth = NULL; struct sockaddr *sa; u_int16_t sport = 0, dport = 0; /* check pfkey message. */ if (pfkey_align(m, mhp)) { printf("%s\n", ipsec_strerror()); return; } if (pfkey_check(mhp)) { printf("%s\n", ipsec_strerror()); return; } m_saddr = (struct sadb_address *)mhp[SADB_EXT_ADDRESS_SRC]; m_daddr = (struct sadb_address *)mhp[SADB_EXT_ADDRESS_DST]; m_xpl = (struct sadb_x_policy *)mhp[SADB_X_EXT_POLICY]; m_lftc = (struct sadb_lifetime *)mhp[SADB_EXT_LIFETIME_CURRENT]; m_lfth = (struct sadb_lifetime *)mhp[SADB_EXT_LIFETIME_HARD]; if (m_saddr && m_daddr) { /* source address */ sa = (struct sockaddr *)(m_saddr + 1); switch (sa->sa_family) { case AF_INET: case AF_INET6: if (getnameinfo(sa, sa->sa_len, NULL, 0, pbuf, sizeof(pbuf), NI_NUMERICSERV) != 0) sport = 0; /*XXX*/ else sport = atoi(pbuf); printf("%s%s ", str_ipaddr(sa), str_prefport(sa->sa_family, m_saddr->sadb_address_prefixlen, sport, m_saddr->sadb_address_proto)); break; default: printf("unknown-af "); break; } /* destination address */ sa = (struct sockaddr *)(m_daddr + 1); switch (sa->sa_family) { case AF_INET: case AF_INET6: if (getnameinfo(sa, sa->sa_len, NULL, 0, pbuf, sizeof(pbuf), NI_NUMERICSERV) != 0) dport = 0; /*XXX*/ else dport = atoi(pbuf); printf("%s%s ", str_ipaddr(sa), str_prefport(sa->sa_family, m_daddr->sadb_address_prefixlen, dport, m_saddr->sadb_address_proto)); break; default: printf("unknown-af "); break; } /* upper layer protocol */ if (m_saddr->sadb_address_proto != m_daddr->sadb_address_proto) { printf("upper layer protocol mismatched.\n"); return; } str_upperspec(m_saddr->sadb_address_proto, sport, dport); } else printf("(no selector, probably per-socket policy) "); /* policy */ { char *d_xpl; if (m_xpl == NULL) { printf("no X_POLICY extension.\n"); return; } d_xpl = ipsec_dump_policy((char *)m_xpl, "\n\t"); /* dump SPD */ printf("\n\t%s\n", d_xpl); free(d_xpl); } /* lifetime */ if (m_lftc) { printf("\tcreated: %s ", str_time(m_lftc->sadb_lifetime_addtime)); printf("lastused: %s\n", str_time(m_lftc->sadb_lifetime_usetime)); } if (m_lfth) { printf("\tlifetime: %lu(s) ", (u_long)m_lfth->sadb_lifetime_addtime); printf("validtime: %lu(s)\n", (u_long)m_lfth->sadb_lifetime_usetime); } printf("\tspid=%ld seq=%ld pid=%ld\n", (u_long)m_xpl->sadb_x_policy_id, (u_long)m->sadb_msg_seq, (u_long)m->sadb_msg_pid); /* XXX TEST */ printf("\trefcnt=%u\n", m->sadb_msg_reserved); return; } /* * set "ipaddress" to buffer. */ static char * str_ipaddr(sa) struct sockaddr *sa; { static char buf[NI_MAXHOST]; const int niflag = NI_NUMERICHOST; if (sa == NULL) return ""; if (getnameinfo(sa, sa->sa_len, buf, sizeof(buf), NULL, 0, niflag) == 0) return buf; return NULL; } /* * set "/prefix[port number]" to buffer. */ static char * str_prefport(family, pref, port, ulp) u_int family, pref, port, ulp; { static char buf[128]; char prefbuf[128]; char portbuf[128]; int plen; switch (family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: return "?"; } if (pref == plen) prefbuf[0] = '\0'; else snprintf(prefbuf, sizeof(prefbuf), "/%u", pref); if (ulp == IPPROTO_ICMPV6) memset(portbuf, 0, sizeof(portbuf)); else { if (port == IPSEC_PORT_ANY) snprintf(portbuf, sizeof(portbuf), "[%s]", "any"); else snprintf(portbuf, sizeof(portbuf), "[%u]", port); } snprintf(buf, sizeof(buf), "%s%s", prefbuf, portbuf); return buf; } static void str_upperspec(ulp, p1, p2) u_int ulp, p1, p2; { if (ulp == IPSEC_ULPROTO_ANY) printf("any"); else if (ulp == IPPROTO_ICMPV6) { printf("icmp6"); if (!(p1 == IPSEC_PORT_ANY && p2 == IPSEC_PORT_ANY)) printf(" %u,%u", p1, p2); } else { struct protoent *ent; switch (ulp) { case IPPROTO_IPV4: printf("ip4"); break; default: ent = getprotobynumber(ulp); if (ent) printf("%s", ent->p_name); else printf("%u", ulp); endprotoent(); break; } } } /* * set "Mon Day Time Year" to buffer */ static char * str_time(t) time_t t; { static char buf[128]; if (t == 0) { int i = 0; for (;i < 20;) buf[i++] = ' '; } else { char *t0; t0 = ctime(&t); memcpy(buf, t0 + 4, 20); } buf[20] = '\0'; return(buf); } static void str_lifetime_byte(x, str) struct sadb_lifetime *x; char *str; { double y; char *unit; int w; if (x == NULL) { printf("\t%s: 0(bytes)", str); return; } #if 0 if ((x->sadb_lifetime_bytes) / 1024 / 1024) { y = (x->sadb_lifetime_bytes) * 1.0 / 1024 / 1024; unit = "M"; w = 1; } else if ((x->sadb_lifetime_bytes) / 1024) { y = (x->sadb_lifetime_bytes) * 1.0 / 1024; unit = "K"; w = 1; } else { y = (x->sadb_lifetime_bytes) * 1.0; unit = ""; w = 0; } #else y = (x->sadb_lifetime_bytes) * 1.0; unit = ""; w = 0; #endif printf("\t%s: %.*f(%sbytes)", str, w, y, unit); } diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile index a3623d507b77..543392600395 100644 --- a/sbin/ifconfig/Makefile +++ b/sbin/ifconfig/Makefile @@ -1,73 +1,74 @@ # From: @(#)Makefile 8.1 (Berkeley) 6/5/93 # $FreeBSD$ .include PACKAGE=runtime PROG= ifconfig SRCS= ifconfig.c # base support # # NB: The order here defines the order in which the constructors # are called. This in turn defines the default order in which # status is displayed. Probably should add a priority mechanism # to the registration process so we don't depend on this aspect # of the toolchain. # SRCS+= af_link.c # LLC support .if ${MK_INET_SUPPORT} != "no" SRCS+= af_inet.c # IPv4 support .endif .if ${MK_INET6_SUPPORT} != "no" SRCS+= af_inet6.c # IPv6 support .endif .if ${MK_INET6_SUPPORT} != "no" SRCS+= af_nd6.c # ND6 support .endif SRCS+= ifclone.c # clone device support SRCS+= ifmac.c # MAC support SRCS+= ifmedia.c # SIOC[GS]IFMEDIA support SRCS+= iffib.c # non-default FIB support SRCS+= ifvlan.c # SIOC[GS]ETVLAN support SRCS+= ifvxlan.c # VXLAN support SRCS+= ifgre.c # GRE keys etc SRCS+= ifgif.c # GIF reversed header workaround +SRCS+= ifipsec.c # IPsec VTI SRCS+= sfp.c # SFP/SFP+ information LIBADD+= m SRCS+= ifieee80211.c # SIOC[GS]IEEE80211 support LIBADD+= 80211 SRCS+= carp.c # SIOC[GS]VH support SRCS+= ifgroup.c # ... .if ${MK_PF} != "no" SRCS+= ifpfsync.c # pfsync(4) support .endif SRCS+= ifbridge.c # bridge support SRCS+= iflagg.c # lagg support .if ${MK_INET6_SUPPORT} != "no" CFLAGS+= -DINET6 .endif .if ${MK_INET_SUPPORT} != "no" CFLAGS+= -DINET .endif .if ${MK_JAIL} != "no" && !defined(RELEASE_CRUNCH) && !defined(RESCUE) CFLAGS+= -DJAIL LIBADD+= jail .endif MAN= ifconfig.8 CFLAGS+= -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wnested-externs WARNS?= 2 .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .include diff --git a/sbin/ifconfig/ifipsec.c b/sbin/ifconfig/ifipsec.c new file mode 100644 index 000000000000..fed5a842235c --- /dev/null +++ b/sbin/ifconfig/ifipsec.c @@ -0,0 +1,101 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ifconfig.h" + +static void +ipsec_status(int s) +{ + uint32_t reqid; + + ifr.ifr_data = (caddr_t)&reqid; + if (ioctl(s, IPSECGREQID, &ifr) == -1) + return; + printf("\treqid: %u\n", reqid); +} + +static +DECL_CMD_FUNC(setreqid, val, arg) +{ + char *ep; + uint32_t v; + + v = strtoul(val, &ep, 0); + if (*ep != '\0') { + warn("Invalid reqid value %s", val); + return; + } + ifr.ifr_data = (char *)&v; + if (ioctl(s, IPSECSREQID, &ifr) == -1) { + warn("ioctl(IPSECSREQID)"); + return; + } +} + +static struct cmd ipsec_cmds[] = { + DEF_CMD_ARG("reqid", setreqid), +}; + +static struct afswtch af_ipsec = { + .af_name = "af_ipsec", + .af_af = AF_UNSPEC, + .af_other_status = ipsec_status, +}; + +static __constructor void +ipsec_ctor(void) +{ + size_t i; + + for (i = 0; i < nitems(ipsec_cmds); i++) + cmd_register(&ipsec_cmds[i]); + af_register(&af_ipsec); +#undef N +} diff --git a/sbin/setkey/setkey.8 b/sbin/setkey/setkey.8 index 1eab81490ad9..4eff17783642 100644 --- a/sbin/setkey/setkey.8 +++ b/sbin/setkey/setkey.8 @@ -1,731 +1,729 @@ .\" $KAME: setkey.8,v 1.89 2003/09/07 22:17:41 itojun Exp $ .\" .\" Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the project nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd October 3, 2016 +.Dd February 6, 2017 .Dt SETKEY 8 .Os .\" .Sh NAME .Nm setkey .Nd "manually manipulate the IPsec SA/SP database" .\" .Sh SYNOPSIS .Nm .Op Fl v .Fl c .Nm .Op Fl v .Fl f Ar filename .Nm .Op Fl aPlv .Fl D .Nm .Op Fl Pv .Fl F .Nm .Op Fl h .Fl x .\" .Sh DESCRIPTION The .Nm utility adds, updates, dumps, or flushes Security Association Database (SAD) entries as well as Security Policy Database (SPD) entries in the kernel. .Pp The .Nm utility takes a series of operations from the standard input (if invoked with .Fl c ) or the file named .Ar filename (if invoked with .Fl f Ar filename ) . .Bl -tag -width indent .It Fl D Dump the SAD entries. If with .Fl P , the SPD entries are dumped. .It Fl F Flush the SAD entries. If with .Fl P , the SPD entries are flushed. .It Fl a The .Nm utility usually does not display dead SAD entries with .Fl D . If with .Fl a , the dead SAD entries will be displayed as well. A dead SAD entry means that it has been expired but remains in the system because it is referenced by some SPD entries. .It Fl h Add hexadecimal dump on .Fl x mode. .It Fl l Loop forever with short output on .Fl D . .It Fl v Be verbose. The program will dump messages exchanged on .Dv PF_KEY socket, including messages sent from other processes to the kernel. .It Fl x Loop forever and dump all the messages transmitted to .Dv PF_KEY socket. .Fl xx makes each timestamp unformatted. .El .Ss Configuration syntax With .Fl c or .Fl f on the command line, .Nm accepts the following configuration syntax. Lines starting with hash signs .Pq Ql # are treated as comment lines. .Bl -tag -width indent .It Xo .Li add .Op Fl 46n .Ar src Ar dst Ar protocol Ar spi .Op Ar extensions .Ar algorithm ... .Li \&; .Xc Add an SAD entry. .Li add can fail with multiple reasons, including when the key length does not match the specified algorithm. .\" .It Xo .Li get .Op Fl 46n .Ar src Ar dst Ar protocol Ar spi .Li \&; .Xc Show an SAD entry. .\" .It Xo .Li delete .Op Fl 46n .Ar src Ar dst Ar protocol Ar spi .Li \&; .Xc Remove an SAD entry. .\" .It Xo .Li deleteall .Op Fl 46n .Ar src Ar dst Ar protocol .Li \&; .Xc Remove all SAD entries that match the specification. .\" .It Xo .Li flush .Op Ar protocol .Li \&; .Xc Clear all SAD entries matched by the options. .Fl F on the command line achieves the same functionality. .\" .It Xo .Li dump .Op Ar protocol .Li \&; .Xc Dumps all SAD entries matched by the options. .Fl D on the command line achieves the same functionality. .\" .It Xo .Li spdadd .Op Fl 46n .Ar src_range Ar dst_range Ar upperspec Ar policy .Li \&; .Xc Add an SPD entry. .\" .It Xo .Li spddelete .Op Fl 46n .Ar src_range Ar dst_range Ar upperspec Fl P Ar direction .Li \&; .Xc Delete an SPD entry. .\" .It Xo .Li spdflush .Li \&; .Xc Clear all SPD entries. .Fl FP on the command line achieves the same functionality. .\" .It Xo .Li spddump .Li \&; .Xc Dumps all SPD entries. .Fl DP on the command line achieves the same functionality. .El .\" .Pp Meta-arguments are as follows: .Pp .Bl -tag -compact -width indent .It Ar src .It Ar dst Source/destination of the secure communication is specified as IPv4/v6 address. The .Nm utility can resolve a FQDN into numeric addresses. If the FQDN resolves into multiple addresses, .Nm will install multiple SAD/SPD entries into the kernel by trying all possible combinations. .Fl 4 , .Fl 6 and .Fl n restricts the address resolution of FQDN in certain ways. .Fl 4 and .Fl 6 restrict results into IPv4/v6 addresses only, respectively. .Fl n avoids FQDN resolution and requires addresses to be numeric addresses. .\" .Pp .It Ar protocol .Ar protocol is one of following: .Bl -tag -width Fl -compact .It Li esp ESP based on rfc2406 .It Li esp-old ESP based on rfc1827 .It Li ah AH based on rfc2402 .It Li ah-old AH based on rfc1826 .It Li ipcomp IPComp .It Li tcp TCP-MD5 based on rfc2385 .El .\" .Pp .It Ar spi Security Parameter Index (SPI) for the SAD and the SPD. .Ar spi must be a decimal number, or a hexadecimal number with .Ql 0x prefix. SPI values between 0 and 255 are reserved for future use by IANA and they cannot be used. -TCP-MD5 associations must use 0x1000 and therefore only have per-host -granularity at this time. .\" .Pp .It Ar extensions take some of the following: .Bl -tag -width Fl -compact .\" .It Fl m Ar mode Specify a security protocol mode for use. .Ar mode is one of following: .Li transport , tunnel or .Li any . The default value is .Li any . .\" .It Fl r Ar size Specify window size of bytes for replay prevention. .Ar size must be decimal number in 32-bit word. If .Ar size is zero or not specified, replay check does not take place. .\" .It Fl u Ar id Specify the identifier of the policy entry in SPD. See .Ar policy . .\" .It Fl f Ar pad_option defines the content of the ESP padding. .Ar pad_option is one of following: .Bl -tag -width random-pad -compact .It Li zero-pad All of the padding are zero. .It Li random-pad A series of randomized values are set. .It Li seq-pad A series of sequential increasing numbers started from 1 are set. .El .\" .It Fl f Li nocyclic-seq Do not allow cyclic sequence number. .\" .It Fl lh Ar time .It Fl ls Ar time Specify hard/soft life time duration of the SA. .El .\" .Pp .It Ar algorithm .Bl -tag -width Fl -compact .It Fl E Ar ealgo Ar key Specify an encryption algorithm .Ar ealgo for ESP. .It Xo .Fl E Ar ealgo Ar key .Fl A Ar aalgo Ar key .Xc Specify a encryption algorithm .Ar ealgo , as well as a payload authentication algorithm .Ar aalgo , for ESP. .It Fl A Ar aalgo Ar key Specify an authentication algorithm for AH. .It Fl C Ar calgo Op Fl R Specify a compression algorithm for IPComp. If .Fl R is specified, the .Ar spi field value will be used as the IPComp CPI (compression parameter index) on wire as is. If .Fl R is not specified, the kernel will use well-known CPI on wire, and .Ar spi field will be used only as an index for kernel internal usage. .El .Pp .Ar key must be double-quoted character string, or a series of hexadecimal digits preceded by .Ql 0x . .Pp Possible values for .Ar ealgo , .Ar aalgo and .Ar calgo are specified in separate section. .\" .Pp .It Ar src_range .It Ar dst_range These are selections of the secure communication specified as IPv4/v6 address or IPv4/v6 address range, and it may accompany TCP/UDP port specification. This takes the following form: .Bd -unfilled .Ar address .Ar address/prefixlen .Ar address[port] .Ar address/prefixlen[port] .Ed .Pp .Ar prefixlen and .Ar port must be a decimal number. The square brackets around .Ar port are necessary and are not manpage metacharacters. For FQDN resolution, the rules applicable to .Ar src and .Ar dst apply here as well. .\" .Pp .It Ar upperspec The upper layer protocol to be used. You can use one of the words in .Pa /etc/protocols as .Ar upperspec , as well as .Li icmp6 , .Li ip4 , or .Li any . The word .Li any stands for .Dq any protocol . The protocol number may also be used to specify the .Ar upperspec . A type and code related to ICMPv6 may also be specified as an .Ar upperspec . The type is specified first, followed by a comma and then the relevant code. The specification must be placed after .Li icmp6 . The kernel considers a zero to be a wildcard but cannot distinguish between a wildcard and an ICMPv6 type which is zero. The following example shows a policy where IPSec is not required for inbound Neighbor Solicitations: .Pp .Dl "spdadd ::/0 ::/0 icmp6 135,0 -P in none;" .Pp NOTE: .Ar upperspec does not work in the forwarding case at this moment, as it requires extra reassembly at forwarding node, which is not implemented at this moment. Although there are many protocols in .Pa /etc/protocols , protocols other than TCP, UDP and ICMP may not be suitable to use with IPsec. .\" .Pp .It Ar policy .Ar policy is expressed in one of the following three formats: .Pp .Bl -tag -width 2n -compact .It Fl P Ar direction Li discard .It Fl P Ar direction Li none .It Xo Fl P Ar direction Li ipsec .Ar protocol/mode/src-dst/level Op ... .Xc .El .Pp The direction of a policy must be specified as one of: .Li out , .Li in , .Li discard , .Li none , or .Li ipsec . The .Li discard direction means that packets matching the supplied indices will be discarded while .Li none means that IPsec operations will not take place on the packet and .Li ipsec means that IPsec operation will take place onto the packet. The .Ar protocol/mode/src-dst/level statement gives the rule for how to process the packet. The .Ar protocol is specified as .Li ah , .Li esp or .Li ipcomp . The .Ar mode is either .Li transport or .Li tunnel . If .Ar mode is .Li tunnel , you must specify the end-point addresses of the SA as .Ar src and .Ar dst with a dash, .Sq - , between the addresses. If .Ar mode is .Li transport , both .Ar src and .Ar dst can be omitted. The .Ar level is one of the following: .Li default , use , require or .Li unique . If the SA is not available in every level, the kernel will request the SA from the key exchange daemon. A value of .Li default tells the kernel to use the system wide default protocol e.g.,\& the one from the .Li esp_trans_deflev sysctl variable, when the kernel processes the packet. A value of .Li use means that the kernel will use an SA if it is available, otherwise the kernel will pass the packet as it would normally. A value of .Li require means that an SA is required whenever the kernel sends a packet matched that matches the policy. The .Li unique level is the same as .Li require but, in addition, it allows the policy to bind with the unique out-bound SA. For example, if you specify the policy level .Li unique , .Xr racoon 8 will configure the SA for the policy. If you configure the SA by manual keying for that policy, you can put the decimal number as the policy identifier after .Li unique separated by colon .Ql :\& as in the following example: .Li unique:number . In order to bind this policy to the SA, .Li number must be between 1 and 32767, which corresponds to .Ar extensions Fl u of manual SA configuration. .Pp When you want to use an SA bundle, you can define multiple rules. For example, if an IP header was followed by an AH header followed by an ESP header followed by an upper layer protocol header, the rule would be: .Pp .Dl esp/transport//require ah/transport//require ; .Pp The rule order is very important. .Pp Note that .Dq Li discard and .Dq Li none are not in the syntax described in .Xr ipsec_set_policy 3 . There are small, but important, differences in the syntax. See .Xr ipsec_set_policy 3 for details. .El .\" .Sh ALGORITHMS The following list shows the supported algorithms. The .Sy protocol and .Sy algorithm are almost completely orthogonal. The following list of authentication algorithms can be used as .Ar aalgo in the .Fl A Ar aalgo of the .Ar protocol parameter: .Bd -literal -offset indent algorithm keylen (bits) comment hmac-md5 128 ah: rfc2403 128 ah-old: rfc2085 hmac-sha1 160 ah: rfc2404 160 ah-old: 128bit ICV (no document) keyed-md5 128 ah: 96bit ICV (no document) 128 ah-old: rfc1828 keyed-sha1 160 ah: 96bit ICV (no document) 160 ah-old: 128bit ICV (no document) null 0 to 2048 for debugging hmac-sha2-256 256 ah: 96bit ICV (draft-ietf-ipsec-ciph-sha-256-00) 256 ah-old: 128bit ICV (no document) hmac-sha2-384 384 ah: 96bit ICV (no document) 384 ah-old: 128bit ICV (no document) hmac-sha2-512 512 ah: 96bit ICV (no document) 512 ah-old: 128bit ICV (no document) hmac-ripemd160 160 ah: 96bit ICV (RFC2857) ah-old: 128bit ICV (no document) aes-xcbc-mac 128 ah: 96bit ICV (RFC3566) 128 ah-old: 128bit ICV (no document) tcp-md5 8 to 640 tcp: rfc2385 .Ed .Pp The following is the list of encryption algorithms that can be used as the .Ar ealgo in the .Fl E Ar ealgo of the .Ar protocol parameter: .Bd -literal -offset indent algorithm keylen (bits) comment des-cbc 64 esp-old: rfc1829, esp: rfc2405 3des-cbc 192 rfc2451 null 0 to 2048 rfc2410 blowfish-cbc 40 to 448 rfc2451 cast128-cbc 40 to 128 rfc2451 des-deriv 64 ipsec-ciph-des-derived-01 rijndael-cbc 128/192/256 rfc3602 aes-ctr 160/224/288 draft-ietf-ipsec-ciph-aes-ctr-03 aes-gcm-16 160/224/288 rfc4106 camellia-cbc 128/192/256 rfc4312 .Ed .Pp Note that the first 128/192/256 bits of a key for .Li aes-ctr or aes-gcm-16 will be used as AES key, and remaining 32 bits will be used as nonce. .Pp The following are the list of compression algorithms that can be used as the .Ar calgo in the .Fl C Ar calgo of the .Ar protocol parameter: .Bd -literal -offset indent algorithm comment deflate rfc2394 .Ed .\" .Sh EXIT STATUS .Ex -std .\" .Sh EXAMPLES Add an ESP SA between two IPv6 addresses using the des-cbc encryption algorithm. .Bd -literal -offset indent add 3ffe:501:4819::1 3ffe:501:481d::1 esp 123457 -E des-cbc 0x3ffe05014819ffff ; .Pp .Ed .\" Add an authentication SA between two FQDN specified hosts: .Bd -literal -offset indent add -6 myhost.example.com yourhost.example.com ah 123456 -A hmac-sha1 "AH SA configuration!" ; .Pp .Ed Use both ESP and AH between two numerically specified hosts: .Bd -literal -offset indent add 10.0.11.41 10.0.11.33 esp 0x10001 -E des-cbc 0x3ffe05014819ffff -A hmac-md5 "authentication!!" ; .Pp .Ed Get the SA information associated with first example above: .Bd -literal -offset indent get 3ffe:501:4819::1 3ffe:501:481d::1 ah 123456 ; .Pp .Ed Flush all entries from the database: .Bd -literal -offset indent flush ; .Pp .Ed Dump the ESP entries from the database: .Bd -literal -offset indent dump esp ; .Pp .Ed Add a security policy between two networks that uses ESP in tunnel mode: .Bd -literal -offset indent spdadd 10.0.11.41/32[21] 10.0.11.33/32[any] any -P out ipsec esp/tunnel/192.168.0.1-192.168.1.2/require ; .Pp .Ed Use TCP MD5 between two numerically specified hosts: .Bd -literal -offset indent add 10.1.10.34 10.1.10.36 tcp 0x1000 -A tcp-md5 "TCP-MD5 BGP secret" ; .Ed .\" .Sh SEE ALSO .Xr ipsec_set_policy 3 , .Xr racoon 8 , .Xr sysctl 8 .Rs .%T "Changed manual key configuration for IPsec" .%U http://www.kame.net/newsletter/19991007/ .%D "October 1999" .Re .\" .Sh HISTORY The .Nm utility first appeared in WIDE Hydrangea IPv6 protocol stack kit. The utility was completely re-designed in June 1998. It first appeared in .Fx 4.0 . .\" .Sh BUGS The .Nm utility should report and handle syntax errors better. .Pp For IPsec gateway configuration, .Ar src_range and .Ar dst_range with TCP/UDP port number do not work, as the gateway does not reassemble packets (cannot inspect upper-layer headers). diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 816f929dc2cd..3debe5895aeb 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -1,985 +1,986 @@ # @(#)Makefile 8.1 (Berkeley) 6/18/93 # $FreeBSD$ .include PACKAGE=runtime-manuals MAN= aac.4 \ aacraid.4 \ acpi.4 \ ${_acpi_asus.4} \ ${_acpi_asus_wmi.4} \ ${_acpi_dock.4} \ ${_acpi_fujitsu.4} \ ${_acpi_hp.4} \ ${_acpi_ibm.4} \ ${_acpi_panasonic.4} \ ${_acpi_rapidstart.4} \ ${_acpi_sony.4} \ acpi_thermal.4 \ ${_acpi_toshiba.4} \ acpi_video.4 \ ${_acpi_wmi.4} \ ada.4 \ adv.4 \ adw.4 \ ae.4 \ ${_aesni.4} \ age.4 \ agp.4 \ aha.4 \ ahb.4 \ ahc.4 \ ahci.4 \ ahd.4 \ ${_aibs.4} \ aio.4 \ alc.4 \ ale.4 \ alpm.4 \ altera_atse.4 \ altera_avgen.4 \ altera_jtag_uart.4 \ altera_sdcard.4 \ altq.4 \ amdpm.4 \ ${_amdsbwd.4} \ ${_amdsmb.4} \ ${_amdtemp.4} \ ${_bxe.4} \ amr.4 \ an.4 \ ${_aout.4} \ ${_apic.4} \ arcmsr.4 \ ${_armv8crypto.4} \ ${_asmc.4} \ ata.4 \ ath.4 \ ath_ahb.4 \ ath_hal.4 \ ath_pci.4 \ atkbd.4 \ atkbdc.4 \ atp.4 \ ${_atf_test_case.4} \ ${_atrtc.4} \ ${_attimer.4} \ audit.4 \ auditpipe.4 \ aue.4 \ axe.4 \ axge.4 \ bce.4 \ bcma.4 \ bfe.4 \ bge.4 \ ${_bhyve.4} \ bhnd.4 \ bhndb.4 \ bktr.4 \ blackhole.4 \ bnxt.4 \ bpf.4 \ bridge.4 \ bt.4 \ bwi.4 \ bwn.4 \ ${_bytgpio.4} \ capsicum.4 \ cardbus.4 \ carp.4 \ cas.4 \ cc_cdg.4 \ cc_chd.4 \ cc_cubic.4 \ cc_dctcp.4 \ cc_hd.4 \ cc_htcp.4 \ cc_newreno.4 \ cc_vegas.4 \ ${_ccd.4} \ cd.4 \ cdce.4 \ cfi.4 \ ch.4 \ ciss.4 \ cloudabi.4 \ cm.4 \ cmx.4 \ ${_coretemp.4} \ ${_cpuctl.4} \ cpufreq.4 \ crypto.4 \ ctl.4 \ cue.4 \ cxgb.4 \ cxgbe.4 \ cxgbev.4 \ cy.4 \ cyapa.4 \ da.4 \ dc.4 \ dcons.4 \ dcons_crom.4 \ ddb.4 \ de.4 \ devctl.4 \ disc.4 \ divert.4 \ ${_dpms.4} \ dpt.4 \ ds1307.4 \ ds3231.4 \ ${_dtrace_io.4} \ ${_dtrace_ip.4} \ ${_dtrace_proc.4} \ ${_dtrace_sched.4} \ ${_dtrace_tcp.4} \ ${_dtrace_udp.4} \ dummynet.4 \ ed.4 \ edsc.4 \ ehci.4 \ em.4 \ en.4 \ enc.4 \ epair.4 \ esp.4 \ est.4 \ et.4 \ etherswitch.4 \ eventtimers.4 \ exca.4 \ fatm.4 \ fd.4 \ fdc.4 \ fdt.4 \ fdtbus.4 \ ffclock.4 \ filemon.4 \ firewire.4 \ fpa.4 \ full.4 \ fwe.4 \ fwip.4 \ fwohci.4 \ fxp.4 \ gbde.4 \ gdb.4 \ gem.4 \ geom.4 \ geom_fox.4 \ geom_linux_lvm.4 \ geom_map.4 \ geom_uzip.4 \ gif.4 \ gpio.4 \ gpioiic.4 \ gpioled.4 \ gre.4 \ h_ertt.4 \ hatm.4 \ hifn.4 \ hme.4 \ hpet.4 \ ${_hpt27xx.4} \ ${_hptiop.4} \ ${_hptmv.4} \ ${_hptnr.4} \ ${_hptrr.4} \ ${_hv_kvp.4} \ ${_hv_netvsc.4} \ ${_hv_storvsc.4} \ ${_hv_utils.4} \ ${_hv_vmbus.4} \ ${_hv_vss.4} \ hwpmc.4 \ ichsmb.4 \ ${_ichwd.4} \ icmp.4 \ icmp6.4 \ ida.4 \ + if_ipsec.4 \ ifmib.4 \ ig4.4 \ igb.4 \ igmp.4 \ iic.4 \ iicbb.4 \ iicbus.4 \ iicsmb.4 \ iir.4 \ inet.4 \ inet6.4 \ intpm.4 \ intro.4 \ ${_io.4} \ ${_ioat.4} \ ip.4 \ ip6.4 \ ipfirewall.4 \ ipheth.4 \ ${_ipmi.4} \ ips.4 \ ipsec.4 \ ipw.4 \ ipwfw.4 \ isci.4 \ isl.4 \ ismt.4 \ isp.4 \ ispfw.4 \ iwi.4 \ iwifw.4 \ iwm.4 \ iwmfw.4 \ iwn.4 \ iwnfw.4 \ ixgb.4 \ ixgbe.4 \ ixl.4 \ ixlv.4 \ jme.4 \ joy.4 \ kbdmux.4 \ keyboard.4 \ kld.4 \ ksyms.4 \ ktr.4 \ kue.4 \ lagg.4 \ le.4 \ led.4 \ lge.4 \ ${_linux.4} \ lm75.4 \ lmc.4 \ lo.4 \ lp.4 \ lpbb.4 \ lpt.4 \ mac.4 \ mac_biba.4 \ mac_bsdextended.4 \ mac_ifoff.4 \ mac_lomac.4 \ mac_mls.4 \ mac_none.4 \ mac_partition.4 \ mac_portacl.4 \ mac_seeotheruids.4 \ mac_stub.4 \ mac_test.4 \ malo.4 \ md.4 \ mdio.4 \ me.4 \ mem.4 \ meteor.4 \ mfi.4 \ miibus.4 \ mk48txx.4 \ mld.4 \ mlx.4 \ mlx4en.4 \ mlx5en.4 \ mly.4 \ mmc.4 \ mmcsd.4 \ mn.4 \ mod_cc.4 \ mos.4 \ mouse.4 \ mpr.4 \ mps.4 \ mpt.4 \ mrsas.4 \ msk.4 \ mtio.4 \ multicast.4 \ mvs.4 \ mwl.4 \ mwlfw.4 \ mxge.4 \ my.4 \ nand.4 \ nandsim.4 \ natm.4 \ natmip.4 \ ncr.4 \ ncv.4 \ ${_ndis.4} \ net80211.4 \ netfpga10g_nf10bmac.4 \ netgraph.4 \ netintro.4 \ netmap.4 \ ${_nfe.4} \ ${_nfsmb.4} \ ng_async.4 \ ng_atm.4 \ ngatmbase.4 \ ng_atmllc.4 \ ng_bpf.4 \ ng_bridge.4 \ ng_bt3c.4 \ ng_btsocket.4 \ ng_car.4 \ ng_ccatm.4 \ ng_cisco.4 \ ng_deflate.4 \ ng_device.4 \ nge.4 \ ng_echo.4 \ ng_eiface.4 \ ng_etf.4 \ ng_ether.4 \ ng_ether_echo.4 \ ng_frame_relay.4 \ ng_gif.4 \ ng_gif_demux.4 \ ng_h4.4 \ ng_hci.4 \ ng_hole.4 \ ng_hub.4 \ ng_iface.4 \ ng_ipfw.4 \ ng_ip_input.4 \ ng_ksocket.4 \ ng_l2cap.4 \ ng_l2tp.4 \ ng_lmi.4 \ ng_mppc.4 \ ng_nat.4 \ ng_netflow.4 \ ng_one2many.4 \ ng_patch.4 \ ng_ppp.4 \ ng_pppoe.4 \ ng_pptpgre.4 \ ng_pred1.4 \ ng_rfc1490.4 \ ng_socket.4 \ ng_source.4 \ ng_split.4 \ ng_sppp.4 \ ng_sscfu.4 \ ng_sscop.4 \ ng_tag.4 \ ng_tcpmss.4 \ ng_tee.4 \ ng_tty.4 \ ng_ubt.4 \ ng_UI.4 \ ng_uni.4 \ ng_vjc.4 \ ng_vlan.4 \ nmdm.4 \ nsp.4 \ ${_ntb_hw.4} \ ${_ntb_transport.4} \ ${_if_ntb.4} \ null.4 \ numa.4 \ ${_nvd.4} \ ${_nvme.4} \ ${_nvram.4} \ ${_nvram2env.4} \ ${_nxge.4} \ oce.4 \ ohci.4 \ orm.4 \ ow.4 \ ow_temp.4 \ owc.4 \ ${_padlock.4} \ pass.4 \ patm.4 \ pccard.4 \ pccbb.4 \ pcf.4 \ pci.4 \ pcib.4 \ pcic.4 \ pcm.4 \ pcn.4 \ ${_pf.4} \ ${_pflog.4} \ ${_pfsync.4} \ pim.4 \ pms.4 \ polling.4 \ ppbus.4 \ ppc.4 \ ppi.4 \ procdesc.4 \ proto.4 \ psm.4 \ pst.4 \ pt.4 \ pts.4 \ pty.4 \ puc.4 \ ${_qlxge.4} \ ${_qlxgb.4} \ ${_qlxgbe.4} \ ral.4 \ random.4 \ rc.4 \ re.4 \ rgephy.4 \ rights.4 \ rl.4 \ rndtest.4 \ route.4 \ rp.4 \ rtwn.4 \ rtwnfw.4 \ rtwn_pci.4 \ rue.4 \ sa.4 \ safe.4 \ sbp.4 \ sbp_targ.4 \ scc.4 \ sched_4bsd.4 \ sched_ule.4 \ screen.4 \ scsi.4 \ sctp.4 \ sdhci.4 \ sem.4 \ send.4 \ ses.4 \ sf.4 \ ${_sfxge.4} \ sge.4 \ siba.4 \ siftr.4 \ siis.4 \ simplebus.4 \ sio.4 \ sis.4 \ sk.4 \ smb.4 \ smbus.4 \ smp.4 \ smsc.4 \ sn.4 \ snd_ad1816.4 \ snd_als4000.4 \ snd_atiixp.4 \ snd_cmi.4 \ snd_cs4281.4 \ snd_csa.4 \ snd_ds1.4 \ snd_emu10k1.4 \ snd_emu10kx.4 \ snd_envy24.4 \ snd_envy24ht.4 \ snd_es137x.4 \ snd_ess.4 \ snd_fm801.4 \ snd_gusc.4 \ snd_hda.4 \ snd_hdspe.4 \ snd_ich.4 \ snd_maestro3.4 \ snd_maestro.4 \ snd_mss.4 \ snd_neomagic.4 \ snd_sbc.4 \ snd_solo.4 \ snd_spicds.4 \ snd_t4dwave.4 \ snd_uaudio.4 \ snd_via8233.4 \ snd_via82c686.4 \ snd_vibes.4 \ snp.4 \ ${_spkr.4} \ splash.4 \ sppp.4 \ ste.4 \ stf.4 \ stg.4 \ stge.4 \ sym.4 \ syncache.4 \ syncer.4 \ syscons.4 \ sysmouse.4 \ tap.4 \ targ.4 \ tcp.4 \ tdfx.4 \ terasic_mtl.4 \ termios.4 \ textdump.4 \ ti.4 \ timecounters.4 \ tl.4 \ ${_tpm.4} \ trm.4 \ tty.4 \ tun.4 \ twa.4 \ twe.4 \ tws.4 \ tx.4 \ txp.4 \ ure.4 \ vale.4 \ vga.4 \ vge.4 \ viapm.4 \ ${_viawd.4} \ ${_virtio.4} \ ${_virtio_balloon.4} \ ${_virtio_blk.4} \ ${_virtio_console.4} \ ${_virtio_random.4} \ ${_virtio_scsi.4} \ vkbd.4 \ vlan.4 \ vxlan.4 \ ${_vmx.4} \ vpo.4 \ vr.4 \ vt.4 \ vte.4 \ ${_vtnet.4} \ ${_vxge.4} \ watchdog.4 \ wb.4 \ ${_wbwd.4} \ wi.4 \ witness.4 \ wlan.4 \ wlan_acl.4 \ wlan_amrr.4 \ wlan_ccmp.4 \ wlan_tkip.4 \ wlan_wep.4 \ wlan_xauth.4 \ ${_wpi.4} \ wsp.4 \ xe.4 \ ${_xen.4} \ xhci.4 \ xl.4 \ ${_xnb.4} \ xpt.4 \ zero.4 MLINKS= ae.4 if_ae.4 MLINKS+=age.4 if_age.4 MLINKS+=agp.4 agpgart.4 MLINKS+=alc.4 if_alc.4 MLINKS+=ale.4 if_ale.4 MLINKS+=altera_atse.4 atse.4 MLINKS+=altera_sdcard.4 altera_sdcardc.4 MLINKS+=altq.4 ALTQ.4 MLINKS+=ath.4 if_ath.4 MLINKS+=ath_pci.4 if_ath_pci.4 MLINKS+=an.4 if_an.4 MLINKS+=aue.4 if_aue.4 MLINKS+=axe.4 if_axe.4 MLINKS+=bce.4 if_bce.4 MLINKS+=bfe.4 if_bfe.4 MLINKS+=bge.4 if_bge.4 MLINKS+=bktr.4 brooktree.4 MLINKS+=bnxt.4 if_bnxt.4 MLINKS+=bridge.4 if_bridge.4 MLINKS+=bwi.4 if_bwi.4 MLINKS+=bwn.4 if_bwn.4 MLINKS+=${_bxe.4} ${_if_bxe.4} MLINKS+=cas.4 if_cas.4 MLINKS+=cdce.4 if_cdce.4 MLINKS+=cfi.4 cfid.4 MLINKS+=cloudabi.4 cloudabi64.4 MLINKS+=crypto.4 cryptodev.4 MLINKS+=cue.4 if_cue.4 MLINKS+=cxgb.4 if_cxgb.4 MLINKS+=cxgbe.4 if_cxgbe.4 \ cxgbe.4 vcxgbe.4 \ cxgbe.4 if_vcxgbe.4 \ cxgbe.4 cxl.4 \ cxgbe.4 if_cxl.4 \ cxgbe.4 vcxl.4 \ cxgbe.4 if_vcxl.4 \ cxgbe.4 cc.4 \ cxgbe.4 if_cc.4 \ cxgbe.4 vcc.4 \ cxgbe.4 if_vcc.4 MLINKS+=cxgbev.4 if_cxgbev.4 \ cxgbev.4 cxlv.4 \ cxgbev.4 if_cxlv.4 \ cxgbev.4 ccv.4 \ cxgbev.4 if_ccv.4 MLINKS+=dc.4 if_dc.4 MLINKS+=de.4 if_de.4 MLINKS+=disc.4 if_disc.4 MLINKS+=ed.4 if_ed.4 MLINKS+=edsc.4 if_edsc.4 MLINKS+=em.4 if_em.4 MLINKS+=en.4 if_en.4 MLINKS+=enc.4 if_enc.4 MLINKS+=epair.4 if_epair.4 MLINKS+=et.4 if_et.4 MLINKS+=fatm.4 if_fatm.4 MLINKS+=fd.4 stderr.4 \ fd.4 stdin.4 \ fd.4 stdout.4 MLINKS+=fdt.4 FDT.4 MLINKS+=firewire.4 ieee1394.4 MLINKS+=fpa.4 fea.4 MLINKS+=fwe.4 if_fwe.4 MLINKS+=fwip.4 if_fwip.4 MLINKS+=fxp.4 if_fxp.4 MLINKS+=gem.4 if_gem.4 MLINKS+=geom.4 GEOM.4 MLINKS+=gif.4 if_gif.4 MLINKS+=gpio.4 gpiobus.4 MLINKS+=gre.4 if_gre.4 MLINKS+=hatm.4 if_hatm.4 MLINKS+=hme.4 if_hme.4 MLINKS+=hpet.4 acpi_hpet.4 MLINKS+=${_hptrr.4} ${_rr232x.4} MLINKS+=${_attimer.4} ${_i8254.4} MLINKS+=igb.4 if_igb.4 MLINKS+=ip.4 rawip.4 MLINKS+=ipfirewall.4 ipaccounting.4 \ ipfirewall.4 ipacct.4 \ ipfirewall.4 ipfw.4 MLINKS+=ipheth.4 if_ipheth.4 MLINKS+=ipw.4 if_ipw.4 MLINKS+=iwi.4 if_iwi.4 MLINKS+=iwm.4 if_iwm.4 MLINKS+=iwn.4 if_iwn.4 MLINKS+=ixgb.4 if_ixgb.4 MLINKS+=ixgbe.4 ix.4 MLINKS+=ixgbe.4 if_ix.4 MLINKS+=ixgbe.4 if_ixgbe.4 MLINKS+=ixl.4 if_ixl.4 MLINKS+=ixlv.4 if_ixlv.4 MLINKS+=jme.4 if_jme.4 MLINKS+=kue.4 if_kue.4 MLINKS+=lagg.4 trunk.4 MLINKS+=lagg.4 if_lagg.4 MLINKS+=le.4 if_le.4 MLINKS+=lge.4 if_lge.4 MLINKS+=lmc.4 if_lmc.4 MLINKS+=lo.4 loop.4 MLINKS+=lp.4 plip.4 MLINKS+=malo.4 if_malo.4 MLINKS+=md.4 vn.4 MLINKS+=mem.4 kmem.4 MLINKS+=mfi.4 mfi_linux.4 \ mfi.4 mfip.4 MLINKS+=mlx5en.4 mce.4 MLINKS+=mn.4 if_mn.4 MLINKS+=mos.4 if_mos.4 MLINKS+=msk.4 if_msk.4 MLINKS+=mwl.4 if_mwl.4 MLINKS+=mxge.4 if_mxge.4 MLINKS+=my.4 if_my.4 MLINKS+=${_ndis.4} ${_if_ndis.4} MLINKS+=netfpga10g_nf10bmac.4 if_nf10bmac.4 MLINKS+=netintro.4 net.4 \ netintro.4 networking.4 MLINKS+=${_nfe.4} ${_if_nfe.4} MLINKS+=nge.4 if_nge.4 MLINKS+=${_ntb_hw.4} ${_ntb.4} MLINKS+=${_nxge.4} ${_if_nxge.4} MLINKS+=ow.4 onewire.4 MLINKS+=patm.4 if_patm.4 MLINKS+=pccbb.4 cbb.4 MLINKS+=pcm.4 snd.4 \ pcm.4 sound.4 MLINKS+=pcn.4 if_pcn.4 MLINKS+=pms.4 pmspcv.4 MLINKS+=ral.4 if_ral.4 MLINKS+=re.4 if_re.4 MLINKS+=rl.4 if_rl.4 MLINKS+=rtwn.4 if_rtwn.4 MLINKS+=rtwn_pci.4 if_rtwn_pci.4 MLINKS+=rue.4 if_rue.4 MLINKS+=scsi.4 CAM.4 \ scsi.4 cam.4 \ scsi.4 scbus.4 \ scsi.4 SCSI.4 MLINKS+=sf.4 if_sf.4 MLINKS+=sge.4 if_sge.4 MLINKS+=sis.4 if_sis.4 MLINKS+=sk.4 if_sk.4 MLINKS+=smp.4 SMP.4 MLINKS+=smsc.4 if_smsc.4 MLINKS+=sn.4 if_sn.4 MLINKS+=snd_envy24.4 snd_ak452x.4 MLINKS+=snd_sbc.4 snd_sb16.4 \ snd_sbc.4 snd_sb8.4 MLINKS+=${_spkr.4} ${_speaker.4} MLINKS+=splash.4 screensaver.4 MLINKS+=ste.4 if_ste.4 MLINKS+=stf.4 if_stf.4 MLINKS+=stge.4 if_stge.4 MLINKS+=syncache.4 syncookies.4 MLINKS+=syscons.4 sc.4 MLINKS+=tap.4 if_tap.4 MLINKS+=tdfx.4 tdfx_linux.4 MLINKS+=ti.4 if_ti.4 MLINKS+=tl.4 if_tl.4 MLINKS+=tun.4 if_tun.4 MLINKS+=tx.4 if_tx.4 MLINKS+=txp.4 if_txp.4 MLINKS+=ure.4 if_ure.4 MLINKS+=vge.4 if_vge.4 MLINKS+=vlan.4 if_vlan.4 MLINKS+=vxlan.4 if_vxlan.4 MLINKS+=${_vmx.4} ${_if_vmx.4} MLINKS+=vpo.4 imm.4 MLINKS+=vr.4 if_vr.4 MLINKS+=vte.4 if_vte.4 MLINKS+=${_vtnet.4} ${_if_vtnet.4} MLINKS+=${_vxge.4} ${_if_vxge.4} MLINKS+=watchdog.4 SW_WATCHDOG.4 MLINKS+=wb.4 if_wb.4 MLINKS+=wi.4 if_wi.4 MLINKS+=${_wpi.4} ${_if_wpi.4} MLINKS+=xe.4 if_xe.4 MLINKS+=xl.4 if_xl.4 .if ${MACHINE_CPUARCH} == "aarch64" _armv8crypto.4= armv8crypto.4 .endif .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" _acpi_asus.4= acpi_asus.4 _acpi_asus_wmi.4= acpi_asus_wmi.4 _acpi_dock.4= acpi_dock.4 _acpi_fujitsu.4=acpi_fujitsu.4 _acpi_hp.4= acpi_hp.4 _acpi_ibm.4= acpi_ibm.4 _acpi_panasonic.4=acpi_panasonic.4 _acpi_rapidstart.4=acpi_rapidstart.4 _acpi_sony.4= acpi_sony.4 _acpi_toshiba.4=acpi_toshiba.4 _acpi_wmi.4= acpi_wmi.4 _aesni.4= aesni.4 _aout.4= aout.4 _apic.4= apic.4 _atrtc.4= atrtc.4 _attimer.4= attimer.4 _aibs.4= aibs.4 _amdsbwd.4= amdsbwd.4 _amdsmb.4= amdsmb.4 _amdtemp.4= amdtemp.4 _asmc.4= asmc.4 _bxe.4= bxe.4 _bytgpio.4= bytgpio.4 _coretemp.4= coretemp.4 _cpuctl.4= cpuctl.4 _dpms.4= dpms.4 _hpt27xx.4= hpt27xx.4 _hptiop.4= hptiop.4 _hptmv.4= hptmv.4 _hptnr.4= hptnr.4 _hptrr.4= hptrr.4 _hv_kvp.4= hv_kvp.4 _hv_netvsc.4= hv_netvsc.4 _hv_storvsc.4= hv_storvsc.4 _hv_utils.4= hv_utils.4 _hv_vmbus.4= hv_vmbus.4 _hv_vss.4= hv_vss.4 _i8254.4= i8254.4 _ichwd.4= ichwd.4 _if_bxe.4= if_bxe.4 _if_ndis.4= if_ndis.4 _if_nfe.4= if_nfe.4 _if_nxge.4= if_nxge.4 _if_urtw.4= if_urtw.4 _if_vmx.4= if_vmx.4 _if_vtnet.4= if_vtnet.4 _if_vxge.4= if_vxge.4 _if_wpi.4= if_wpi.4 _ipmi.4= ipmi.4 _io.4= io.4 _linux.4= linux.4 _ndis.4= ndis.4 _nfe.4= nfe.4 _nfsmb.4= nfsmb.4 _nvd.4= nvd.4 _nvme.4= nvme.4 _nvram.4= nvram.4 _nxge.4= nxge.4 _virtio.4= virtio.4 _virtio_balloon.4=virtio_balloon.4 _virtio_blk.4= virtio_blk.4 _virtio_console.4=virtio_console.4 _virtio_random.4= virtio_random.4 _virtio_scsi.4= virtio_scsi.4 _vmx.4= vmx.4 _vtnet.4= vtnet.4 _vxge.4= vxge.4 _padlock.4= padlock.4 _rr232x.4= rr232x.4 _speaker.4= speaker.4 _spkr.4= spkr.4 _tpm.4= tpm.4 _urtw.4= urtw.4 _viawd.4= viawd.4 _wbwd.4= wbwd.4 _wpi.4= wpi.4 _xen.4= xen.4 _xnb.4= xnb.4 .endif .if ${MACHINE_CPUARCH} == "amd64" _if_ntb.4= if_ntb.4 _ioat.4= ioat.4 _ntb.4= ntb.4 _ntb_hw.4= ntb_hw.4 _ntb_transport.4=ntb_transport.4 _qlxge.4= qlxge.4 _qlxgb.4= qlxgb.4 _qlxgbe.4= qlxgbe.4 _sfxge.4= sfxge.4 MLINKS+=qlxge.4 if_qlxge.4 MLINKS+=qlxgb.4 if_qlxgb.4 MLINKS+=qlxgbe.4 if_qlxgbe.4 MLINKS+=sfxge.4 if_sfxge.4 .if ${MK_BHYVE} != "no" _bhyve.4= bhyve.4 .endif .endif .if ${MACHINE_CPUARCH} == "mips" _nvram2env.4= nvram2env.4 .endif .if exists(${.CURDIR}/man4.${MACHINE_CPUARCH}) SUBDIR= man4.${MACHINE_CPUARCH} .endif .if ${MK_BLUETOOTH} != "no" MAN+= ng_bluetooth.4 .endif .if ${MK_CCD} != "no" _ccd.4= ccd.4 .endif .if ${MK_CDDL} != "no" _dtrace_io.4= dtrace_io.4 _dtrace_ip.4= dtrace_ip.4 _dtrace_proc.4= dtrace_proc.4 _dtrace_sched.4= dtrace_sched.4 _dtrace_tcp.4= dtrace_tcp.4 _dtrace_udp.4= dtrace_udp.4 .endif .if ${MK_ISCSI} != "no" MAN+= iscsi.4 MAN+= iscsi_initiator.4 MAN+= iser.4 .endif .if ${MK_OFED} != "no" MAN+= mlx4ib.4 .endif .if ${MK_TESTS} != "no" ATF= ${.CURDIR}/../../../contrib/atf .PATH: ${ATF}/doc _atf_test_case.4= atf-test-case.4 .endif .if ${MK_PF} != "no" _pf.4= pf.4 _pflog.4= pflog.4 _pfsync.4= pfsync.4 .endif .if ${MK_USB} != "no" MAN+= \ otus.4 \ otusfw.4 \ rsu.4 \ rsufw.4 \ rtwn_usb.4 \ rum.4 \ run.4 \ runfw.4 \ u3g.4 \ uark.4 \ uart.4 \ uath.4 \ ubsa.4 \ ubsec.4 \ ubser.4 \ ubtbcmfw.4 \ uchcom.4 \ ucom.4 \ ucycom.4 \ udav.4 \ udbp.4 \ udp.4 \ udplite.4 \ udl.4 \ uep.4 \ ufm.4 \ ufoma.4 \ uftdi.4 \ ugen.4 \ ugold.4 \ uhci.4 \ uhid.4 \ uhso.4 \ uipaq.4 \ ukbd.4 \ uled.4 \ ulpt.4 \ umass.4 \ umcs.4 \ umct.4 \ umodem.4 \ umoscom.4 \ ums.4 \ unix.4 \ upgt.4 \ uplcom.4 \ ural.4 \ urio.4 \ urndis.4 \ ${_urtw.4} \ usb.4 \ usb_quirk.4 \ usb_template.4 \ usfs.4 \ uslcom.4 \ utopia.4 \ uvisor.4 \ uvscom.4 \ zyd.4 MLINKS+=otus.4 if_otus.4 MLINKS+=rsu.4 if_rsu.4 MLINKS+=rtwn_usb.4 if_rtwn_usb.4 MLINKS+=rum.4 if_rum.4 MLINKS+=run.4 if_run.4 MLINKS+=u3g.4 u3gstub.4 MLINKS+=uath.4 if_uath.4 MLINKS+=udav.4 if_udav.4 MLINKS+=upgt.4 if_upgt.4 MLINKS+=ural.4 if_ural.4 MLINKS+=urndis.4 if_urndis.4 MLINKS+=${_urtw.4} ${_if_urtw.4} MLINKS+=zyd.4 if_zyd.4 .endif .include diff --git a/share/man/man4/if_ipsec.4 b/share/man/man4/if_ipsec.4 new file mode 100644 index 000000000000..2e978c0cdb94 --- /dev/null +++ b/share/man/man4/if_ipsec.4 @@ -0,0 +1,141 @@ +.\" Copyright (c) 2017 Andrey V. Elsukov +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd February 6, 2017 +.Dt if_ipsec 4 +.Os +.Sh NAME +.Nm if_ipsec +.Nd IPsec virtual tunneling interface +.Sh SYNOPSIS +The +.Cm if_ipsec +network interface is a part of the +.Fx +IPsec implementation. +To compile it into the kernel, place this line in the kernel +configuration file: +.Bd -ragged -offset indent +.Cd "options IPSEC" +.Ed +.Pp +It can also be loaded as part of the +.Cm ipsec +kernel module if the kernel was compiled with +.Bd -ragged -offset indent +.Cd "options IPSEC_SUPPORT" +.Ed +.Sh DESCRIPTION +The +.Nm +network interface is targeted for creating route-based VPNs. +It can tunnel IPv4 and IPv6 traffic over either IPv4 or IPv6 and secure +it with ESP. +.Pp +.Nm +interfaces are dynamically created and destroyed with the +.Xr ifconfig 8 +.Cm create +and +.Cm destroy +subcommands. +The administrator must configure IPsec +.Cm tunnel +endpoint addresses. +These addresses will be used for the outer IP header of ESP packets. +The administrator can also configure the protocol and addresses for the inner +IP header with +.Xr ifconfig 8 , +and modify the routing table to route the packets through the +.Nm +interface. +.Pp +When the +.Nm +interface is configured, it automatically creates special security policies. +These policies can be used to acquire security associations from the IKE daemon, +which are needed for establishing an IPsec tunnel. +It is also possible to create needed security associations manually with the +.Xr setkey 8 +utility. +.Pp +Each +.Nm +interface has an additional numeric configuration option +.Cm reqid Ar id . +This +.Ar id +is used to distinguish traffic and security policies between several +.Nm +interfaces. +The +.Cm reqid +can be specified on interface creation and changed later. +If not specified, it is automatically assigned. +Note that changing +.Cm reqid +will lead to generation of new security policies, and this +may require creating new security associations. +.Sh EXAMPLES +The example below shows manual configuration of an IPsec tunnel +between two FreeBSD hosts. +Host A has the IP address 192.168.0.3, and host B has the IP address +192.168.0.5. +.Pp +On host A: +.Bd -literal -offset indent +ifconfig ipsec0 create reqid 100 +ifconfig ipsec0 inet tunnel 192.168.0.3 192.168.0.5 +ifconfig ipsec0 inet 172.16.0.3/16 172.16.0.5 +setkey -c +add 192.168.0.3 192.168.0.5 esp 10000 -m tunnel -u 100 -E rijndael-cbc "VerySecureKey!!1"; +add 192.168.0.5 192.168.0.3 esp 10001 -m tunnel -u 100 -E rijndael-cbc "VerySecureKey!!2"; +^D +.Ed +.Pp +On host B: +.Bd -literal -offset indent +ifconfig ipsec0 create reqid 200 +ifconfig ipsec0 inet tunnel 192.168.0.5 192.168.0.3 +ifconfig ipsec0 inet 172.16.0.5/16 172.16.0.3 +setkey -c +add 192.168.0.3 192.168.0.5 esp 10000 -m tunnel -u 200 -E rijndael-cbc "VerySecureKey!!1"; +add 192.168.0.5 192.168.0.3 esp 10001 -m tunnel -u 200 -E rijndael-cbc "VerySecureKey!!2"; +^D +.Ed +.Pp +Note the value 100 on host A and value 200 on host B are used as reqid. +The same value must be used as identifier of the policy entry in the +.Xr setkey 8 +command. +.Sh SEE ALSO +.Xr gif 4 , +.Xr gre 4 , +.Xr ipsec 4 , +.Xr ifconfig 8 , +.Xr setkey 8 +.Sh AUTHORS +.An Andrey V. Elsukov Aq Mt ae@FreeBSD.org diff --git a/share/man/man4/ipsec.4 b/share/man/man4/ipsec.4 index c6a3f244c7de..9bee93153a54 100644 --- a/share/man/man4/ipsec.4 +++ b/share/man/man4/ipsec.4 @@ -1,415 +1,446 @@ .\" $KAME: ipsec.4,v 1.17 2001/06/27 15:25:10 itojun Exp $ .\" .\" Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the project nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd November 29, 2009 +.Dd February 6, 2017 .Dt IPSEC 4 .Os .Sh NAME .Nm ipsec .Nd Internet Protocol Security protocol .Sh SYNOPSIS .Cd "options IPSEC" +.Cd "options IPSEC_SUPPORT" .Cd "device crypto" .Pp .In sys/types.h .In netinet/in.h .In netipsec/ipsec.h .In netipsec/ipsec6.h .Sh DESCRIPTION .Nm is a security protocol implemented within the Internet Protocol layer of the networking stack. .Nm is defined for both IPv4 and IPv6 .Xr ( inet 4 and .Xr inet6 4 ) . .Nm is a set of protocols, .Tn ESP (for Encapsulating Security Payload) .Tn AH (for Authentication Header), and .Tn IPComp (for IP Payload Compression Protocol) that provide security services for IP datagrams. AH both authenticates and guarantees the integrity of an IP packet by attaching a cryptographic checksum computed using one-way hash functions. ESP, in addition, prevents unauthorized parties from reading the payload of an IP packet by also encrypting it. IPComp tries to increase communication performance by compressing IP payload, thus reducing the amount of data sent. This will help nodes on slow links but with enough computing power. .Nm operates in one of two modes: transport mode or tunnel mode. Transport mode is used to protect peer-to-peer communication between end nodes. Tunnel mode encapsulates IP packets within other IP packets and is designed for security gateways such as VPN endpoints. .Pp System configuration requires the .Xr crypto 4 subsystem. .Pp The packets can be passed to a virtual .Xr enc 4 interface, to perform packet filtering before outbound encryption and after decapsulation inbound. .Pp To properly filter on the inner packets of an .Nm tunnel with firewalls, you can change the values of the following sysctls .Bl -column net.inet6.ipsec6.filtertunnel default enable .It Sy "Name Default Enable" .It "net.inet.ipsec.filtertunnel 0 1" .It "net.inet6.ipsec6.filtertunnel 0 1" .El .\" .Ss Kernel interface .Nm is controlled by a key management and policy engine, that reside in the operating system kernel. Key management is the process of associating keys with security associations, also know as SAs. Policy management dictates when new security associations created or destroyed. .Pp The key management engine can be accessed from userland by using .Dv PF_KEY sockets. The .Dv PF_KEY socket API is defined in RFC2367. .Pp The policy engine is controlled by an extension to the .Dv PF_KEY API, .Xr setsockopt 2 operations, and .Xr sysctl 3 interface. The kernel implements an extended version of the .Dv PF_KEY interface and allows the programmer to define IPsec policies which are similar to the per-packet filters. The .Xr setsockopt 2 interface is used to define per-socket behavior, and .Xr sysctl 3 interface is used to define host-wide default behavior. .Pp The kernel code does not implement a dynamic encryption key exchange protocol such as IKE (Internet Key Exchange). Key exchange protocols are beyond what is necessary in the kernel and should be implemented as daemon processes which call the .Nm APIs. .\" .Ss Policy management IPsec policies can be managed in one of two ways, either by configuring per-socket policies using the .Xr setsockopt 2 system calls, or by configuring kernel level packet filter-based policies using the .Dv PF_KEY interface, via the .Xr setkey 8 you can define IPsec policies against packets using rules similar to packet filtering rules. Refer to .Xr setkey 8 on how to use it. .Pp +Depending on the socket's address family, IPPROTO_IP or IPPROTO_IPV6 +transport level and IP_IPSEC_POLICY or IPV6_IPSEC_POLICY socket options +may be used to configure per-socket security policies. +A properly-formed IPsec policy specification structure can be +created using +.Xr ipsec_set_policy 3 +function and used as socket option value for the +.Xr setsockopt 2 +call. +.Pp When setting policies using the .Xr setkey 8 command, the .Dq Li default option instructs the system to use its default policy, as explained below, for processing packets. The following sysctl variables are available for configuring the system's IPsec behavior. The variables can have one of two values. A .Li 1 means .Dq Li use , which means that if there is a security association then use it but if there is not then the packets are not processed by IPsec. The value .Li 2 is synonymous with .Dq Li require , which requires that a security association must exist for the packets to move, and not be dropped. These terms are defined in .Xr ipsec_set_policy 8 . .Bl -column net.inet6.ipsec6.esp_trans_deflev integerxxx .It Sy "Name Type Changeable" .It "net.inet.ipsec.esp_trans_deflev integer yes" .It "net.inet.ipsec.esp_net_deflev integer yes" .It "net.inet.ipsec.ah_trans_deflev integer yes" .It "net.inet.ipsec.ah_net_deflev integer yes" .It "net.inet6.ipsec6.esp_trans_deflev integer yes" .It "net.inet6.ipsec6.esp_net_deflev integer yes" .It "net.inet6.ipsec6.ah_trans_deflev integer yes" .It "net.inet6.ipsec6.ah_net_deflev integer yes" .El .Pp If the kernel does not find a matching, system wide, policy then the default value is applied. The system wide default policy is specified by the following .Xr sysctl 8 variables. .Li 0 means .Dq Li discard which asks the kernel to drop the packet. .Li 1 means .Dq Li none . .Bl -column net.inet6.ipsec6.def_policy integerxxx .It Sy "Name Type Changeable" .It "net.inet.ipsec.def_policy integer yes" .It "net.inet6.ipsec6.def_policy integer yes" .El .\" .Ss Miscellaneous sysctl variables When the .Nm protocols are configured for use, all protocols are included in the system. To selectively enable/disable protocols, use .Xr sysctl 8 . .Bl -column net.inet.ipcomp.ipcomp_enable .It Sy "Name Default" .It "net.inet.esp.esp_enable On" .It "net.inet.ah.ah_enable On" .It "net.inet.ipcomp.ipcomp_enable On" .El .Pp In addition the following variables are accessible via .Xr sysctl 8 , for tweaking the kernel's IPsec behavior: .Bl -column net.inet6.ipsec6.inbonud_call_ike integerxxx .It Sy "Name Type Changeable" .It "net.inet.ipsec.ah_cleartos integer yes" .It "net.inet.ipsec.ah_offsetmask integer yes" .It "net.inet.ipsec.dfbit integer yes" .It "net.inet.ipsec.ecn integer yes" .It "net.inet.ipsec.debug integer yes" +.It "net.inet.ipsec.natt_cksum_policy integer yes" +.It "net.inet.ipsec.check_policy_history integer yes" .It "net.inet6.ipsec6.ecn integer yes" .It "net.inet6.ipsec6.debug integer yes" .El .Pp The variables are interpreted as follows: .Bl -tag -width 6n .It Li ipsec.ah_cleartos If set to non-zero, the kernel clears the type-of-service field in the IPv4 header during AH authentication data computation. This variable is used to get current systems to inter-operate with devices that implement RFC1826 AH. It should be set to non-zero (clear the type-of-service field) for RFC2402 conformance. .It Li ipsec.ah_offsetmask During AH authentication data computation, the kernel will include a 16bit fragment offset field (including flag bits) in the IPv4 header, after computing logical AND with the variable. The variable is used for inter-operating with devices that implement RFC1826 AH. It should be set to zero (clear the fragment offset field during computation) for RFC2402 conformance. .It Li ipsec.dfbit This variable configures the kernel behavior on IPv4 IPsec tunnel encapsulation. If set to 0, the DF bit on the outer IPv4 header will be cleared while 1 means that the outer DF bit is set regardless from the inner DF bit and 2 indicates that the DF bit is copied from the inner header to the outer one. The variable is supplied to conform to RFC2401 chapter 6.1. .It Li ipsec.ecn If set to non-zero, IPv4 IPsec tunnel encapsulation/decapsulation behavior will be friendly to ECN (explicit congestion notification), as documented in .Li draft-ietf-ipsec-ecn-02.txt . .Xr gif 4 talks more about the behavior. .It Li ipsec.debug If set to non-zero, debug messages will be generated via .Xr syslog 3 . +.It Li ipsec.natt_cksum_policy +Controls how the kernel handles TCP and UDP checksums when ESP in UDP +encapsulation is used for IPsec transport mode. +If set to a non-zero value, the kernel fully recomputes checksums for +inbound TCP segments and UDP datagrams after they are decapsulated and +decrypted. +If set to 0 and original addresses were configured for corresponding SA +by the IKE daemon, the kernel incrementally recomputes checksums for +inbound TCP segments and UDP datagrams. +If addresses were not configured, the checksums are ignored. +.It Li ipsec.check_policy_history +Enables strict policy checking for inbound packets. +By default, inbound security policies check that packets handled by IPsec +have been decrypted and authenticated. +If this variable is set to a non-zero value, each packet handled by IPsec +is checked against the history of IPsec security associations. +The IPsec security protocol, mode, and SA addresses must match. .El .Pp Variables under the .Li net.inet6.ipsec6 tree have similar meanings to those described above. .\" .Sh PROTOCOLS The .Nm protocol acts as a plug-in to the .Xr inet 4 and .Xr inet6 4 protocols and therefore supports most of the protocols defined upon those IP-layer protocols. The .Xr icmp 4 and .Xr icmp6 4 protocols may behave differently with .Nm because .Nm can prevent .Xr icmp 4 or .Xr icmp6 4 routines from looking into the IP payload. .\" .Sh SEE ALSO .Xr ioctl 2 , .Xr socket 2 , .Xr ipsec_set_policy 3 , .Xr crypto 4 , .Xr enc 4 , +.Xr if_ipsec 4 , .Xr icmp6 4 , .Xr intro 4 , .Xr ip6 4 , .Xr setkey 8 , .Xr sysctl 8 .\".Xr racoon 8 .Rs .%A "S. Kent" .%A "R. Atkinson" .%T "IP Authentication Header" .%O "RFC 2404" .Re .Rs .%A "S. Kent" .%A "R. Atkinson" .%T "IP Encapsulating Security Payload (ESP)" .%O "RFC 2406" .Re .Sh STANDARDS .Rs .%A Daniel L. McDonald .%A Craig Metz .%A Bao G. Phan .%T "PF_KEY Key Management API, Version 2" .%R RFC .%N 2367 .Re .Pp .Rs .%A "D. L. McDonald" .%T "A Simple IP Security API Extension to BSD Sockets" .%R internet draft .%N "draft-mcdonald-simple-ipsec-api-03.txt" .%O work in progress material .Re .Sh HISTORY The original .Nm implementation appeared in the WIDE/KAME IPv6/IPsec stack. .Pp For .Fx 5.0 a fully locked IPsec implementation called fast_ipsec was brought in. The protocols drew heavily on the .Ox implementation of the .Tn IPsec protocols. The policy management code was derived from the .Tn KAME implementation found in their .Tn IPsec protocols. The fast_ipsec implementation lacked .Xr ip6 4 support but made use of the .Xr crypto 4 subsystem. .Pp For .Fx 7.0 .Xr ip6 4 support was added to fast_ipsec. After this the old KAME IPsec implementation was dropped and fast_ipsec became what now is the only .Nm implementation in .Fx . .Sh BUGS There is no single standard for the policy engine API, so the policy engine API described herein is just for this implementation. .Pp AH and tunnel mode encapsulation may not work as you might expect. If you configure inbound .Dq require policy with an AH tunnel or any IPsec encapsulating policy with AH (like .Dq Li esp/tunnel/A-B/use ah/transport/A-B/require ) , tunnelled packets will be rejected. This is because the policy check is enforced on the inner packet on reception, and AH authenticates encapsulating (outer) packet, not the encapsulated (inner) packet (so for the receiving kernel there is no sign of authenticity). The issue will be solved when we revamp our policy engine to keep all the packet decapsulation history. .Pp When a large database of security associations or policies is present in the kernel the .Dv SADB_DUMP and .Dv SADB_SPDDUMP operations on .Dv PF_KEY sockets may fail due to lack of space. Increasing the socket buffer size may alleviate this problem. .Pp The .Tn IPcomp protocol may occasionally error because of .Xr zlib 3 problems. .Pp This documentation needs more review. diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 index b3139e74d4e1..6b94866a2b31 100644 --- a/share/man/man4/tcp.4 +++ b/share/man/man4/tcp.4 @@ -1,682 +1,676 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. .\" Copyright (c) 2010-2011 The FreeBSD Foundation .\" All rights reserved. .\" .\" Portions of this documentation were written at the Centre for Advanced .\" Internet Architectures, Swinburne University of Technology, Melbourne, .\" Australia by David Hayes under sponsorship from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd Jan 29, 2017 +.Dd February 6, 2017 .Dt TCP 4 .Os .Sh NAME .Nm tcp .Nd Internet Transmission Control Protocol .Sh SYNOPSIS .In sys/types.h .In sys/socket.h .In netinet/in.h .In netinet/tcp.h .Ft int .Fn socket AF_INET SOCK_STREAM 0 .Sh DESCRIPTION The .Tn TCP protocol provides reliable, flow-controlled, two-way transmission of data. It is a byte-stream protocol used to support the .Dv SOCK_STREAM abstraction. .Tn TCP uses the standard Internet address format and, in addition, provides a per-host collection of .Dq "port addresses" . Thus, each address is composed of an Internet address specifying the host and network, with a specific .Tn TCP port on the host identifying the peer entity. .Pp Sockets utilizing the .Tn TCP protocol are either .Dq active or .Dq passive . Active sockets initiate connections to passive sockets. By default, .Tn TCP sockets are created active; to create a passive socket, the .Xr listen 2 system call must be used after binding the socket with the .Xr bind 2 system call. Only passive sockets may use the .Xr accept 2 call to accept incoming connections. Only active sockets may use the .Xr connect 2 call to initiate connections. .Pp Passive sockets may .Dq underspecify their location to match incoming connection requests from multiple networks. This technique, termed .Dq "wildcard addressing" , allows a single server to provide service to clients on multiple networks. To create a socket which listens on all networks, the Internet address .Dv INADDR_ANY must be bound. The .Tn TCP port may still be specified at this time; if the port is not specified, the system will assign one. Once a connection has been established, the socket's address is fixed by the peer entity's location. The address assigned to the socket is the address associated with the network interface through which packets are being transmitted and received. Normally, this address corresponds to the peer entity's network. .Pp .Tn TCP supports a number of socket options which can be set with .Xr setsockopt 2 and tested with .Xr getsockopt 2 : .Bl -tag -width ".Dv TCP_FUNCTION_BLK" .It Dv TCP_INFO Information about a socket's underlying TCP session may be retrieved by passing the read-only option .Dv TCP_INFO to .Xr getsockopt 2 . It accepts a single argument: a pointer to an instance of .Vt "struct tcp_info" . .Pp This API is subject to change; consult the source to determine which fields are currently filled out by this option. .Fx specific additions include send window size, receive window size, and bandwidth-controlled window space. .It Dv TCP_CCALGOOPT Set or query congestion control algorithm specific parameters. See .Xr mod_cc 4 for details. .It Dv TCP_CONGESTION Select or query the congestion control algorithm that TCP will use for the connection. See .Xr mod_cc 4 for details. .It Dv TCP_FUNCTION_BLK Select or query the set of functions that TCP will use for this connection. This allows a user to select an alternate TCP stack. The alternate TCP stack must already be loaded in the kernel. To list the available TCP stacks, see .Va functions_available in the .Sx MIB Variables section further down. To list the default TCP stack, see .Va functions_default in the .Sx MIB Variables section. .It Dv TCP_KEEPINIT This .Xr setsockopt 2 option accepts a per-socket timeout argument of .Vt "u_int" in seconds, for new, non-established .Tn TCP connections. For the global default in milliseconds see .Va keepinit in the .Sx MIB Variables section further down. .It Dv TCP_KEEPIDLE This .Xr setsockopt 2 option accepts an argument of .Vt "u_int" for the amount of time, in seconds, that the connection must be idle before keepalive probes (if enabled) are sent for the connection of this socket. If set on a listening socket, the value is inherited by the newly created socket upon .Xr accept 2 . For the global default in milliseconds see .Va keepidle in the .Sx MIB Variables section further down. .It Dv TCP_KEEPINTVL This .Xr setsockopt 2 option accepts an argument of .Vt "u_int" to set the per-socket interval, in seconds, between keepalive probes sent to a peer. If set on a listening socket, the value is inherited by the newly created socket upon .Xr accept 2 . For the global default in milliseconds see .Va keepintvl in the .Sx MIB Variables section further down. .It Dv TCP_KEEPCNT This .Xr setsockopt 2 option accepts an argument of .Vt "u_int" and allows a per-socket tuning of the number of probes sent, with no response, before the connection will be dropped. If set on a listening socket, the value is inherited by the newly created socket upon .Xr accept 2 . For the global default see the .Va keepcnt in the .Sx MIB Variables section further down. .It Dv TCP_NODELAY Under most circumstances, .Tn TCP sends data when it is presented; when outstanding data has not yet been acknowledged, it gathers small amounts of output to be sent in a single packet once an acknowledgement is received. For a small number of clients, such as window systems that send a stream of mouse events which receive no replies, this packetization may cause significant delays. The boolean option .Dv TCP_NODELAY defeats this algorithm. .It Dv TCP_MAXSEG By default, a sender- and .No receiver- Ns Tn TCP will negotiate among themselves to determine the maximum segment size to be used for each connection. The .Dv TCP_MAXSEG option allows the user to determine the result of this negotiation, and to reduce it if desired. .It Dv TCP_NOOPT .Tn TCP usually sends a number of options in each packet, corresponding to various .Tn TCP extensions which are provided in this implementation. The boolean option .Dv TCP_NOOPT is provided to disable .Tn TCP option use on a per-connection basis. .It Dv TCP_NOPUSH By convention, the .No sender- Ns Tn TCP will set the .Dq push bit, and begin transmission immediately (if permitted) at the end of every user call to .Xr write 2 or .Xr writev 2 . When this option is set to a non-zero value, .Tn TCP will delay sending any data at all until either the socket is closed, or the internal send buffer is filled. .It Dv TCP_MD5SIG This option enables the use of MD5 digests (also known as TCP-MD5) on writes to the specified socket. Outgoing traffic is digested; -digests on incoming traffic are verified if the -.Va net.inet.tcp.signature_verify_input -sysctl is nonzero. -The current default behavior for the system is to respond to a system -advertising this option with TCP-MD5; this may change. +digests on incoming traffic are verified. +When this option is enabled on a socket, all inbound and outgoing +TCP segments must be signed with MD5 digests. .Pp One common use for this in a .Fx router deployment is to enable based routers to interwork with Cisco equipment at peering points. Support for this feature conforms to RFC 2385. -Only IPv4 -.Pq Dv AF_INET -sessions are supported. .Pp In order for this option to function correctly, it is necessary for the administrator to add a tcp-md5 key entry to the system's security associations database (SADB) using the .Xr setkey 8 utility. -This entry must have an SPI of 0x1000 and can therefore only be specified -on a per-host basis at this time. +This entry can only be specified on a per-host basis at this time. .Pp -If an SADB entry cannot be found for the destination, the outgoing traffic -will have an invalid digest option prepended, and the following error message -will be visible on the system console: -.Em "tcp_signature_compute: SADB lookup failed for %d.%d.%d.%d" . +If an SADB entry cannot be found for the destination, +the system does not send any outgoing segments and drops any inbound segments. +.Pp +Each dropped segment is taken into account in the TCP protocol statistics. .El .Pp The option level for the .Xr setsockopt 2 call is the protocol number for .Tn TCP , available from .Xr getprotobyname 3 , or .Dv IPPROTO_TCP . All options are declared in .In netinet/tcp.h . .Pp Options at the .Tn IP transport level may be used with .Tn TCP ; see .Xr ip 4 . Incoming connection requests that are source-routed are noted, and the reverse source route is used in responding. .Pp The default congestion control algorithm for .Tn TCP is .Xr cc_newreno 4 . Other congestion control algorithms can be made available using the .Xr mod_cc 4 framework. .Ss MIB Variables The .Tn TCP protocol implements a number of variables in the .Va net.inet.tcp branch of the .Xr sysctl 3 MIB. .Bl -tag -width ".Va TCPCTL_DO_RFC1323" .It Dv TCPCTL_DO_RFC1323 .Pq Va rfc1323 Implement the window scaling and timestamp options of RFC 1323 (default is true). .It Dv TCPCTL_MSSDFLT .Pq Va mssdflt The default value used for the maximum segment size .Pq Dq MSS when no advice to the contrary is received from MSS negotiation. .It Dv TCPCTL_SENDSPACE .Pq Va sendspace Maximum .Tn TCP send window. .It Dv TCPCTL_RECVSPACE .Pq Va recvspace Maximum .Tn TCP receive window. .It Va log_in_vain Log any connection attempts to ports where there is not a socket accepting connections. The value of 1 limits the logging to .Tn SYN (connection establishment) packets only. That of 2 results in any .Tn TCP packets to closed ports being logged. Any value unlisted above disables the logging (default is 0, i.e., the logging is disabled). .It Va msl The Maximum Segment Lifetime, in milliseconds, for a packet. .It Va keepinit Timeout, in milliseconds, for new, non-established .Tn TCP connections. The default is 75000 msec. .It Va keepidle Amount of time, in milliseconds, that the connection must be idle before keepalive probes (if enabled) are sent. The default is 7200000 msec (2 hours). .It Va keepintvl The interval, in milliseconds, between keepalive probes sent to remote machines, when no response is received on a .Va keepidle probe. The default is 75000 msec. .It Va keepcnt Number of probes sent, with no response, before a connection is dropped. The default is 8 packets. .It Va always_keepalive Assume that .Dv SO_KEEPALIVE is set on all .Tn TCP connections, the kernel will periodically send a packet to the remote host to verify the connection is still up. .It Va icmp_may_rst Certain .Tn ICMP unreachable messages may abort connections in .Tn SYN-SENT state. .It Va do_tcpdrain Flush packets in the .Tn TCP reassembly queue if the system is low on mbufs. .It Va blackhole If enabled, disable sending of RST when a connection is attempted to a port where there is not a socket accepting connections. See .Xr blackhole 4 . .It Va delayed_ack Delay ACK to try and piggyback it onto a data packet. .It Va delacktime Maximum amount of time, in milliseconds, before a delayed ACK is sent. .It Va path_mtu_discovery Enable Path MTU Discovery. .It Va tcbhashsize Size of the .Tn TCP control-block hash table (read-only). This may be tuned using the kernel option .Dv TCBHASHSIZE or by setting .Va net.inet.tcp.tcbhashsize in the .Xr loader 8 . .It Va pcbcount Number of active process control blocks (read-only). .It Va syncookies Determines whether or not .Tn SYN cookies should be generated for outbound .Tn SYN-ACK packets. .Tn SYN cookies are a great help during .Tn SYN flood attacks, and are enabled by default. (See .Xr syncookies 4 . ) .It Va isn_reseed_interval The interval (in seconds) specifying how often the secret data used in RFC 1948 initial sequence number calculations should be reseeded. By default, this variable is set to zero, indicating that no reseeding will occur. Reseeding should not be necessary, and will break .Dv TIME_WAIT recycling for a few minutes. .It Va rexmit_min , rexmit_slop Adjust the retransmit timer calculation for .Tn TCP . The slop is typically added to the raw calculation to take into account occasional variances that the .Tn SRTT (smoothed round-trip time) is unable to accommodate, while the minimum specifies an absolute minimum. While a number of .Tn TCP RFCs suggest a 1 second minimum, these RFCs tend to focus on streaming behavior, and fail to deal with the fact that a 1 second minimum has severe detrimental effects over lossy interactive connections, such as a 802.11b wireless link, and over very fast but lossy connections for those cases not covered by the fast retransmit code. For this reason, we use 200ms of slop and a near-0 minimum, which gives us an effective minimum of 200ms (similar to .Tn Linux ) . .It Va initcwnd_segments Enable the ability to specify initial congestion window in number of segments. The default value is 10 as suggested by RFC 6928. Changing the value on fly would not affect connections using congestion window from the hostcache. Caution: This regulates the burst of packets allowed to be sent in the first RTT. The value should be relative to the link capacity. Start with small values for lower-capacity links. Large bursts can cause buffer overruns and packet drops if routers have small buffers or the link is experiencing congestion. .It Va rfc3042 Enable the Limited Transmit algorithm as described in RFC 3042. It helps avoid timeouts on lossy links and also when the congestion window is small, as happens on short transfers. .It Va rfc3390 Enable support for RFC 3390, which allows for a variable-sized starting congestion window on new connections, depending on the maximum segment size. This helps throughput in general, but particularly affects short transfers and high-bandwidth large propagation-delay connections. .It Va sack.enable Enable support for RFC 2018, TCP Selective Acknowledgment option, which allows the receiver to inform the sender about all successfully arrived segments, allowing the sender to retransmit the missing segments only. .It Va sack.maxholes Maximum number of SACK holes per connection. Defaults to 128. .It Va sack.globalmaxholes Maximum number of SACK holes per system, across all connections. Defaults to 65536. .It Va maxtcptw When a TCP connection enters the .Dv TIME_WAIT state, its associated socket structure is freed, since it is of negligible size and use, and a new structure is allocated to contain a minimal amount of information necessary for sustaining a connection in this state, called the compressed TCP TIME_WAIT state. Since this structure is smaller than a socket structure, it can save a significant amount of system memory. The .Va net.inet.tcp.maxtcptw MIB variable controls the maximum number of these structures allocated. By default, it is initialized to .Va kern.ipc.maxsockets / 5. .It Va nolocaltimewait Suppress creating of compressed TCP TIME_WAIT states for connections in which both endpoints are local. .It Va fast_finwait2_recycle Recycle .Tn TCP .Dv FIN_WAIT_2 connections faster when the socket is marked as .Dv SBS_CANTRCVMORE (no user process has the socket open, data received on the socket cannot be read). The timeout used here is .Va finwait2_timeout . .It Va finwait2_timeout Timeout to use for fast recycling of .Tn TCP .Dv FIN_WAIT_2 connections. Defaults to 60 seconds. .It Va ecn.enable Enable support for TCP Explicit Congestion Notification (ECN). ECN allows a TCP sender to reduce the transmission rate in order to avoid packet drops. Settings: .Bl -tag -compact .It 0 Disable ECN. .It 1 Allow incoming connections to request ECN. Outgoing connections will request ECN. .It 2 Allow incoming connections to request ECN. Outgoing connections will not request ECN. .El .It Va ecn.maxretries Number of retries (SYN or SYN/ACK retransmits) before disabling ECN on a specific connection. This is needed to help with connection establishment when a broken firewall is in the network path. .It Va pmtud_blackhole_detection Turn on automatic path MTU blackhole detection. In case of retransmits OS will lower the MSS to check if it's MTU problem. If current MSS is greater than configured value to try, it will be set to configured value, otherwise, MSS will be set to default values .Po Va net.inet.tcp.mssdflt and .Va net.inet.tcp.v6mssdflt .Pc . .It Va pmtud_blackhole_mss MSS to try for IPv4 if PMTU blackhole detection is turned on. .It Va v6pmtud_blackhole_mss MSS to try for IPv6 if PMTU blackhole detection is turned on. .It Va pmtud_blackhole_activated Number of times configured values were used in an attempt to downshift. .It Va pmtud_blackhole_activated_min_mss Number of times default MSS was used in an attempt to downshift. .It Va pmtud_blackhole_failed Number of connections for which retransmits continued even after MSS downshift. .It Va functions_available List of available TCP function blocks (TCP stacks). .It Va functions_default The default TCP function block (TCP stack). .It Va functions_inherit_listen_socket_stack Determines whether to inherit listen socket's tcp stack or use the current system default tcp stack, as defined by .Va functions_default .Pc . Default is true. .It Va insecure_rst Use criteria defined in RFC793 instead of RFC5961 for accepting RST segments. Default is false. .It Va insecure_syn Use criteria defined in RFC793 instead of RFC5961 for accepting SYN segments. Default is false. .El .Sh ERRORS A socket operation may fail with one of the following errors returned: .Bl -tag -width Er .It Bq Er EISCONN when trying to establish a connection on a socket which already has one; .It Bq Er ENOBUFS when the system runs out of memory for an internal data structure; .It Bq Er ETIMEDOUT when a connection was dropped due to excessive retransmissions; .It Bq Er ECONNRESET when the remote peer forces the connection to be closed; .It Bq Er ECONNREFUSED when the remote peer actively refuses connection establishment (usually because no process is listening to the port); .It Bq Er EADDRINUSE when an attempt is made to create a socket with a port which has already been allocated; .It Bq Er EADDRNOTAVAIL when an attempt is made to create a socket with a network address for which no network interface exists; .It Bq Er EAFNOSUPPORT when an attempt is made to bind or connect a socket to a multicast address. .It Bq Er EINVAL when trying to change TCP function blocks at an invalid point in the session; .It Bq Er ENOENT when trying to use a TCP function block that is not available; .El .Sh SEE ALSO .Xr getsockopt 2 , .Xr socket 2 , .Xr sysctl 3 , .Xr blackhole 4 , .Xr inet 4 , .Xr intro 4 , .Xr ip 4 , .Xr mod_cc 4 , .Xr siftr 4 , .Xr syncache 4 , .Xr setkey 8 , .Xr tcp_functions 9 .Rs .%A "V. Jacobson" .%A "R. Braden" .%A "D. Borman" .%T "TCP Extensions for High Performance" .%O "RFC 1323" .Re .Rs .%A "A. Heffernan" .%T "Protection of BGP Sessions via the TCP MD5 Signature Option" .%O "RFC 2385" .Re .Rs .%A "K. Ramakrishnan" .%A "S. Floyd" .%A "D. Black" .%T "The Addition of Explicit Congestion Notification (ECN) to IP" .%O "RFC 3168" .Re .Sh HISTORY The .Tn TCP protocol appeared in .Bx 4.2 . The RFC 1323 extensions for window scaling and timestamps were added in .Bx 4.4 . The .Dv TCP_INFO option was introduced in .Tn Linux 2.6 and is .Em subject to change . diff --git a/share/man/man4/udp.4 b/share/man/man4/udp.4 index 2f828bf76a95..3d869c38339b 100644 --- a/share/man/man4/udp.4 +++ b/share/man/man4/udp.4 @@ -1,166 +1,178 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)udp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd June 5, 1993 +.Dd February 6, 2017 .Dt UDP 4 .Os .Sh NAME .Nm udp .Nd Internet User Datagram Protocol .Sh SYNOPSIS .In sys/types.h .In sys/socket.h .In netinet/in.h .Ft int .Fn socket AF_INET SOCK_DGRAM 0 .Sh DESCRIPTION .Tn UDP is a simple, unreliable datagram protocol which is used to support the .Dv SOCK_DGRAM abstraction for the Internet protocol family. .Tn UDP sockets are connectionless, and are normally used with the .Xr sendto 2 and .Xr recvfrom 2 calls, though the .Xr connect 2 call may also be used to fix the destination for future packets (in which case the .Xr recv 2 or .Xr read 2 and .Xr send 2 or .Xr write 2 system calls may be used). .Pp .Tn UDP address formats are identical to those used by .Tn TCP . In particular .Tn UDP provides a port identifier in addition to the normal Internet address format. Note that the .Tn UDP port space is separate from the .Tn TCP port space (i.e., a .Tn UDP port may not be .Dq connected to a .Tn TCP port). In addition broadcast packets may be sent (assuming the underlying network supports this) by using a reserved .Dq broadcast address ; this address is network interface dependent. .Pp Options at the .Tn IP transport level may be used with .Tn UDP ; see .Xr ip 4 . +.Tn UDP_ENCAP +socket option may be used at the +.Tn IPPROTO_UDP +level to encapsulate +.Tn ESP +packets in +.Tn UDP . +Only one value is supported for this option: +.Tn UDP_ENCAP_ESPINUDP +from RFC 3948, defined in +.In netinet/udp.h . .Sh MIB VARIABLES The .Nm protocol implements a number of variables in the .Li net.inet branch of the .Xr sysctl 3 MIB. .Bl -tag -width UDPCTL_RECVSPACEX .It UDPCTL_CHECKSUM .Pq udp.checksum Enable udp checksums (enabled by default). .It UDPCTL_MAXDGRAM .Pq udp.maxdgram Maximum outgoing UDP datagram size .It UDPCTL_RECVSPACE .Pq udp.recvspace Maximum space for incoming UDP datagrams .It udp.log_in_vain For all udp datagrams, to ports on which there is no socket listening, log the connection attempt (disabled by default). .It udp.blackhole When a datagram is received on a port where there is no socket listening, do not return an ICMP port unreachable message. (Disabled by default. See .Xr blackhole 4 . ) .El .Sh ERRORS A socket operation may fail with one of the following errors returned: .Bl -tag -width Er .It Bq Er EISCONN when trying to establish a connection on a socket which already has one, or when trying to send a datagram with the destination address specified and the socket is already connected; .It Bq Er ENOTCONN when trying to send a datagram, but no destination address is specified, and the socket has not been connected; .It Bq Er ENOBUFS when the system runs out of memory for an internal data structure; .It Bq Er EADDRINUSE when an attempt is made to create a socket with a port which has already been allocated; .It Bq Er EADDRNOTAVAIL when an attempt is made to create a socket with a network address for which no network interface exists. .El .Sh SEE ALSO .Xr getsockopt 2 , .Xr recv 2 , .Xr send 2 , .Xr socket 2 , .Xr blackhole 4 , .Xr inet 4 , .Xr intro 4 , -.Xr ip 4 +.Xr ip 4 , +.Xr udplite 4 .Sh HISTORY The .Nm protocol appeared in .Bx 4.2 . diff --git a/sys/conf/NOTES b/sys/conf/NOTES index ed8566acacea..a8a6d03d8924 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1,3071 +1,3072 @@ # $FreeBSD$ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers', # 'makeoptions', 'hints', etc. go into the kernel configuration that you # run config(8) with. # # Lines that begin with 'hint.' are NOT for config(8), they go into your # hints file. See /boot/device.hints and/or the 'hints' config(8) directive. # # Please use ``make LINT'' to create an old-style LINT file if you want to # do kernel test-builds. # # This file contains machine independent kernel configuration notes. For # machine dependent notes, look in /sys//conf/NOTES. # # # NOTES conventions and style guide: # # Large block comments should begin and end with a line containing only a # comment character. # # To describe a particular object, a block comment (if it exists) should # come first. Next should come device, options, and hints lines in that # order. All device and option lines must be described by a comment that # doesn't just expand the device or option name. Use only a concise # comment on the same line if possible. Very detailed descriptions of # devices and subsystems belong in man pages. # # A space followed by a tab separates 'options' from an option name. Two # spaces followed by a tab separate 'device' from a device name. Comments # after an option or device should use one space after the comment character. # To comment out a negative option that disables code and thus should not be # enabled for LINT builds, precede 'options' with "#!". # # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a formula defined in subr_param.c. # Omitting this parameter or setting it to 0 will cause the system to # auto-size based on physical memory. # maxusers 10 # To statically compile in device wiring instead of /boot/device.hints #hints "LINT.hints" # Default places to look for devices. # Use the following to compile in values accessible to the kernel # through getenv() (or kenv(1) in userland). The format of the file # is 'variable=value', see kenv(1) # #env "LINT.env" # # The `makeoptions' parameter allows variables to be passed to the # generated Makefile in the build area. # # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS} # after most other flags. Here we use it to inhibit use of non-optimal # gcc built-in functions (e.g., memcmp). # # DEBUG happens to be magic. # The following is equivalent to 'config -g KERNELNAME' and creates # 'kernel.debug' compiled with -g debugging as well as a normal # 'kernel'. Use 'make install.debug' to install the debug kernel # but that isn't normally necessary as the debug symbols are not loaded # by the kernel and are not useful there anyway. # # KERNEL can be overridden so that you can change the default name of your # kernel. # # MODULES_OVERRIDE can be used to limit modules built to a specific list. # makeoptions CONF_CFLAGS=-fno-builtin #Don't allow use of memcmp, etc. #makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols #makeoptions KERNEL=foo #Build kernel "foo" and install "/foo" # Only build ext2fs module plus those parts of the sound system I need. #makeoptions MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3" makeoptions DESTDIR=/tmp # # FreeBSD processes are subject to certain limits to their consumption # of system resources. See getrlimit(2) for more details. Each # resource limit has two values, a "soft" limit and a "hard" limit. # The soft limits can be modified during normal system operation, but # the hard limits are set at boot time. Their default values are # in sys//include/vmparam.h. There are two ways to change them: # # 1. Set the values at kernel build time. The options below are one # way to allow that limit to grow to 1GB. They can be increased # further by changing the parameters: # # 2. In /boot/loader.conf, set the tunables kern.maxswzone, # kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz, # kern.dflssiz, kern.maxssiz and kern.sgrowsiz. # # The options in /boot/loader.conf override anything in the kernel # configuration file. See the function init_param1 in # sys/kern/subr_param.c for more details. # options MAXDSIZ=(1024UL*1024*1024) options MAXSSIZ=(128UL*1024*1024) options DFLDSIZ=(1024UL*1024*1024) # # BLKDEV_IOSIZE sets the default block size used in user block # device I/O. Note that this value will be overridden by the label # when specifying a block device from a label with a non-0 # partition blocksize. The default is PAGE_SIZE. # options BLKDEV_IOSIZE=8192 # # MAXPHYS and DFLTPHYS # # These are the maximal and safe 'raw' I/O block device access sizes. # Reads and writes will be split into MAXPHYS chunks for known good # devices and DFLTPHYS for the rest. Some applications have better # performance with larger raw I/O access sizes. Note that certain VM # parameters are derived from these values and making them too large # can make an unbootable kernel. # # The defaults are 64K and 128K respectively. options DFLTPHYS=(64*1024) options MAXPHYS=(128*1024) # This allows you to actually store this configuration file into # the kernel binary itself. See config(8) for more details. # options INCLUDE_CONFIG_FILE # Include this file in kernel # # Compile-time defaults for various boot parameters # options BOOTVERBOSE=1 options BOOTHOWTO=RB_MULTIPLE options GEOM_AES # Don't use, use GEOM_BDE options GEOM_BDE # Disk encryption. options GEOM_BSD # BSD disklabels options GEOM_CACHE # Disk cache. options GEOM_CONCAT # Disk concatenation. options GEOM_ELI # Disk encryption. options GEOM_FOX # Redundant path mitigation options GEOM_GATE # Userland services. options GEOM_JOURNAL # Journaling. options GEOM_LABEL # Providers labelization. options GEOM_LINUX_LVM # Linux LVM2 volumes options GEOM_MAP # Map based partitioning options GEOM_MBR # DOS/MBR partitioning options GEOM_MIRROR # Disk mirroring. options GEOM_MULTIPATH # Disk multipath options GEOM_NOP # Test class. options GEOM_PART_APM # Apple partitioning options GEOM_PART_BSD # BSD disklabel options GEOM_PART_BSD64 # BSD disklabel64 options GEOM_PART_EBR # Extended Boot Records options GEOM_PART_EBR_COMPAT # Backward compatible partition names options GEOM_PART_GPT # GPT partitioning options GEOM_PART_LDM # Logical Disk Manager options GEOM_PART_MBR # MBR partitioning options GEOM_PART_VTOC8 # SMI VTOC8 disk label options GEOM_RAID # Soft RAID functionality. options GEOM_RAID3 # RAID3 functionality. options GEOM_SHSEC # Shared secret. options GEOM_STRIPE # Disk striping. options GEOM_SUNLABEL # Sun/Solaris partitioning options GEOM_UZIP # Read-only compressed disks options GEOM_VINUM # Vinum logical volume manager options GEOM_VIRSTOR # Virtual storage. options GEOM_VOL # Volume names from UFS superblock options GEOM_ZERO # Performance testing helper. # # The root device and filesystem type can be compiled in; # this provides a fallback option if the root device cannot # be correctly guessed by the bootstrap code, or an override if # the RB_DFLTROOT flag (-r) is specified when booting the kernel. # options ROOTDEVNAME=\"ufs:da0s2e\" ##################################################################### # Scheduler options: # # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory. These options # select which scheduler is compiled in. # # SCHED_4BSD is the historical, proven, BSD scheduler. It has a global run # queue and no CPU affinity which makes it suboptimal for SMP. It has very # good interactivity and priority selection. # # SCHED_ULE provides significant performance advantages over 4BSD on many # workloads on SMP machines. It supports cpu-affinity, per-cpu runqueues # and scheduler locks. It also has a stronger notion of interactivity # which leads to better responsiveness even on uniprocessor machines. This # is the default scheduler. # # SCHED_STATS is a debugging option which keeps some stats in the sysctl # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions. # options SCHED_4BSD options SCHED_STATS #options SCHED_ULE ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # Mandatory: options SMP # Symmetric MultiProcessor Kernel # EARLY_AP_STARTUP releases the Application Processors earlier in the # kernel startup process (before devices are probed) rather than at the # end. This is a temporary option for use during the transition from # late to early AP startup. options EARLY_AP_STARTUP # MAXCPU defines the maximum number of CPUs that can boot in the system. # A default value should be already present, for every architecture. options MAXCPU=32 # MAXMEMDOM defines the maximum number of memory domains that can boot in the # system. A default value should already be defined by every architecture. options MAXMEMDOM=2 # VM_NUMA_ALLOC enables use of memory domain-aware allocation in the VM # system. options VM_NUMA_ALLOC # DEVICE_NUMA enables reporting of domain affinity of I/O devices via # bus_get_domain(), etc. options DEVICE_NUMA # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin # if the thread that currently owns the mutex is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_MUTEXES # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin # if the thread that currently owns the rwlock is executing on another # CPU. This behavior is enabled by default, so this option can be used # to disable it. options NO_ADAPTIVE_RWLOCKS # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread that # currently owns the sx lock is executing on another CPU. # This behavior is enabled by default, so this option can be used to # disable it. options NO_ADAPTIVE_SX # MUTEX_NOINLINE forces mutex operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options MUTEX_NOINLINE # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options RWLOCK_NOINLINE # SX_NOINLINE forces sx lock operations to call functions to perform each # operation rather than inlining the simple cases. This can be used to # shrink the size of the kernel text segment. Note that this behavior is # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING, # and WITNESS options. options SX_NOINLINE # SMP Debugging Options: # # CALLOUT_PROFILING enables rudimentary profiling of the callwheel data # structure used as backend in callout(9). # PREEMPTION allows the threads that are in the kernel to be preempted by # higher priority [interrupt] threads. It helps with interactivity # and allows interrupt threads to run sooner rather than waiting. # WARNING! Only tested on amd64 and i386. # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel # threads. Its sole use is to expose race conditions and other # bugs during development. Enabling this option will reduce # performance and increase the frequency of kernel panics by # design. If you aren't sure that you need it then you don't. # Relies on the PREEMPTION option. DON'T TURN THIS ON. # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table # used to hold active sleep queues as well as sleep wait message # frequency. # TURNSTILE_PROFILING enables rudimentary profiling of the hash table # used to hold active lock queues. # UMTX_PROFILING enables rudimentary profiling of the hash table used to hold active lock queues. # WITNESS enables the witness code which detects deadlocks and cycles # during locking operations. # WITNESS_KDB causes the witness code to drop into the kernel debugger if # a lock hierarchy violation occurs or if locks are held when going to # sleep. # WITNESS_SKIPSPIN disables the witness checks on spin mutexes. options PREEMPTION options FULL_PREEMPTION options WITNESS options WITNESS_KDB options WITNESS_SKIPSPIN # LOCK_PROFILING - Profiling locks. See LOCK_PROFILING(9) for details. options LOCK_PROFILING # Set the number of buffers and the hash size. The hash size MUST be larger # than the number of buffers. Hash size should be prime. options MPROF_BUFFERS="1536" options MPROF_HASH_SIZE="1543" # Profiling for the callout(9) backend. options CALLOUT_PROFILING # Profiling for internal hash tables. options SLEEPQUEUE_PROFILING options TURNSTILE_PROFILING options UMTX_PROFILING ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. Note that some architectures that # are supported by FreeBSD do not include support for certain important # aspects of this compatibility option, namely those related to the # signal delivery mechanism. # options COMPAT_43 # Old tty interface. options COMPAT_43TTY # Note that as a general rule, COMPAT_FREEBSD depends on # COMPAT_FREEBSD, COMPAT_FREEBSD, etc. # Enable FreeBSD4 compatibility syscalls options COMPAT_FREEBSD4 # Enable FreeBSD5 compatibility syscalls options COMPAT_FREEBSD5 # Enable FreeBSD6 compatibility syscalls options COMPAT_FREEBSD6 # Enable FreeBSD7 compatibility syscalls options COMPAT_FREEBSD7 # Enable FreeBSD9 compatibility syscalls options COMPAT_FREEBSD9 # Enable FreeBSD10 compatibility syscalls options COMPAT_FREEBSD10 # Enable FreeBSD11 compatibility syscalls options COMPAT_FREEBSD11 # Enable Linux Kernel Programming Interface options COMPAT_LINUXKPI # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG ##################################################################### # DEBUGGING OPTIONS # # Compile with kernel debugger related code. # options KDB # # Print a stack trace of the current thread on the console for a panic. # options KDB_TRACE # # Don't enter the debugger for a panic. Intended for unattended operation # where you may want to enter the debugger from the console, but still want # the machine to recover from a panic. # options KDB_UNATTENDED # # Enable the ddb debugger backend. # options DDB # # Print the numerical value of symbols in addition to the symbolic # representation. # options DDB_NUMSYM # # Enable the remote gdb debugger backend. # options GDB # # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the # contents of the registered sysctl nodes on the console. It is disabled by # default because it generates excessively verbose console output that can # interfere with serial console operation. # options SYSCTL_DEBUG # # Enable textdump by default, this disables kernel core dumps. # options TEXTDUMP_PREFERRED # # Enable extra debug messages while performing textdumps. # options TEXTDUMP_VERBOSE # # NO_SYSCTL_DESCR omits the sysctl node descriptions to save space in the # resulting kernel. options NO_SYSCTL_DESCR # # MALLOC_DEBUG_MAXZONES enables multiple uma zones for malloc(9) # allocations that are smaller than a page. The purpose is to isolate # different malloc types into hash classes, so that any buffer # overruns or use-after-free will usually only affect memory from # malloc types in that hash class. This is purely a debugging tool; # by varying the hash function and tracking which hash class was # corrupted, the intersection of the hash classes from each instance # will point to a single malloc type that is being misused. At this # point inspection or memguard(9) can be used to catch the offending # code. # options MALLOC_DEBUG_MAXZONES=8 # # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator # for the kernel used to detect modify-after-free scenarios. See the # memguard(9) man page for more information on usage. # options DEBUG_MEMGUARD # # DEBUG_REDZONE enables buffer underflows and buffer overflows detection for # malloc(9). # options DEBUG_REDZONE # # EARLY_PRINTF enables support for calling a special printf (eprintf) # very early in the kernel (before cn_init() has been called). This # should only be used for debugging purposes early in boot. Normally, # it is not defined. It is commented out here because this feature # isn't generally available. And the required eputc() isn't defined. # #options EARLY_PRINTF # # KTRACE enables the system-call tracing facility ktrace(2). To be more # SMP-friendly, KTRACE uses a worker thread to process most trace events # asynchronously to the thread generating the event. This requires a # pre-allocated store of objects representing trace events. The # KTRACE_REQUEST_POOL option specifies the initial size of this store. # The size of the pool can be adjusted both at boottime and runtime via # the kern.ktrace_request_pool tunable and sysctl. # options KTRACE #kernel tracing options KTRACE_REQUEST_POOL=101 # # KTR is a kernel tracing facility imported from BSD/OS. It is # enabled with the KTR option. KTR_ENTRIES defines the number of # entries in the circular trace buffer; it may be an arbitrary number. # KTR_BOOT_ENTRIES defines the number of entries during the early boot, # before malloc(9) is functional. # KTR_COMPILE defines the mask of events to compile into the kernel as # defined by the KTR_* constants in . KTR_MASK defines the # initial value of the ktr_mask variable which determines at runtime # what events to trace. KTR_CPUMASK determines which CPU's log # events, with bit X corresponding to CPU X. The layout of the string # passed as KTR_CPUMASK must match a series of bitmasks each of them # separated by the "," character (ie: # KTR_CPUMASK=0xAF,0xFFFFFFFFFFFFFFFF). KTR_VERBOSE enables # dumping of KTR events to the console by default. This functionality # can be toggled via the debug.ktr_verbose sysctl and defaults to off # if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details. # options KTR options KTR_BOOT_ENTRIES=1024 options KTR_ENTRIES=(128*1024) options KTR_COMPILE=(KTR_ALL) options KTR_MASK=KTR_INTR options KTR_CPUMASK=0x3 options KTR_VERBOSE # # ALQ(9) is a facility for the asynchronous queuing of records from the kernel # to a vnode, and is employed by services such as ktr(4) to produce trace # files based on a kernel event stream. Records are written asynchronously # in a worker thread. # options ALQ options KTR_ALQ # # The INVARIANTS option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options INVARIANTS # # The INVARIANT_SUPPORT option makes us compile in support for # verifying some of the internal structures. It is a prerequisite for # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be # called. The intent is that you can set 'INVARIANTS' for single # source files (by changing the source file or specifying it on the # command line) if you have 'INVARIANT_SUPPORT' enabled. Also, if you # wish to build a kernel module with 'INVARIANTS', then adding # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary # infrastructure without the added overhead. # options INVARIANT_SUPPORT # # The DIAGNOSTIC option is used to enable extra debugging information # from some parts of the kernel. As this makes everything more noisy, # it is disabled by default. # options DIAGNOSTIC # # REGRESSION causes optional kernel interfaces necessary only for regression # testing to be enabled. These interfaces may constitute security risks # when enabled, as they permit processes to easily modify aspects of the # run-time environment to reproduce unlikely or unusual (possibly normally # impossible) scenarios. # options REGRESSION # # This option lets some drivers co-exist that can't co-exist in a running # system. This is used to be able to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes it name # from.) # options COMPILING_LINT # # STACK enables the stack(9) facility, allowing the capture of kernel stack # for the purpose of procinfo(1), etc. stack(9) will also be compiled in # automatically if DDB(4) is compiled into the kernel. # options STACK # # The NUM_CORE_FILES option specifies the limit for the number of core # files generated by a particular process, when the core file format # specifier includes the %I pattern. Since we only have 1 character for # the core count in the format string, meaning the range will be 0-9, the # maximum value allowed for this option is 10. # This core file limit can be adjusted at runtime via the debug.ncores # sysctl. # options NUM_CORE_FILES=5 ##################################################################### # PERFORMANCE MONITORING OPTIONS # # The hwpmc driver that allows the use of in-CPU performance monitoring # counters for performance monitoring. The base kernel needs to be configured # with the 'options' line, while the hwpmc device can be either compiled # in or loaded as a loadable kernel module. # # Additional configuration options may be required on specific architectures, # please see hwpmc(4). device hwpmc # Driver (also a loadable module) options HWPMC_DEBUG options HWPMC_HOOKS # Other necessary kernel hooks ##################################################################### # NETWORKING OPTIONS # # Protocol families # options INET #Internet communications protocols options INET6 #IPv6 communications protocols options RATELIMIT # TX rate limiting support options ROUTETABLES=2 # allocated fibs up to 65536. default is 1. # but that would be a bad idea as they are large. options TCP_OFFLOAD # TCP offload support. # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration options IPSEC #IP security (requires device crypto) + +# Option IPSEC_SUPPORT does not enable IPsec, but makes it possible to +# load it as a kernel module. You still MUST add device crypto to your kernel +# configuration. +options IPSEC_SUPPORT #options IPSEC_DEBUG #debug for IP security -# -# Set IPSEC_NAT_T to enable NAT-Traversal support. This enables -# optional UDP encapsulation of ESP packets. -# -options IPSEC_NAT_T #NAT-T support, UDP encap of ESP # # SMB/CIFS requester # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV # options. options NETSMB #SMB/CIFS requester # mchain library. It can be either loaded as KLD or compiled into kernel options LIBMCHAIN # libalias library, performing NAT options LIBALIAS # flowtable cache options FLOWTABLE # # SCTP is a NEW transport protocol defined by # RFC2960 updated by RFC3309 and RFC3758.. and # soon to have a new base RFC and many many more # extensions. This release supports all the extensions # including many drafts (most about to become RFC's). # It is the reference implementation of SCTP # and is quite well tested. # # Note YOU MUST have both INET and INET6 defined. # You don't have to enable V6, but SCTP is # dual stacked and so far we have not torn apart # the V6 and V4.. since an association can span # both a V6 and V4 address at the SAME time :-) # options SCTP # There are bunches of options: # this one turns on all sorts of # nastily printing that you can # do. It's all controlled by a # bit mask (settable by socket opt and # by sysctl). Including will not cause # logging until you set the bits.. but it # can be quite verbose.. so without this # option we don't do any of the tests for # bits and prints.. which makes the code run # faster.. if you are not debugging don't use. options SCTP_DEBUG # # This option turns off the CRC32c checksum. Basically, # you will not be able to talk to anyone else who # has not done this. Its more for experimentation to # see how much CPU the CRC32c really takes. Most new # cards for TCP support checksum offload.. so this # option gives you a "view" into what SCTP would be # like with such an offload (which only exists in # high in iSCSI boards so far). With the new # splitting 8's algorithm its not as bad as it used # to be.. but it does speed things up try only # for in a captured lab environment :-) options SCTP_WITH_NO_CSUM # # # All that options after that turn on specific types of # logging. You can monitor CWND growth, flight size # and all sorts of things. Go look at the code and # see. I have used this to produce interesting # charts and graphs as well :-> # # I have not yet committed the tools to get and print # the logs, I will do that eventually .. before then # if you want them send me an email rrs@freebsd.org # You basically must have ktr(4) enabled for these # and you then set the sysctl to turn on/off various # logging bits. Use ktrdump(8) to pull the log and run # it through a display program.. and graphs and other # things too. # options SCTP_LOCK_LOGGING options SCTP_MBUF_LOGGING options SCTP_MBCNT_LOGGING options SCTP_PACKET_LOGGING options SCTP_LTRACE_CHUNKS options SCTP_LTRACE_ERRORS # altq(9). Enable the base part of the hooks with the ALTQ option. # Individual disciplines must be built into the base system and can not be # loaded as modules at this point. ALTQ requires a stable TSC so if yours is # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC # option. options ALTQ options ALTQ_CBQ # Class Based Queueing options ALTQ_RED # Random Early Detection options ALTQ_RIO # RED In/Out options ALTQ_CODEL # CoDel Active Queueing options ALTQ_HFSC # Hierarchical Packet Scheduler options ALTQ_FAIRQ # Fair Packet Scheduler options ALTQ_CDNR # Traffic conditioner options ALTQ_PRIQ # Priority Queueing options ALTQ_NOPCC # Required if the TSC is unusable options ALTQ_DEBUG # netgraph(4). Enable the base netgraph code with the NETGRAPH option. # Individual node types can be enabled with the corresponding option # listed below; however, this is not strictly necessary as netgraph # will automatically load the corresponding KLD module if the node type # is not already compiled into the kernel. Each type below has a # corresponding man page, e.g., ng_async(8). options NETGRAPH # netgraph(4) system options NETGRAPH_DEBUG # enable extra debugging, this # affects netgraph(4) and nodes # Node types options NETGRAPH_ASYNC options NETGRAPH_ATMLLC options NETGRAPH_ATM_ATMPIF options NETGRAPH_BLUETOOTH # ng_bluetooth(4) options NETGRAPH_BLUETOOTH_BT3C # ng_bt3c(4) options NETGRAPH_BLUETOOTH_HCI # ng_hci(4) options NETGRAPH_BLUETOOTH_L2CAP # ng_l2cap(4) options NETGRAPH_BLUETOOTH_SOCKET # ng_btsocket(4) options NETGRAPH_BLUETOOTH_UBT # ng_ubt(4) options NETGRAPH_BLUETOOTH_UBTBCMFW # ubtbcmfw(4) options NETGRAPH_BPF options NETGRAPH_BRIDGE options NETGRAPH_CAR options NETGRAPH_CISCO options NETGRAPH_DEFLATE options NETGRAPH_DEVICE options NETGRAPH_ECHO options NETGRAPH_EIFACE options NETGRAPH_ETHER options NETGRAPH_FRAME_RELAY options NETGRAPH_GIF options NETGRAPH_GIF_DEMUX options NETGRAPH_HOLE options NETGRAPH_IFACE options NETGRAPH_IP_INPUT options NETGRAPH_IPFW options NETGRAPH_KSOCKET options NETGRAPH_L2TP options NETGRAPH_LMI options NETGRAPH_MPPC_COMPRESSION options NETGRAPH_MPPC_ENCRYPTION options NETGRAPH_NETFLOW options NETGRAPH_NAT options NETGRAPH_ONE2MANY options NETGRAPH_PATCH options NETGRAPH_PIPE options NETGRAPH_PPP options NETGRAPH_PPPOE options NETGRAPH_PPTPGRE options NETGRAPH_PRED1 options NETGRAPH_RFC1490 options NETGRAPH_SOCKET options NETGRAPH_SPLIT options NETGRAPH_SPPP options NETGRAPH_TAG options NETGRAPH_TCPMSS options NETGRAPH_TEE options NETGRAPH_UI options NETGRAPH_VJC options NETGRAPH_VLAN # NgATM - Netgraph ATM options NGATM_ATM options NGATM_ATMBASE options NGATM_SSCOP options NGATM_SSCFU options NGATM_UNI options NGATM_CCATM device mn # Munich32x/Falc54 Nx64kbit/sec cards. # Network stack virtualization. #options VIMAGE #options VNET_DEBUG # debug for VIMAGE # # Network interfaces: # The `loop' device is MANDATORY when networking is enabled. device loop # The `ether' device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured or token-ring is enabled. device ether # The `vlan' device implements the VLAN tagging of Ethernet frames # according to IEEE 802.1Q. device vlan # The `vxlan' device implements the VXLAN encapsulation of Ethernet # frames in UDP packets according to RFC7348. device vxlan # The `wlan' device provides generic code to support 802.11 # drivers, including host AP mode; it is MANDATORY for the wi, # and ath drivers and will eventually be required by all 802.11 drivers. device wlan options IEEE80211_DEBUG #enable debugging msgs options IEEE80211_AMPDU_AGE #age frames in AMPDU reorder q's options IEEE80211_SUPPORT_MESH #enable 802.11s D3.0 support options IEEE80211_SUPPORT_TDMA #enable TDMA support # The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide # support for WEP, TKIP, and AES-CCMP crypto protocols optionally # used with 802.11 devices that depend on the `wlan' module. device wlan_wep device wlan_ccmp device wlan_tkip # The `wlan_xauth' device provides support for external (i.e. user-mode) # authenticators for use with 802.11 drivers that use the `wlan' # module and support 802.1x and/or WPA security protocols. device wlan_xauth # The `wlan_acl' device provides a MAC-based access control mechanism # for use with 802.11 drivers operating in ap mode and using the # `wlan' module. # The 'wlan_amrr' device provides AMRR transmit rate control algorithm device wlan_acl device wlan_amrr # Generic TokenRing device token # The `fddi' device provides generic code to support FDDI. device fddi # The `arcnet' device provides generic code to support Arcnet. device arcnet # The `sppp' device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). device sppp # The `bpf' device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. DHCP requires bpf. device bpf # The `netmap' device implements memory-mapped access to network # devices from userspace, enabling wire-speed packet capture and # generation even at 10Gbit/s. Requires support in the device # driver. Supported drivers are ixgbe, e1000, re. device netmap # The `disc' device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing and benchmarking purposes. device disc # The `epair' device implements a virtual back-to-back connected Ethernet # like interface pair. device epair # The `edsc' device implements a minimal Ethernet interface, # which discards all packets sent and receives none. device edsc # The `tap' device is a pty-like virtual Ethernet interface device tap # The `tun' device implements (user-)ppp and nos-tun(8) device tun # The `gif' device implements IPv6 over IP4 tunneling, # IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and # IPv6 over IPv6 tunneling. # The `gre' device implements GRE (Generic Routing Encapsulation) tunneling, # as specified in the RFC 2784 and RFC 2890. # The `me' device implements Minimal Encapsulation within IPv4 as # specified in the RFC 2004. # The XBONEHACK option allows the same pair of addresses to be configured on # multiple gif interfaces. device gif device gre device me options XBONEHACK # The `stf' device implements 6to4 encapsulation. device stf # The pf packet filter consists of three devices: # The `pf' device provides /dev/pf and the firewall code itself. # The `pflog' device provides the pflog0 interface which logs packets. # The `pfsync' device provides the pfsync0 interface used for # synchronization of firewall state tables (over the net). device pf device pflog device pfsync # Bridge interface. device if_bridge # Common Address Redundancy Protocol. See carp(4) for more details. device carp # IPsec interface. device enc # Link aggregation interface. device lagg # # Internet family options: # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted and XORP. # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall_type=open # in /etc/rc.conf when first enabling this feature, then refining the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care, if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''. It # depends on IPFIREWALL if compiled into the kernel. # # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires # LIBALIAS. # # IPFIREWALL_NAT64 adds support for in kernel NAT64 in ipfw. # # IPFIREWALL_NPTV6 adds support for in kernel NPTv6 in ipfw. # # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding # packets without touching the TTL). This can be useful to hide firewalls # from traceroute and similar tools. # # PF_DEFAULT_TO_DROP causes the default pf(4) rule to deny everything. # # TCPDEBUG enables code which keeps traces of the TCP state machine # for sockets with the SO_DEBUG option set, which can then be examined # using the trpt(8) utility. # # TCPPCAP enables code which keeps the last n packets sent and received # on a TCP socket. # # TCP_HHOOK enables the hhook(9) framework hooks for the TCP stack. # # RADIX_MPATH provides support for equal-cost multi-path routing. # options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #enable logging to syslogd(8) options IPFIREWALL_VERBOSE_LIMIT=100 #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPFIREWALL_NAT #ipfw kernel nat support options IPFIREWALL_NAT64 #ipfw kernel NAT64 support options IPFIREWALL_NPTV6 #ipfw kernel IPv6 NPT support options IPDIVERT #divert sockets options IPFILTER #ipfilter support options IPFILTER_LOG #ipfilter logging options IPFILTER_LOOKUP #ipfilter pools options IPFILTER_DEFAULT_BLOCK #block all packets by default options IPSTEALTH #support for stealth forwarding options PF_DEFAULT_TO_DROP #drop everything by default options TCPDEBUG options TCPPCAP options TCP_HHOOK options RADIX_MPATH # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions. See mbuf(9) for a list of available test cases. # MBUF_PROFILING enables code to profile the mbuf chains # exiting the system (via participating interfaces) and # return a logarithmic histogram of monitored parameters # (e.g. packet size, wasted space, number of mbufs in chain). options MBUF_STRESS_TEST options MBUF_PROFILING # Statically link in accept filters options ACCEPT_FILTER_DATA options ACCEPT_FILTER_DNS options ACCEPT_FILTER_HTTP # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are # carried in TCP option 19. This option is commonly used to protect # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable. # This is enabled on a per-socket basis using the TCP_MD5SIG socket option. -# This requires the use of 'device crypto' and 'options IPSEC'. +# This requires the use of 'device crypto' and either 'options IPSEC' or +# 'options IPSEC_SUPPORT'. options TCP_SIGNATURE #include support for RFC 2385 # DUMMYNET enables the "dummynet" bandwidth limiter. You need IPFIREWALL # as well. See dummynet(4) and ipfw(8) for more info. When you run # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve # a smooth scheduling of the traffic. options DUMMYNET ##################################################################### # FILESYSTEM OPTIONS # # Only the root filesystem needs to be statically compiled or preloaded # as module; everything else will be automatically loaded at mount # time. Some people still prefer to statically compile other # filesystems as well. # # NB: The UNION filesystem was known to be buggy in the past. It is now # being actively maintained, although there are still some issues being # resolved. # # One of these is mandatory: options FFS #Fast filesystem options NFSCL #Network File System client # The rest are optional: options AUTOFS #Automounter filesystem options CD9660 #ISO 9660 filesystem options FDESCFS #File descriptor filesystem options FUSE #FUSE support module options MSDOSFS #MS DOS File System (FAT, FAT32) options NFSLOCKD #Network Lock Manager options NFSD #Network Filesystem Server options KGSSAPI #Kernel GSSAPI implementation options NULLFS #NULL filesystem options PROCFS #Process filesystem (requires PSEUDOFS) options PSEUDOFS #Pseudo-filesystem framework options PSEUDOFS_TRACE #Debugging support for PSEUDOFS options SMBFS #SMB/CIFS filesystem options TMPFS #Efficient memory filesystem options UDF #Universal Disk Format options UNIONFS #Union filesystem # The xFS_ROOT options REQUIRE the associated ``options xFS'' options NFS_ROOT #NFS usable as root device # Soft updates is a technique for improving filesystem speed and # making abrupt shutdown less risky. # options SOFTUPDATES # Extended attributes allow additional data to be associated with files, # and is used for ACLs, Capabilities, and MAC labels. # See src/sys/ufs/ufs/README.extattr for more information. options UFS_EXTATTR options UFS_EXTATTR_AUTOSTART # Access Control List support for UFS filesystems. The current ACL # implementation requires extended attribute support, UFS_EXTATTR, # for the underlying filesystem. # See src/sys/ufs/ufs/README.acls for more information. options UFS_ACL # Directory hashing improves the speed of operations on very large # directories at the expense of some memory. options UFS_DIRHASH # Gjournal-based UFS journaling support. options UFS_GJOURNAL # Make space in the kernel for a root filesystem on a md device. # Define to the number of kilobytes to reserve for the filesystem. # This is now optional. # If not defined, the root filesystem passed in as the MFS_IMAGE makeoption # will be automatically embedded in the kernel during linking. Its exact size # will be consumed within the kernel. # If defined, the old way of embedding the filesystem in the kernel will be # used. That is to say MD_ROOT_SIZE KB will be allocated in the kernel and # later, the filesystem image passed in as the MFS_IMAGE makeoption will be # dd'd into the reserved space if it fits. options MD_ROOT_SIZE=10 # Make the md device a potential root device, either with preloaded # images of type mfs_root or md_root. options MD_ROOT # Disk quotas are supported when this option is enabled. options QUOTA #enable disk quotas # If you are running a machine just as a fileserver for PC and MAC # users, using SAMBA, you may consider setting this option # and keeping all those users' directories on a filesystem that is # mounted with the suiddir option. This gives new files the same # ownership as the directory (similar to group). It's a security hole # if you let these users run programs, so confine it to file-servers # (but it'll save you lots of headaches in those cases). Root owned # directories are exempt and X bits are cleared. The suid bit must be # set on the directory as well; see chmod(1). PC owners can't see/set # ownerships so they keep getting their toes trodden on. This saves # you all the support calls as the filesystem it's used on will act as # they expect: "It's my dir so it must be my file". # options SUIDDIR # NFS options: options NFS_MINATTRTIMO=3 # VREG attrib cache timeout in sec options NFS_MAXATTRTIMO=60 options NFS_MINDIRATTRTIMO=30 # VDIR attrib cache timeout in sec options NFS_MAXDIRATTRTIMO=60 options NFS_DEBUG # Enable NFS Debugging # # Add support for the EXT2FS filesystem of Linux fame. Be a bit # careful with this - the ext2fs code has a tendency to lag behind # changes and not be exercised very much, so mounting read/write could # be dangerous (and even mounting read only could result in panics.) # options EXT2FS # Cryptographically secure random number generator; /dev/random device random # The system memory devices; /dev/mem, /dev/kmem device mem # The kernel symbol table device; /dev/ksyms device ksyms # Optional character code conversion support with LIBICONV. # Each option requires their base file system and LIBICONV. options CD9660_ICONV options MSDOSFS_ICONV options UDF_ICONV ##################################################################### # POSIX P1003.1B # Real time extensions added in the 1993 POSIX # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING options _KPOSIX_PRIORITY_SCHEDULING # p1003_1b_semaphores are very experimental, # user should be ready to assist in debugging if problems arise. options P1003_1B_SEMAPHORES # POSIX message queue options P1003_1B_MQUEUE ##################################################################### # SECURITY POLICY PARAMETERS # Support for BSM audit options AUDIT # Support for Mandatory Access Control (MAC): options MAC options MAC_BIBA options MAC_BSDEXTENDED options MAC_IFOFF options MAC_LOMAC options MAC_MLS options MAC_NONE options MAC_PARTITION options MAC_PORTACL options MAC_SEEOTHERUIDS options MAC_STUB options MAC_TEST # Support for Capsicum options CAPABILITIES # fine-grained rights on file descriptors options CAPABILITY_MODE # sandboxes with no global namespace access ##################################################################### # CLOCK OPTIONS # The granularity of operation is controlled by the kernel option HZ whose # default value (1000 on most architectures) means a granularity of 1ms # (1s/HZ). Historically, the default was 100, but finer granularity is # required for DUMMYNET and other systems on modern hardware. There are # reasonable arguments that HZ should, in fact, be 100 still; consider, # that reducing the granularity too much might cause excessive overhead in # clock interrupt processing, potentially causing ticks to be missed and thus # actually reducing the accuracy of operation. options HZ=100 # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp options PPS_SYNC # Enable support for generic feed-forward clocks in the kernel. # The feed-forward clock support is an alternative to the feedback oriented # ntpd/system clock approach, and is to be used with a feed-forward # synchronization algorithm such as the RADclock: # More info here: http://www.synclab.org/radclock options FFCLOCK ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # It is possible to wire down your SCSI devices so that a given bus, # target, and LUN always come on line as the same device unit. In # earlier versions the unit numbers were assigned in the order that # the devices were probed on the SCSI bus. This means that if you # removed a disk drive, you may have had to rewrite your /etc/fstab # file, and also that you had to be careful when adding a new disk # as it may have been probed earlier and moved your device configuration # around. (See also option GEOM_VOL for a different solution to this # problem.) # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "da3" then the first # non-wired disk will be assigned da4. # The syntax for wiring down devices is: hint.scbus.0.at="ahc0" hint.scbus.1.at="ahc1" hint.scbus.1.bus="0" hint.scbus.3.at="ahc2" hint.scbus.3.bus="0" hint.scbus.2.at="ahc2" hint.scbus.2.bus="1" hint.da.0.at="scbus0" hint.da.0.target="0" hint.da.0.unit="0" hint.da.1.at="scbus3" hint.da.1.target="1" hint.da.2.at="scbus2" hint.da.2.target="3" hint.sa.1.at="scbus1" hint.sa.1.target="6" # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The ch driver drives SCSI Media Changer ("jukebox") devices. # # The da driver drives SCSI Direct Access ("disk") and Optical Media # ("WORM") devices. # # The sa driver drives SCSI Sequential Access ("tape") devices. # # The cd driver drives SCSI Read Only Direct Access ("cd") devices. # # The ses driver drives SCSI Environment Services ("ses") and # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices. # # The pt driver drives SCSI Processor devices. # # The sg driver provides a passthrough API that is compatible with the # Linux SG driver. It will work in conjunction with the COMPAT_LINUX # option to run linux SG apps. It can also stand on its own and provide # source level API compatibility for porting apps to FreeBSD. # # Target Mode support is provided here but also requires that a SIM # (SCSI Host Adapter Driver) provide support as well. # # The targ driver provides target mode support as a Processor type device. # It exists to give the minimal context necessary to respond to Inquiry # commands. There is a sample user application that shows how the rest # of the command support might be done in /usr/share/examples/scsi_target. # # The targbh driver provides target mode support and exists to respond # to incoming commands that do not otherwise have a logical unit assigned # to them. # # The pass driver provides a passthrough API to access the CAM subsystem. device scbus #base SCSI code device ch #SCSI media changers device da #SCSI direct access devices (aka disks) device sa #SCSI tapes device cd #SCSI CD-ROMs device ses #Enclosure Services (SES and SAF-TE) device pt #SCSI processor device targ #SCSI Target Mode Code device targbh #SCSI Target Mode Blackhole Device device pass #CAM passthrough driver device sg #Linux SCSI passthrough device ctl #CAM Target Layer # CAM OPTIONS: # debugging options: # CAMDEBUG Compile in all possible debugging. # CAM_DEBUG_COMPILE Debug levels to compile in. # CAM_DEBUG_FLAGS Debug levels to enable on boot. # CAM_DEBUG_BUS Limit debugging to the given bus. # CAM_DEBUG_TARGET Limit debugging to the given target. # CAM_DEBUG_LUN Limit debugging to the given lun. # CAM_DEBUG_DELAY Delay in us after printing each debug line. # # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter) # queue after a bus reset, and the number of milliseconds to # freeze the device queue after a bus device reset. This # can be changed at boot and runtime with the # kern.cam.scsi_delay tunable/sysctl. options CAMDEBUG options CAM_DEBUG_COMPILE=-1 options CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_PROBE|CAM_DEBUG_PERIPH) options CAM_DEBUG_BUS=-1 options CAM_DEBUG_TARGET=-1 options CAM_DEBUG_LUN=-1 options CAM_DEBUG_DELAY=1 options CAM_MAX_HIGHPOWER=4 options SCSI_NO_SENSE_STRINGS options SCSI_NO_OP_STRINGS options SCSI_DELAY=5000 # Be pessimistic about Joe SCSI device options CAM_IOSCHED_DYNAMIC # Options for the CAM CDROM driver: # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only # enforced if there is I/O waiting for another LUN # The compiled in defaults for these variables are 2 and 10 seconds, # respectively. # # These can also be changed on the fly with the following sysctl variables: # kern.cam.cd.changer.min_busy_seconds # kern.cam.cd.changer.max_busy_seconds # options CHANGER_MIN_BUSY_SECONDS=2 options CHANGER_MAX_BUSY_SECONDS=10 # Options for the CAM sequential access driver: # SA_IO_TIMEOUT: Timeout for read/write/wfm operations, in minutes # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT. options SA_IO_TIMEOUT=4 options SA_SPACE_TIMEOUT=60 options SA_REWIND_TIMEOUT=(2*60) options SA_ERASE_TIMEOUT=(4*60) options SA_1FM_AT_EOD # Optional timeout for the CAM processor target (pt) device # This is specified in seconds. The default is 60 seconds. options SCSI_PT_DEFAULT_TIMEOUT=60 # Optional enable of doing SES passthrough on other devices (e.g., disks) # # Normally disabled because a lot of newer SCSI disks report themselves # as having SES capabilities, but this can then clot up attempts to build # a topology with the SES device that's on the box these drives are in.... options SES_ENABLE_PASSTHROUGH ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device pty #BSD-style compatibility pseudo ttys device nmdm #back-to-back tty devices device md #Memory/malloc disk device snp #Snoop device - to look at pty/vty/etc.. device ccd #Concatenated disk driver device firmware #firmware(9) support # Kernel side iconv library options LIBICONV # Size of the kernel message buffer. Should be N * pagesize. options MSGBUF_SIZE=40960 ##################################################################### # HARDWARE BUS CONFIGURATION # # PCI bus & PCI options: # device pci options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support ##################################################################### # HARDWARE DEVICE CONFIGURATION # For ISA the required hints are listed. # EISA, MCA, PCI, CardBus, SD/MMC and pccard are self identifying buses, so # no hints are needed. # # Mandatory devices: # # These options are valid for other keyboard drivers as well. options KBD_DISABLE_KEYMAP_LOAD # refuse to load a keymap options KBD_INSTALL_CDEV # install a CDEV entry in /dev device kbdmux # keyboard multiplexer options KBDMUX_DFLT_KEYMAP # specify the built-in keymap makeoptions KBDMUX_DFLT_KEYMAP=it.iso options FB_DEBUG # Frame buffer debugging device splash # Splash screen and screen saver support # Various screen savers. device blank_saver device daemon_saver device dragon_saver device fade_saver device fire_saver device green_saver device logo_saver device rain_saver device snake_saver device star_saver device warp_saver # The syscons console driver (SCO color console compatible). device sc hint.sc.0.at="isa" options MAXCONS=16 # number of virtual consoles options SC_ALT_MOUSE_IMAGE # simplified mouse cursor in text mode options SC_DFLT_FONT # compile font in makeoptions SC_DFLT_FONT=cp850 options SC_DISABLE_KDBKEY # disable `debug' key options SC_DISABLE_REBOOT # disable reboot key sequence options SC_HISTORY_SIZE=200 # number of history buffer lines options SC_MOUSE_CHAR=0x3 # char code for text mode mouse cursor options SC_PIXEL_MODE # add support for the raster text mode # The following options will let you change the default colors of syscons. options SC_NORM_ATTR=(FG_GREEN|BG_BLACK) options SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN) options SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK) options SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED) # The following options will let you change the default behavior of # cut-n-paste feature options SC_CUT_SPACES2TABS # convert leading spaces into tabs options SC_CUT_SEPCHARS=\"x09\" # set of characters that delimit words # (default is single space - \"x20\") # If you have a two button mouse, you may want to add the following option # to use the right button of the mouse to paste text. options SC_TWOBUTTON_MOUSE # You can selectively disable features in syscons. options SC_NO_CUTPASTE options SC_NO_FONT_LOADING options SC_NO_HISTORY options SC_NO_MODE_CHANGE options SC_NO_SYSMOUSE options SC_NO_SUSPEND_VTYSWITCH # `flags' for sc # 0x80 Put the video card in the VESA 800x600 dots, 16 color mode # 0x100 Probe for a keyboard device periodically if one is not present # Enable experimental features of the syscons terminal emulator (teken). options TEKEN_CONS25 # cons25-style terminal emulation options TEKEN_UTF8 # UTF-8 output handling # The vt video console driver. device vt options VT_ALT_TO_ESC_HACK=1 # Prepend ESC sequence to ALT keys options VT_MAXWINDOWS=16 # Number of virtual consoles options VT_TWOBUTTON_MOUSE # Use right mouse button to paste # The following options set the default framebuffer size. options VT_FB_DEFAULT_HEIGHT=480 options VT_FB_DEFAULT_WIDTH=640 # The following options will let you change the default vt terminal colors. options TERMINAL_NORM_ATTR=(FG_GREEN|BG_BLACK) options TERMINAL_KERN_ATTR=(FG_LIGHTRED|BG_BLACK) # # Optional devices: # # # SCSI host adapters: # # adv: All Narrow SCSI bus AdvanSys controllers. # adw: Second Generation AdvanSys controllers including the ADV940UW. # aha: Adaptec 154x/1535/1640 # ahb: Adaptec 174x EISA controllers # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/ # 19160x/29160x, aic7770/aic78xx # ahd: Adaptec 29320/39320 Controllers. # aic: Adaptec 6260/6360, APA-1460 (PC Card) # bt: Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x, # BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F # esp: Emulex ESP, NCR 53C9x and QLogic FAS families based controllers # including the AMD Am53C974 (found on devices such as the Tekram # DC-390(T)) and the Sun ESP and FAS families of controllers # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters, # ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2, # ISP 12160 Ultra3 SCSI, # Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters. # Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters. # Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters. # ispfw: Firmware module for Qlogic host adapters # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4 # or FC9x9 Fibre Channel host adapters. # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters. # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors: # 53C810, 53C810A, 53C815, 53C825, 53C825A, 53C860, 53C875, # 53C876, 53C885, 53C895, 53C895A, 53C896, 53C897, 53C1510D, # 53C1010-33, 53C1010-66. # trm: Tekram DC395U/UW/F DC315U adapters. # # Note that the order is important in order for Buslogic ISA/EISA cards to be # probed correctly. # device bt hint.bt.0.at="isa" hint.bt.0.port="0x330" device adv hint.adv.0.at="isa" device adw device aha hint.aha.0.at="isa" device aic hint.aic.0.at="isa" device ahb device ahc device ahd device esp device iscsi_initiator device isp hint.isp.0.disable="1" hint.isp.0.role="3" hint.isp.0.prefer_iomap="1" hint.isp.0.prefer_memmap="1" hint.isp.0.fwload_disable="1" hint.isp.0.ignore_nvram="1" hint.isp.0.fullduplex="1" hint.isp.0.topology="lport" hint.isp.0.topology="nport" hint.isp.0.topology="lport-only" hint.isp.0.topology="nport-only" # we can't get u_int64_t types, nor can we get strings if it's got # a leading 0x, hence this silly dodge. hint.isp.0.portwnn="w50000000aaaa0000" hint.isp.0.nodewnn="w50000000aaaa0001" device ispfw device mpt device ncr device sym device trm # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # Dump the contents of the ahc controller configuration PROM. options AHC_DUMP_EEPROM # Bitmap of units to enable targetmode operations. options AHC_TMODE_ENABLE # Compile in Aic7xxx Debugging code. options AHC_DEBUG # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h options AHC_DEBUG_OPTS # Print register bitfields in debug output. Adds ~128k to driver # See ahc(4). options AHC_REG_PRETTY_PRINT # Compile in aic79xx debugging code. options AHD_DEBUG # Aic79xx driver debugging options. Adds ~215k to driver. See ahd(4). options AHD_DEBUG_OPTS=0xFFFFFFFF # Print human-readable register definitions when debugging options AHD_REG_PRETTY_PRINT # Bitmap of units to enable targetmode operations. options AHD_TMODE_ENABLE # The adw driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. options ADW_ALLOW_MEMIO # Options used in dev/iscsi (Software iSCSI stack) # options ISCSI_INITIATOR_DEBUG=9 # Options used in dev/isp/ (Qlogic SCSI/FC driver). # # ISP_TARGET_MODE - enable target mode operation # options ISP_TARGET_MODE=1 # # ISP_DEFAULT_ROLES - default role # none=0 # target=1 # initiator=2 # both=3 (not supported currently) # # ISP_INTERNAL_TARGET (trivial internal disk target, for testing) # options ISP_DEFAULT_ROLES=0 # Options used in dev/sym/ (Symbios SCSI driver). #options SYM_SETUP_LP_PROBE_MAP #-Low Priority Probe Map (bits) # Allows the ncr to take precedence # 1 (1<<0) -> 810a, 860 # 2 (1<<1) -> 825a, 875, 885, 895 # 4 (1<<2) -> 895a, 896, 1510d #options SYM_SETUP_SCSI_DIFF #-HVD support for 825a, 875, 885 # disabled:0 (default), enabled:1 #options SYM_SETUP_PCI_PARITY #-PCI parity checking # disabled:0, enabled:1 (default) #options SYM_SETUP_MAX_LUN #-Number of LUNs supported # default:8, range:[1..64] # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/). # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O. # The DPT controllers are commonly re-licensed under other brand-names - # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and # Compaq are actually DPT controllers. # # See src/sys/dev/dpt for debugging and other subtle options. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. The tools in # /usr/sbin/dpt_* assume these to be enabled. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_RESET_HBA Make "reset" actually reset the controller # instead of fudging it. Only enable this if you # are 100% certain you need it. device dpt # DPT options #!CAM# options DPT_MEASURE_PERFORMANCE options DPT_RESET_HBA # # Compaq "CISS" RAID controllers (SmartRAID 5* series) # These controllers have a SCSI-like interface, and require the # CAM infrastructure. # device ciss # # Intel Integrated RAID controllers. # This driver was developed and is maintained by Intel. Contacts # at Intel for this driver are # "Kannanthanam, Boji T" and # "Leubner, Achim" . # device iir # # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later # firmware. These controllers have a SCSI-like interface, and require # the CAM infrastructure. # device mly # # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers. Only # one entry is needed; the code will find and configure all supported # controllers. # device ida # Compaq Smart RAID device mlx # Mylex DAC960 device amr # AMI MegaRAID device amrp # SCSI Passthrough interface (optional, CAM req.) device mfi # LSI MegaRAID SAS device mfip # LSI MegaRAID SAS passthrough, requires CAM options MFI_DEBUG device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s # # 3ware ATA RAID # device twe # 3ware ATA RAID # # Serial ATA host controllers: # # ahci: Advanced Host Controller Interface (AHCI) compatible # mvs: Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers # siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers # # These drivers are part of cam(4) subsystem. They supersede less featured # ata(4) subsystem drivers, supporting same hardware. device ahci device mvs device siis # # The 'ATA' driver supports all legacy ATA/ATAPI controllers, including # PC Card devices. You only need one "device ata" for it to find all # PCI and PC Card ATA/ATAPI devices on modern machines. # Alternatively, individual bus and chipset drivers may be chosen by using # the 'atacore' driver then selecting the drivers on a per vendor basis. # For example to build a system which only supports a VIA chipset, # omit 'ata' and include the 'atacore', 'atapci' and 'atavia' drivers. device ata # Modular ATA #device atacore # Core ATA functionality #device atacard # CARDBUS support #device ataisa # ISA bus support #device atapci # PCI bus support; only generic chipset support # PCI ATA chipsets #device ataacard # ACARD #device ataacerlabs # Acer Labs Inc. (ALI) #device ataamd # American Micro Devices (AMD) #device ataati # ATI #device atacenatek # Cenatek #device atacypress # Cypress #device atacyrix # Cyrix #device atahighpoint # HighPoint #device ataintel # Intel #device ataite # Integrated Technology Inc. (ITE) #device atajmicron # JMicron #device atamarvell # Marvell #device atamicron # Micron #device atanational # National #device atanetcell # NetCell #device atanvidia # nVidia #device atapromise # Promise #device ataserverworks # ServerWorks #device atasiliconimage # Silicon Image Inc. (SiI) (formerly CMD) #device atasis # Silicon Integrated Systems Corp.(SiS) #device atavia # VIA Technologies Inc. # # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add: hint.ata.0.at="isa" hint.ata.0.port="0x1f0" hint.ata.0.irq="14" hint.ata.1.at="isa" hint.ata.1.port="0x170" hint.ata.1.irq="15" # # The following options are valid on the ATA driver: # # ATA_REQUEST_TIMEOUT: the number of seconds to wait for an ATA request # before timing out. #options ATA_REQUEST_TIMEOUT=10 # # Standard floppy disk controllers and floppy tapes, supports # the Y-E DATA External FDD (PC Card) # device fdc hint.fdc.0.at="isa" hint.fdc.0.port="0x3F0" hint.fdc.0.irq="6" hint.fdc.0.drq="2" # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # # Activate this line if you happen to have an Insight floppy tape. # Probing them proved to be dangerous for people with floppy disks only, # so it's "hidden" behind a flag: #hint.fdc.0.flags="1" # Specify floppy devices hint.fd.0.at="fdc0" hint.fd.0.drive="0" hint.fd.1.at="fdc0" hint.fd.1.drive="1" # # uart: newbusified driver for serial interfaces. It consolidates the sio(4), # sab(4) and zs(4) drivers. # device uart # Options for uart(4) options UART_PPS_ON_CTS # Do time pulse capturing using CTS # instead of DCD. options UART_POLL_FREQ # Set polling rate, used when hw has # no interrupt support (50 Hz default). # The following hint should only be used for pure ISA devices. It is not # needed otherwise. Use of hints is strongly discouraged. hint.uart.0.at="isa" # The following 3 hints are used when the UART is a system device (i.e., a # console or debug port), but only on platforms that don't have any other # means to pass the information to the kernel. The unit number of the hint # is only used to bundle the hints together. There is no relation to the # unit number of the probed UART. hint.uart.0.port="0x3f8" hint.uart.0.flags="0x10" hint.uart.0.baud="115200" # `flags' for serial drivers that support consoles like sio(4) and uart(4): # 0x10 enable console support for this unit. Other console flags # (if applicable) are ignored unless this is set. Enabling # console support does not make the unit the preferred console. # Boot with -h or set boot_serial=YES in the loader. For sio(4) # specifically, the 0x20 flag can also be set (see above). # Currently, at most one unit can have console support; the # first one (in config file order) with this flag set is # preferred. Setting this flag for sio0 gives the old behavior. # 0x80 use this port for serial line gdb support in ddb. Also known # as debug port. # # Options for serial drivers that support consoles: options BREAK_TO_DEBUGGER # A BREAK/DBG on the console goes to # ddb, if available. # Solaris implements a new BREAK which is initiated by a character # sequence CR ~ ^b which is similar to a familiar pattern used on # Sun servers by the Remote Console. There are FreeBSD extensions: # CR ~ ^p requests force panic and CR ~ ^r requests a clean reboot. options ALT_BREAK_TO_DEBUGGER # Serial Communications Controller # Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel # communications controllers. device scc # PCI Universal Communications driver # Supports various multi port PCI I/O cards. device puc # # Network interfaces: # # MII bus support is required for many PCI Ethernet NICs, # namely those which use MII-compliant transceivers or implement # transceiver control interfaces that operate like an MII. Adding # "device miibus" to the kernel config pulls in support for the generic # miibus API, the common support for for bit-bang'ing the MII and all # of the PHY drivers, including a generic one for PHYs that aren't # specifically handled by an individual driver. Support for specific # PHYs may be built by adding "device mii", "device mii_bitbang" if # needed by the NIC driver and then adding the appropriate PHY driver. device mii # Minimal MII support device mii_bitbang # Common module for bit-bang'ing the MII device miibus # MII support w/ bit-bang'ing and all PHYs device acphy # Altima Communications AC101 device amphy # AMD AM79c873 / Davicom DM910{1,2} device atphy # Attansic/Atheros F1 device axphy # Asix Semiconductor AX88x9x device bmtphy # Broadcom BCM5201/BCM5202 and 3Com 3c905C device bnxt # Broadcom NetXtreme-C/NetXtreme-E device brgphy # Broadcom BCM54xx/57xx 1000baseTX device ciphy # Cicada/Vitesse CS/VSC8xxx device e1000phy # Marvell 88E1000 1000/100/10-BT device gentbi # Generic 10-bit 1000BASE-{LX,SX} fiber ifaces device icsphy # ICS ICS1889-1893 device ip1000phy # IC Plus IP1000A/IP1001 device jmphy # JMicron JMP211/JMP202 device lxtphy # Level One LXT-970 device mlphy # Micro Linear 6692 device nsgphy # NatSemi DP8361/DP83865/DP83891 device nsphy # NatSemi DP83840A device nsphyter # NatSemi DP83843/DP83815 device pnaphy # HomePNA device qsphy # Quality Semiconductor QS6612 device rdcphy # RDC Semiconductor R6040 device rgephy # RealTek 8169S/8110S/8211B/8211C device rlphy # RealTek 8139 device rlswitch # RealTek 8305 device smcphy # SMSC LAN91C111 device tdkphy # TDK 89Q2120 device tlphy # Texas Instruments ThunderLAN device truephy # LSI TruePHY device xmphy # XaQti XMAC II # an: Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA, # PCI and ISA varieties. # ae: Support for gigabit ethernet adapters based on the Attansic/Atheros # L2 PCI-Express FastEthernet controllers. # age: Support for gigabit ethernet adapters based on the Attansic/Atheros # L1 PCI express gigabit ethernet controllers. # alc: Support for Atheros AR8131/AR8132 PCIe ethernet controllers. # ale: Support for Atheros AR8121/AR8113/AR8114 PCIe ethernet controllers. # ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan) # bce: Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet # adapters. # bfe: Broadcom BCM4401 Ethernet adapter. # bge: Support for gigabit ethernet adapters based on the Broadcom # BCM570x family of controllers, including the 3Com 3c996-T, # the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and # the embedded gigE NICs on Dell PowerEdge 2550 servers. # bnxt: Broadcom NetXtreme-C and NetXtreme-E PCIe 10/25/50G Ethernet adapters. # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # bwi: Broadcom BCM430* and BCM431* family of wireless adapters. # bwn: Broadcom BCM43xx family of wireless adapters. # cas: Sun Cassini/Cassini+ and National Semiconductor DP83065 Saturn # cm: Arcnet SMC COM90c26 / SMC COM90c56 # (and SMC COM90c66 in '56 compatibility mode) adapters. # cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters. # cxgbe:Chelsio T4, T5, and T6-based 1/10/25/40/100GbE PCIe Ethernet # adapters. # cxgbev: Chelsio T4, T5, and T6-based PCIe Virtual Functions. # dc: Support for PCI fast ethernet adapters based on the DEC/Intel 21143 # and various workalikes including: # the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics # AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On # 82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II # and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver # replaces the old al, ax, dm, pn and mx drivers. List of brands: # Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110, # SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX, # LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204, # KNE110TX. # de: Digital Equipment DC21040 # em: Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters. # ep: 3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589 # and PC Card devices using these chipsets. # ex: Intel EtherExpress Pro/10 and other i82595-based adapters, # Olicom Ethernet PC Card devices. # fe: Fujitsu MB86960A/MB86965A Ethernet # fea: DEC DEFEA EISA FDDI adapter # fpa: Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed. # fxp: Intel EtherExpress Pro/100B # (hint of prefer_iomap can be done to prefer I/O instead of Mem mapping) # gem: Apple GMAC/Sun ERI/Sun GEM # hme: Sun HME (Happy Meal Ethernet) # jme: JMicron JMC260 Fast Ethernet/JMC250 Gigabit Ethernet based adapters. # le: AMD Am7900 LANCE and Am79C9xx PCnet # lge: Support for PCI gigabit ethernet adapters based on the Level 1 # LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX, # SMC TigerCard 1000 (SMC9462SX), and some Addtron cards. # malo: Marvell Libertas wireless NICs. # mwl: Marvell 88W8363 802.11n wireless NICs. # Requires the mwl firmware module # mwlfw: Marvell 88W8363 firmware # msk: Support for gigabit ethernet adapters based on the Marvell/SysKonnect # Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061, # 88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053, # 88E8055, 88E8056 and D-Link 560T/550SX. # lmc: Support for the LMC/SBE wide-area network interface cards. # mlx5: Mellanox ConnectX-4 and ConnectX-4 LX IB and Eth shared code module. # mlx5en:Mellanox ConnectX-4 and ConnectX-4 LX PCIe Ethernet adapters. # my: Myson Fast Ethernet (MTD80X, MTD89X) # nge: Support for PCI gigabit ethernet adapters based on the National # Semiconductor DP83820 and DP83821 chipset. This includes the # SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet # GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom # EP-320G-TX and the Netgear GA622T. # oce: Emulex 10 Gbit adapters (OneConnect Ethernet) # pcn: Support for PCI fast ethernet adapters based on the AMD Am79c97x # PCnet-FAST, PCnet-FAST+, PCnet-FAST III, PCnet-PRO and PCnet-Home # chipsets. These can also be handled by the le(4) driver if the # pcn(4) driver is left out of the kernel. The le(4) driver does not # support the additional features like the MII bus and burst mode of # the PCnet-FAST and greater chipsets though. # ral: Ralink Technology IEEE 802.11 wireless adapter # re: RealTek 8139C+/8169/816xS/811xS/8101E PCI/PCIe Ethernet adapter # rl: Support for PCI fast ethernet adapters based on the RealTek 8129/8139 # chipset. Note that the RealTek driver defaults to using programmed # I/O to do register accesses because memory mapped mode seems to cause # severe lockups on SMP hardware. This driver also supports the # Accton EN1207D `Cheetah' adapter, which uses a chip called # the MPX 5030/5038, which is either a RealTek in disguise or a # RealTek workalike. Note that the D-Link DFE-530TX+ uses the RealTek # chipset and is supported by this driver, not the 'vr' driver. # rtwn: RealTek wireless adapters. # rtwnfw: RealTek wireless firmware. # sf: Support for Adaptec Duralink PCI fast ethernet adapters based on the # Adaptec AIC-6915 "starfire" controller. # This includes dual and quad port cards, as well as one 100baseFX card. # Most of these are 64-bit PCI devices, except for one single port # card which is 32-bit. # sge: Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter # sis: Support for NICs based on the Silicon Integrated Systems SiS 900, # SiS 7016 and NS DP83815 PCI fast ethernet controller chips. # sk: Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs. # This includes the SK-9841 and SK-9842 single port cards (single mode # and multimode fiber) and the SK-9843 and SK-9844 dual port cards # (also single mode and multimode). # The driver will autodetect the number of ports on the card and # attach each one as a separate network interface. # sn: Support for ISA and PC Card Ethernet devices using the # SMC91C90/92/94/95 chips. # ste: Sundance Technologies ST201 PCI fast ethernet controller, includes # the D-Link DFE-550TX. # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack # TC9021 family of controllers, including the Sundance ST2021/ST2023, # the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101. # ti: Support for PCI gigabit ethernet NICs based on the Alteon Networks # Tigon 1 and Tigon 2 chipsets. This includes the Alteon AceNIC, the # 3Com 3c985, the Netgear GA620 and various others. Note that you will # probably want to bump up kern.ipc.nmbclusters a lot to use this driver. # tl: Support for the Texas Instruments TNETE100 series 'ThunderLAN' # cards and integrated ethernet controllers. This includes several # Compaq Netelligent 10/100 cards and the built-in ethernet controllers # in several Compaq Prosignia, Proliant and Deskpro systems. It also # supports several Olicom 10Mbps and 10/100 boards. # tx: SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series) # txp: Support for 3Com 3cR990 cards with the "Typhoon" chipset # vr: Support for various fast ethernet adapters based on the VIA # Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips, # including the D-Link DFE520TX and D-Link DFE530TX (see 'rl' for # DFE530TX+), the Hawking Technologies PN102TX, and the AOpen/Acer ALN-320. # vte: DM&P Vortex86 RDC R6040 Fast Ethernet # vx: 3Com 3C590 and 3C595 # wb: Support for fast ethernet adapters based on the Winbond W89C840F chip. # Note: this is not the same as the Winbond W89C940F, which is a # NE2000 clone. # wi: Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both # the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA # bridge with a PCMCIA adapter plugged into it. # xe: Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller, # Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card, # Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56 # xl: Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast) # Etherlink XL cards and integrated controllers. This includes the # integrated 3c905B-TX chips in certain Dell Optiplex and Dell # Precision desktop machines and the integrated 3c905-TX chips # in Dell Latitude laptop docking stations. # Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX # Order for ISA/EISA devices is important here device cm hint.cm.0.at="isa" hint.cm.0.port="0x2e0" hint.cm.0.irq="9" hint.cm.0.maddr="0xdc000" device ep device ex device fe hint.fe.0.at="isa" hint.fe.0.port="0x300" device fea device sn hint.sn.0.at="isa" hint.sn.0.port="0x300" hint.sn.0.irq="10" device an device wi device xe # PCI Ethernet NICs that use the common MII bus controller code. device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) hint.fxp.0.prefer_iomap="0" device gem # Apple GMAC/Sun ERI/Sun GEM device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device mlx5 # Shared code module between IB and Ethernet device mlx5en # Mellanox ConnectX-4 and ConnectX-4 LX device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device my # Myson Fast Ethernet (MTD80X, MTD89X) device nge # NatSemi DP83820 gigabit Ethernet device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device pcn # AMD Am79C97x PCI 10/100 NICs device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # PCI Ethernet NICs. device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4-T6 1/10/25/40/100 Gigabit Ethernet device cxgbev # Chelsio T4-T6 Virtual Functions device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel Pro/1000 Gigabit Ethernet device ixgb # Intel Pro/10Gbe PCI-X Ethernet device ix # Intel Pro/10Gbe PCIE Ethernet device ixv # Intel Pro/10Gbe PCIE Ethernet VF device le # AMD Am7900 LANCE and Am79C9xx PCnet device mxge # Myricom Myri-10G 10GbE NIC device nxge # Neterion Xframe 10GbE Server/Storage Adapter device oce # Emulex 10 GbE (OneConnect Ethernet) device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') device vxge # Exar/Neterion XFrame 3100 10GbE # PCI FDDI NICs. device fpa # PCI WAN adapters. device lmc # PCI IEEE 802.11 Wireless NICs device ath # Atheros pci/cardbus NIC's device ath_hal # pci/cardbus chip support #device ath_ar5210 # AR5210 chips #device ath_ar5211 # AR5211 chips #device ath_ar5212 # AR5212 chips #device ath_rf2413 #device ath_rf2417 #device ath_rf2425 #device ath_rf5111 #device ath_rf5112 #device ath_rf5413 #device ath_ar5416 # AR5416 chips options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors # All of the AR5212 parts have a problem when paired with the AR71xx # CPUS. These parts have a bug that triggers a fatal bus error on the AR71xx # only. Details of the exact nature of the bug are sketchy, but some can be # found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and # 6. This option enables this workaround. There is a performance penalty # for this work around, but without it things don't work at all. The DMA # from the card usually bursts 128 bytes, but on the affected CPUs, only # 4 are safe. options AH_RXCFG_SDMAMW_4BYTES #device ath_ar9160 # AR9160 chips #device ath_ar9280 # AR9280 chips #device ath_ar9285 # AR9285 chips device ath_rate_sample # SampleRate tx rate control for ath device bwi # Broadcom BCM430* BCM431* device bwn # Broadcom BCM43xx device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device mwlfw device ral # Ralink Technology RT2500 wireless NICs. device rtwn # Realtek wireless NICs device rtwnfw # Use sf_buf(9) interface for jumbo buffers on ti(4) controllers. #options TI_SF_BUF_JUMBO # Turn on the header splitting option for the ti(4) driver firmware. This # only works for Tigon II chips, and has no effect for Tigon I chips. # This option requires the TI_SF_BUF_JUMBO option above. #options TI_JUMBO_HDRSPLIT # These two options allow manipulating the mbuf cluster size and mbuf size, # respectively. Be very careful with NIC driver modules when changing # these from their default values, because that can potentially cause a # mismatch between the mbuf size assumed by the kernel and the mbuf size # assumed by a module. The only driver that currently has the ability to # detect a mismatch is ti(4). options MCLSHIFT=12 # mbuf cluster shift in bits, 12 == 4KB options MSIZE=512 # mbuf size in bytes # # ATM related options (Cranor version) # (note: this driver cannot be used with the HARP ATM stack) # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # The `hatm' device provides support for Fore/Marconi HE155 and HE622 # ATM PCI cards. # # The `fatm' device provides support for Fore PCA200E ATM PCI cards. # # The `patm' device provides support for IDT77252 based cards like # ProSum's ProATM-155 and ProATM-25 and IDT's evaluation boards. # # atm device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # utopia provides the access to the ATM PHY chips and is required for en, # hatm and fatm. # # the current driver supports only PVC operations (no atm-arp, no multicast). # for more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html # device atm device en device fatm #Fore PCA200E device hatm #Fore/Marconi HE155/622 device patm #IDT77252 cards (ProATM and IDT) device utopia #ATM PHY driver options NATM #native ATM options LIBMBPOOL #needed by patm, iatm # # Sound drivers # # sound: The generic sound driver. # device sound # # snd_*: Device-specific drivers. # # The flags of the device tell the device a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if don't know what to put in (and you don't, # since this is unsupported at the moment...). # # snd_ad1816: Analog Devices AD1816 ISA PnP/non-PnP. # snd_als4000: Avance Logic ALS4000 PCI. # snd_atiixp: ATI IXP 200/300/400 PCI. # snd_audiocs: Crystal Semiconductor CS4231 SBus/EBus. Only # for sparc64. # snd_cmi: CMedia CMI8338/CMI8738 PCI. # snd_cs4281: Crystal Semiconductor CS4281 PCI. # snd_csa: Crystal Semiconductor CS461x/428x PCI. (except # 4281) # snd_ds1: Yamaha DS-1 PCI. # snd_emu10k1: Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI. # snd_emu10kx: Creative SoundBlaster Live! and Audigy # snd_envy24: VIA Envy24 and compatible, needs snd_spicds. # snd_envy24ht: VIA Envy24HT and compatible, needs snd_spicds. # snd_es137x: Ensoniq AudioPCI ES137x PCI. # snd_ess: Ensoniq ESS ISA PnP/non-PnP, to be used in # conjunction with snd_sbc. # snd_fm801: Forte Media FM801 PCI. # snd_gusc: Gravis UltraSound ISA PnP/non-PnP. # snd_hda: Intel High Definition Audio (Controller) and # compatible. # snd_hdspe: RME HDSPe AIO and RayDAT. # snd_ich: Intel ICH AC'97 and some more audio controllers # embedded in a chipset, for example nVidia # nForce controllers. # snd_maestro: ESS Technology Maestro-1/2x PCI. # snd_maestro3: ESS Technology Maestro-3/Allegro PCI. # snd_mss: Microsoft Sound System ISA PnP/non-PnP. # snd_neomagic: Neomagic 256 AV/ZX PCI. # snd_sb16: Creative SoundBlaster16, to be used in # conjunction with snd_sbc. # snd_sb8: Creative SoundBlaster (pre-16), to be used in # conjunction with snd_sbc. # snd_sbc: Creative SoundBlaster ISA PnP/non-PnP. # Supports ESS and Avance ISA chips as well. # snd_solo: ESS Solo-1x PCI. # snd_spicds: SPI codec driver, needed by Envy24/Envy24HT drivers. # snd_t4dwave: Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs # M5451 PCI. # snd_uaudio: USB audio. # snd_via8233: VIA VT8233x PCI. # snd_via82c686: VIA VT82C686A PCI. # snd_vibes: S3 Sonicvibes PCI. device snd_ad1816 device snd_als4000 device snd_atiixp #device snd_audiocs device snd_cmi device snd_cs4281 device snd_csa device snd_ds1 device snd_emu10k1 device snd_emu10kx device snd_envy24 device snd_envy24ht device snd_es137x device snd_ess device snd_fm801 device snd_gusc device snd_hda device snd_hdspe device snd_ich device snd_maestro device snd_maestro3 device snd_mss device snd_neomagic device snd_sb16 device snd_sb8 device snd_sbc device snd_solo device snd_spicds device snd_t4dwave device snd_uaudio device snd_via8233 device snd_via82c686 device snd_vibes # For non-PnP sound cards: hint.pcm.0.at="isa" hint.pcm.0.irq="10" hint.pcm.0.drq="1" hint.pcm.0.flags="0x0" hint.sbc.0.at="isa" hint.sbc.0.port="0x220" hint.sbc.0.irq="5" hint.sbc.0.drq="1" hint.sbc.0.flags="0x15" hint.gusc.0.at="isa" hint.gusc.0.port="0x220" hint.gusc.0.irq="5" hint.gusc.0.drq="1" hint.gusc.0.flags="0x13" # # Following options are intended for debugging/testing purposes: # # SND_DEBUG Enable extra debugging code that includes # sanity checking and possible increase of # verbosity. # # SND_DIAGNOSTIC Similar in a spirit of INVARIANTS/DIAGNOSTIC, # zero tolerance against inconsistencies. # # SND_FEEDER_MULTIFORMAT By default, only 16/32 bit feeders are compiled # in. This options enable most feeder converters # except for 8bit. WARNING: May bloat the kernel. # # SND_FEEDER_FULL_MULTIFORMAT Ditto, but includes 8bit feeders as well. # # SND_FEEDER_RATE_HP (feeder_rate) High precision 64bit arithmetic # as much as possible (the default trying to # avoid it). Possible slowdown. # # SND_PCM_64 (Only applicable for i386/32bit arch) # Process 32bit samples through 64bit # integer/arithmetic. Slight increase of dynamic # range at a cost of possible slowdown. # # SND_OLDSTEREO Only 2 channels are allowed, effectively # disabling multichannel processing. # options SND_DEBUG options SND_DIAGNOSTIC options SND_FEEDER_MULTIFORMAT options SND_FEEDER_FULL_MULTIFORMAT options SND_FEEDER_RATE_HP options SND_PCM_64 options SND_OLDSTEREO # # Miscellaneous hardware: # # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board # joy: joystick (including IO DATA PCJOY PC Card joystick) # cmx: OmniKey CardMan 4040 pccard smartcard reader device joy # PnP aware, hints for non-PnP only hint.joy.0.at="isa" hint.joy.0.port="0x201" device cmx # # The 'bktr' device is a PCI video capture device using the Brooktree # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a # TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator, # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo. # # options OVERRIDE_CARD=xxx # options OVERRIDE_TUNER=xxx # options OVERRIDE_MSP=1 # options OVERRIDE_DBX=1 # These options can be used to override the auto detection # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h # Using sysctl(8) run-time overrides on a per-card basis can be made # # options BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL # or # options BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC # Specifies the default video capture mode. # This is required for Dual Crystal (28&35MHz) boards where PAL is used # to prevent hangs during initialization, e.g. VideoLogic Captivator PCI. # # options BKTR_USE_PLL # This is required for PAL or SECAM boards with a 28MHz crystal and no 35MHz # crystal, e.g. some new Bt878 cards. # # options BKTR_GPIO_ACCESS # This enables IOCTLs which give user level access to the GPIO port. # # options BKTR_NO_MSP_RESET # Prevents the MSP34xx reset. Good if you initialize the MSP in another OS first # # options BKTR_430_FX_MODE # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode. # # options BKTR_SIS_VIA_MODE # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is # needed for some old SiS and VIA chipset motherboards. # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset # motherboards and motherboards with bad or incomplete PCI 2.1 support. # As a rough guess, old = before 1998 # # options BKTR_NEW_MSP34XX_DRIVER # Use new, more complete initialization scheme for the msp34* soundchip. # Should fix stereo autodetection if the old driver does only output # mono sound. # # options BKTR_USE_FREEBSD_SMBUS # Compile with FreeBSD SMBus implementation # # Brooktree driver has been ported to the new I2C framework. Thus, # you'll need to have the following 3 lines in the kernel config. # device smbus # device iicbus # device iicbb # device iicsmb # The iic and smb devices are only needed if you want to control other # I2C slaves connected to the external connector of some cards. # device bktr # # PC Card/PCMCIA and Cardbus # # cbb: pci/cardbus bridge implementing YENTA interface # pccard: pccard slots # cardbus: cardbus slots device cbb device pccard device cardbus # # MMC/SD # # mmc MMC/SD bus # mmcsd MMC/SD memory card # sdhci Generic PCI SD Host Controller # device mmc device mmcsd device sdhci # # SMB bus # # System Management Bus support is provided by the 'smbus' device. # Access to the SMBus device is via the 'smb' device (/dev/smb*), # which is a child of the 'smbus' device. # # Supported devices: # smb standard I/O through /dev/smb* # # Supported SMB interfaces: # iicsmb I2C to SMB bridge with any iicbus interface # bktr brooktree848 I2C hardware interface # intpm Intel PIIX4 (82371AB, 82443MX) Power Management Unit # alpm Acer Aladdin-IV/V/Pro2 Power Management Unit # ichsmb Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA) # viapm VIA VT82C586B/596B/686A and VT8233 Power Management Unit # amdpm AMD 756 Power Management Unit # amdsmb AMD 8111 SMBus 2.0 Controller # nfpm NVIDIA nForce Power Management Unit # nfsmb NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller # ismt Intel SMBus 2.0 controller chips (on Atom S1200, C2000) # device smbus # Bus support, required for smb below. device intpm device alpm device ichsmb device viapm device amdpm device amdsmb device nfpm device nfsmb device ismt device smb # SMBus peripheral devices # # jedec_ts Temperature Sensor compliant with JEDEC Standard 21-C # device jedec_ts # I2C Bus # # Philips i2c bus support is provided by the `iicbus' device. # # Supported devices: # ic i2c network interface # iic i2c standard io # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands. # iicoc simple polling driver for OpenCores I2C controller # # Supported interfaces: # bktr brooktree848 I2C software interface # # Other: # iicbb generic I2C bit-banging code (needed by lpbb, bktr) # device iicbus # Bus support, required for ic/iic/iicsmb below. device iicbb device ic device iic device iicsmb # smb over i2c bridge device iicoc # OpenCores I2C controller support # I2C peripheral devices # # ds133x Dallas Semiconductor DS1337, DS1338 and DS1339 RTC # ds1374 Dallas Semiconductor DS1374 RTC # ds1672 Dallas Semiconductor DS1672 RTC # s35390a Seiko Instruments S-35390A RTC # device ds133x device ds1374 device ds1672 device s35390a # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port, devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'da'), best # performance is achieved with ports in EPP 1.9 mode. # lpt Parallel Printer # plip Parallel network interface # ppi General-purpose I/O ("Geek Port") + IEEE1284 I/O # pps Pulse per second Timing Interface # lpbb Philips official parallel port I2C bit-banging interface # pcfclock Parallel port clock driver. # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # options PPC_PROBE_CHIPSET # Enable chipset specific detection # (see flags in ppc(4)) options DEBUG_1284 # IEEE1284 signaling protocol debug options PERIPH_1284 # Makes your computer act as an IEEE1284 # compliant peripheral options DONTPROBE_1284 # Avoid boot detection of PnP parallel devices options VP0_DEBUG # ZIP/ZIP+ debug options LPT_DEBUG # Printer driver debug options PPC_DEBUG # Parallel chipset level debug options PLIP_DEBUG # Parallel network IP interface debug options PCFCLOCK_VERBOSE # Verbose pcfclock driver options PCFCLOCK_MAX_RETRIES=5 # Maximum read tries (default 10) device ppc hint.ppc.0.at="isa" hint.ppc.0.irq="7" device ppbus device vpo device lpt device plip device ppi device pps device lpbb device pcfclock # # Etherswitch framework and drivers # # etherswitch The etherswitch(4) framework # miiproxy Proxy device for miibus(4) functionality # # Switch hardware support: # arswitch Atheros switches # ip17x IC+ 17x family switches # rtl8366r Realtek RTL8366 switches # ukswitch Multi-PHY switches # device etherswitch device miiproxy device arswitch device ip17x device rtl8366rb device ukswitch # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname # Requires NFSCL and NFS_ROOT options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options BOOTP_NFSV3 # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP options BOOTP_BLOCKSIZE=8192 # Override NFS block size # # Add software watchdog routines. # options SW_WATCHDOG # # Add the software deadlock resolver thread. # options DEADLKRES # # Disable swapping of stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn # it back on at run-time. # # This is sometimes usable for systems which don't have any swap space # (see also sysctls "vm.defer_swapspace_pageouts" and # "vm.disable_swapspace_pageouts") # #options NO_SWAPPING # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers # for sendfile(2) that are used to map file VM pages, and normally # default to a quantity that is roughly 16*MAXUSERS+512. You would # typically want about 4 of these for each simultaneous file send. # options NSFBUFS=1024 # # Enable extra debugging code for locks. This stores the filename and # line of whatever acquired the lock in the lock itself, and changes a # number of function calls to pass around the relevant data. This is # not at all useful unless you are debugging lock code. Note that # modules should be recompiled as this option modifies KBI. # options DEBUG_LOCKS ##################################################################### # USB support # UHCI controller device uhci # OHCI controller device ohci # EHCI controller device ehci # XHCI controller device xhci # SL811 Controller #device slhci # General USB code (mandatory for USB) device usb # # USB Double Bulk Pipe devices device udbp # USB Fm Radio device ufm # USB temperature meter device ugold # USB LED device uled # Human Interface Device (anything with buttons and dials) device uhid # USB keyboard device ukbd # USB printer device ulpt # USB mass storage driver (Requires scbus and da) device umass # USB mass storage driver for device-side mode device usfs # USB support for Belkin F5U109 and Magic Control Technology serial adapters device umct # USB modem support device umodem # USB mouse device ums # USB touchpad(s) device atp device wsp # eGalax USB touch screen device uep # Diamond Rio 500 MP3 player device urio # # USB serial support device ucom # USB support for 3G modem cards by Option, Novatel, Huawei and Sierra device u3g # USB support for Technologies ARK3116 based serial adapters device uark # USB support for Belkin F5U103 and compatible serial adapters device ubsa # USB support for serial adapters based on the FT8U100AX and FT8U232AM device uftdi # USB support for some Windows CE based serial communication. device uipaq # USB support for Prolific PL-2303 serial adapters device uplcom # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters device uslcom # USB Visor and Palm devices device uvisor # USB serial support for DDI pocket's PHS device uvscom # # USB ethernet support device uether # ADMtek USB ethernet. Supports the LinkSys USB100TX, # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus # eval board. device aue # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the # LinkSys USB200M and various other adapters. device axe # ASIX Electronics AX88178A/AX88179 USB 2.0/3.0 gigabit ethernet driver. device axge # # Devices which communicate using Ethernet over USB, particularly # Communication Device Class (CDC) Ethernet specification. Supports # Sharp Zaurus PDAs, some DOCSIS cable modems and so on. device cdce # # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate # and Netmate II, and the Belkin F5U111. device cue # # Kawasaki LSI ethernet. Supports the LinkSys USB10T, # Entrega USB-NET-E45, Peracom Ethernet Adapter, the # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T, # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB # and 2104USB, and the Corega USB-T. device kue # # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX # and the GREEN HOUSE GH-USB100B. device rue # # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC. device udav # # RealTek RTL8152/RTL8153 USB Ethernet driver device ure # # Moschip MCS7730/MCS7840 USB to fast ethernet. Supports the Sitecom LN030. device mos # # HSxPA devices from Option N.V device uhso # Realtek RTL8188SU/RTL8191SU/RTL8192SU wireless driver device rsu # # Ralink Technology RT2501USB/RT2601USB wireless driver device rum # Ralink Technology RT2700U/RT2800U/RT3000U wireless driver device run # # Atheros AR5523 wireless driver device uath # # Conexant/Intersil PrismGT wireless driver device upgt # # Ralink Technology RT2500USB wireless driver device ural # # RNDIS USB ethernet driver device urndis # Realtek RTL8187B/L wireless driver device urtw # # ZyDas ZD1211/ZD1211B wireless driver device zyd # # Sierra USB wireless driver device usie # # debugging options for the USB subsystem # options USB_DEBUG options U3G_DEBUG # options for ukbd: options UKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions UKBD_DFLT_KEYMAP=jp # options for uplcom: options UPLCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds # options for uvscom: options UVSCOM_DEFAULT_OPKTSIZE=8 # default output packet size options UVSCOM_INTR_INTERVAL=100 # interrupt pipe interval # in milliseconds ##################################################################### # FireWire support device firewire # FireWire bus code device sbp # SCSI over Firewire (Requires scbus and da) device sbp_targ # SBP-2 Target mode (Requires scbus and targ) device fwe # Ethernet over FireWire (non-standard!) device fwip # IP over FireWire (RFC2734 and RFC3146) ##################################################################### # dcons support (Dumb Console Device) device dcons # dumb console driver device dcons_crom # FireWire attachment options DCONS_BUF_SIZE=16384 # buffer size options DCONS_POLL_HZ=100 # polling rate options DCONS_FORCE_CONSOLE=0 # force to be the primary console options DCONS_FORCE_GDB=1 # force to be the gdb device ##################################################################### # crypto subsystem # # This is a port of the OpenBSD crypto framework. Include this when # configuring IPSEC and when you have a h/w crypto device to accelerate # user applications that link to OpenSSL. # # Drivers are ports from OpenBSD with some simple enhancements that have # been fed back to OpenBSD. device crypto # core crypto support # Only install the cryptodev device if you are running tests, or know # specifically why you need it. In most cases, it is not needed and # will make things slower. device cryptodev # /dev/crypto for access to h/w device rndtest # FIPS 140-2 entropy tester device hifn # Hifn 7951, 7781, etc. options HIFN_DEBUG # enable debugging support: hw.hifn.debug options HIFN_RNDTEST # enable rndtest support device ubsec # Broadcom 5501, 5601, 58xx options UBSEC_DEBUG # enable debugging support: hw.ubsec.debug options UBSEC_RNDTEST # enable rndtest support ##################################################################### # # Embedded system options: # # An embedded system might want to run something other than init. options INIT_PATH=/sbin/init:/rescue/init # Debug options options BUS_DEBUG # enable newbus debugging options DEBUG_VFS_LOCKS # enable VFS lock debugging options SOCKBUF_DEBUG # enable sockbuf last record/mb tail checking options IFMEDIA_DEBUG # enable debugging in net/if_media.c # # Verbose SYSINIT # # Make the SYSINIT process performed by mi_startup() verbose. This is very # useful when porting to a new architecture. If DDB is also enabled, this # will print function names instead of addresses. options VERBOSE_SYSINIT ##################################################################### # SYSV IPC KERNEL PARAMETERS # # Maximum number of System V semaphores that can be used on the system at # one time. options SEMMNI=11 # Total number of semaphores system wide options SEMMNS=61 # Total number of undo structures in system options SEMMNU=31 # Maximum number of System V semaphores that can be used by a single process # at one time. options SEMMSL=61 # Maximum number of operations that can be outstanding on a single System V # semaphore at one time. options SEMOPM=101 # Maximum number of undo operations that can be outstanding on a single # System V semaphore at one time. options SEMUME=11 # Maximum number of shared memory pages system wide. options SHMALL=1025 # Maximum size, in bytes, of a single System V shared memory region. options SHMMAX=(SHMMAXPGS*PAGE_SIZE+1) options SHMMAXPGS=1025 # Minimum size, in bytes, of a single System V shared memory region. options SHMMIN=2 # Maximum number of shared memory regions that can be used on the system # at one time. options SHMMNI=33 # Maximum number of System V shared memory regions that can be attached to # a single process at one time. options SHMSEG=9 # Set the amount of time (in seconds) the system will wait before # rebooting automatically when a kernel panic occurs. If set to (-1), # the system will wait indefinitely until a key is pressed on the # console. options PANIC_REBOOT_WAIT_TIME=16 # Attempt to bypass the buffer cache and put data directly into the # userland buffer for read operation when O_DIRECT flag is set on the # file. Both offset and length of the read operation must be # multiples of the physical media sector size. # options DIRECTIO # Specify a lower limit for the number of swap I/O buffers. They are # (among other things) used when bypassing the buffer cache due to # DIRECTIO kernel option enabled and O_DIRECT flag set on file. # options NSWBUF_MIN=120 ##################################################################### # More undocumented options for linting. # Note that documenting these is not considered an affront. options CAM_DEBUG_DELAY # VFS cluster debugging. options CLUSTERDEBUG options DEBUG # Kernel filelock debugging. options LOCKF_DEBUG # System V compatible message queues # Please note that the values provided here are used to test kernel # building. The defaults in the sources provide almost the same numbers. # MSGSSZ must be a power of 2 between 8 and 1024. options MSGMNB=2049 # Max number of chars in queue options MSGMNI=41 # Max number of message queue identifiers options MSGSEG=2049 # Max number of message segments options MSGSSZ=16 # Size of a message segment options MSGTQL=41 # Max number of messages in system options NBUF=512 # Number of buffer headers options SCSI_NCR_DEBUG options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SC_DEBUG_LEVEL=5 # Syscons debug level options SC_RENDER_DEBUG # syscons rendering debugging options VFS_BIO_DEBUG # VFS buffer I/O debugging options KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack options KSTACK_USAGE_PROF # Adaptec Array Controller driver options options AAC_DEBUG # Debugging levels: # 0 - quiet, only emit warnings # 1 - noisy, emit major function # points and things done # 2 - extremely noisy, emit trace # items in loops, etc. # Resource Accounting options RACCT # Resource Limits options RCTL # Yet more undocumented options for linting. # BKTR_ALLOC_PAGES has no effect except to cause warnings, and # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the # driver still mostly spells this option BROOKTREE_ALLOC_PAGES. ##options BKTR_ALLOC_PAGES=(217*4+1) options BROOKTREE_ALLOC_PAGES=(217*4+1) options MAXFILES=999 # Random number generator # Only ONE of the below two may be used; they are mutually exclusive. # If neither is present, then the Fortuna algorithm is selected. #options RANDOM_YARROW # Yarrow CSPRNG (old default) #options RANDOM_LOADABLE # Allow the algorithm to be loaded as # a module. # Select this to allow high-rate but potentially expensive # harvesting of Slab-Allocator entropy. In very high-rate # situations the value of doing this is dubious at best. options RANDOM_ENABLE_UMA # slab allocator # Module to enable execution of application via emulators like QEMU options IMAGACT_BINMISC # zlib I/O stream support # This enables support for compressed core dumps. options GZIO # BHND(4) drivers options BHND_LOGLEVEL # Logging threshold level # evdev interface device evdev # input event device support options EVDEV_SUPPORT # evdev support in legacy drivers options EVDEV_DEBUG # enable event debug msgs device uinput # install /dev/uinput cdev options UINPUT_DEBUG # enable uinput debug msgs # Encrypted kernel crash dumps. options EKCD diff --git a/sys/conf/files b/sys/conf/files index c89c9c994a67..e964508b7693 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4693 +1,4703 @@ # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # acpi_quirks.h optional acpi \ dependency "$S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ compile-with "${AWK} -f $S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ no-obj no-implicit-rule before-depend \ clean "acpi_quirks.h" bhnd_nvram_map.h optional bhnd \ dependency "$S/dev/bhnd/tools/nvram_map_gen.sh $S/dev/bhnd/tools/nvram_map_gen.awk $S/dev/bhnd/nvram/nvram_map" \ compile-with "sh $S/dev/bhnd/tools/nvram_map_gen.sh $S/dev/bhnd/nvram/nvram_map -h" \ no-obj no-implicit-rule before-depend \ clean "bhnd_nvram_map.h" bhnd_nvram_map_data.h optional bhnd \ dependency "$S/dev/bhnd/tools/nvram_map_gen.sh $S/dev/bhnd/tools/nvram_map_gen.awk $S/dev/bhnd/nvram/nvram_map" \ compile-with "sh $S/dev/bhnd/tools/nvram_map_gen.sh $S/dev/bhnd/nvram/nvram_map -d" \ no-obj no-implicit-rule before-depend \ clean "bhnd_nvram_map_data.h" # # The 'fdt_dtb_file' target covers an actual DTB file name, which is derived # from the specified source (DTS) file: .dts -> .dtb # fdt_dtb_file optional fdt fdt_dtb_static \ compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtb.sh $S ${FDT_DTS_FILE} ${.CURDIR}'" \ no-obj no-implicit-rule before-depend \ clean "${FDT_DTS_FILE:R}.dtb" fdt_static_dtb.h optional fdt fdt_dtb_static \ compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtbh.sh ${FDT_DTS_FILE} ${.CURDIR}'" \ dependency "fdt_dtb_file" \ no-obj no-implicit-rule before-depend \ clean "fdt_static_dtb.h" feeder_eq_gen.h optional sound \ dependency "$S/tools/sound/feeder_eq_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > feeder_eq_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_eq_gen.h" feeder_rate_gen.h optional sound \ dependency "$S/tools/sound/feeder_rate_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_rate_mkfilter.awk -- ${FEEDER_RATE_PRESETS} > feeder_rate_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_rate_gen.h" snd_fxdiv_gen.h optional sound \ dependency "$S/tools/sound/snd_fxdiv_gen.awk" \ compile-with "${AWK} -f $S/tools/sound/snd_fxdiv_gen.awk -- > snd_fxdiv_gen.h" \ no-obj no-implicit-rule before-depend \ clean "snd_fxdiv_gen.h" miidevs.h optional miibus | mii \ dependency "$S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ compile-with "${AWK} -f $S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ no-obj no-implicit-rule before-depend \ clean "miidevs.h" pccarddevs.h standard \ dependency "$S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ compile-with "${AWK} -f $S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ no-obj no-implicit-rule before-depend \ clean "pccarddevs.h" kbdmuxmap.h optional kbdmux_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${KBDMUX_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > kbdmuxmap.h" \ no-obj no-implicit-rule before-depend \ clean "kbdmuxmap.h" teken_state.h optional sc | vt \ dependency "$S/teken/gensequences $S/teken/sequences" \ compile-with "${AWK} -f $S/teken/gensequences $S/teken/sequences > teken_state.h" \ no-obj no-implicit-rule before-depend \ clean "teken_state.h" usbdevs.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -h" \ no-obj no-implicit-rule before-depend \ clean "usbdevs.h" usbdevs_data.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -d" \ no-obj no-implicit-rule before-depend \ clean "usbdevs_data.h" cam/cam.c optional scbus cam/cam_compat.c optional scbus cam/cam_iosched.c optional scbus cam/cam_periph.c optional scbus cam/cam_queue.c optional scbus cam/cam_sim.c optional scbus cam/cam_xpt.c optional scbus cam/ata/ata_all.c optional scbus cam/ata/ata_xpt.c optional scbus cam/ata/ata_pmp.c optional scbus cam/nvme/nvme_all.c optional scbus nvme !nvd cam/nvme/nvme_da.c optional scbus nvme da !nvd cam/nvme/nvme_xpt.c optional scbus nvme !nvd cam/scsi/scsi_xpt.c optional scbus cam/scsi/scsi_all.c optional scbus cam/scsi/scsi_cd.c optional cd cam/scsi/scsi_ch.c optional ch cam/ata/ata_da.c optional ada | da cam/ctl/ctl.c optional ctl cam/ctl/ctl_backend.c optional ctl cam/ctl/ctl_backend_block.c optional ctl cam/ctl/ctl_backend_ramdisk.c optional ctl cam/ctl/ctl_cmd_table.c optional ctl cam/ctl/ctl_frontend.c optional ctl cam/ctl/ctl_frontend_cam_sim.c optional ctl cam/ctl/ctl_frontend_ioctl.c optional ctl cam/ctl/ctl_frontend_iscsi.c optional ctl cam/ctl/ctl_ha.c optional ctl cam/ctl/ctl_scsi_all.c optional ctl cam/ctl/ctl_tpc.c optional ctl cam/ctl/ctl_tpc_local.c optional ctl cam/ctl/ctl_error.c optional ctl cam/ctl/ctl_util.c optional ctl cam/ctl/scsi_ctl.c optional ctl cam/scsi/scsi_da.c optional da cam/scsi/scsi_low.c optional ncv | nsp | stg cam/scsi/scsi_pass.c optional pass cam/scsi/scsi_pt.c optional pt cam/scsi/scsi_sa.c optional sa cam/scsi/scsi_enc.c optional ses cam/scsi/scsi_enc_ses.c optional ses cam/scsi/scsi_enc_safte.c optional ses cam/scsi/scsi_sg.c optional sg cam/scsi/scsi_targ_bh.c optional targbh cam/scsi/scsi_target.c optional targ cam/scsi/smp_all.c optional scbus # shared between zfs and dtrace cddl/compat/opensolaris/kern/opensolaris.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/compat/opensolaris/kern/opensolaris_cmn_err.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/compat/opensolaris/kern/opensolaris_kmem.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/compat/opensolaris/kern/opensolaris_misc.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/compat/opensolaris/kern/opensolaris_proc.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/compat/opensolaris/kern/opensolaris_sunddi.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/compat/opensolaris/kern/opensolaris_taskq.c optional zfs | dtrace compile-with "${CDDL_C}" # zfs specific cddl/compat/opensolaris/kern/opensolaris_acl.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_dtrace.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kobj.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kstat.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_lookup.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_policy.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_string.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sysevent.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_uio.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vfs.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vm.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_zone.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/acl/acl_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/avl/avl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/opensolaris_fnvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair_alloc_fixed.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/unicode/u8_textprep.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfeature_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_comutil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zpool_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zprop_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/gfs.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/vnode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c optional zfs compile-with "${ZFS_C}" \ warning "kernel contains CDDL licensed ZFS filesystem" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sha256.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/skein_zfs.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/space_reftree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/unique.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_debug.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/callb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/list.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/adler32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/deflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inffast.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inftrees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/opensolaris_crc32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/trees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod_subr.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zutil.c optional zfs compile-with "${ZFS_C}" # dtrace specific cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c optional dtrace compile-with "${DTRACE_C}" \ warning "kernel contains CDDL licensed DTRACE" cddl/dev/dtmalloc/dtmalloc.c optional dtmalloc | dtraceall compile-with "${CDDL_C}" cddl/dev/profile/profile.c optional dtrace_profile | dtraceall compile-with "${CDDL_C}" cddl/dev/sdt/sdt.c optional dtrace_sdt | dtraceall compile-with "${CDDL_C}" cddl/dev/fbt/fbt.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/systrace/systrace.c optional dtrace_systrace | dtraceall compile-with "${CDDL_C}" cddl/dev/prototype.c optional dtrace_prototype | dtraceall compile-with "${CDDL_C}" fs/nfsclient/nfs_clkdtrace.c optional dtnfscl nfscl | dtraceall nfscl compile-with "${CDDL_C}" compat/cloudabi/cloudabi_clock.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_errno.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_fd.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_file.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_futex.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_mem.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_proc.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_random.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_sock.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_thread.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi/cloudabi_vdso.c optional compat_cloudabi32 | compat_cloudabi64 compat/cloudabi32/cloudabi32_fd.c optional compat_cloudabi32 compat/cloudabi32/cloudabi32_module.c optional compat_cloudabi32 compat/cloudabi32/cloudabi32_poll.c optional compat_cloudabi32 compat/cloudabi32/cloudabi32_sock.c optional compat_cloudabi32 compat/cloudabi32/cloudabi32_syscalls.c optional compat_cloudabi32 compat/cloudabi32/cloudabi32_sysent.c optional compat_cloudabi32 compat/cloudabi32/cloudabi32_thread.c optional compat_cloudabi32 compat/cloudabi64/cloudabi64_fd.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_module.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_poll.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_sock.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_syscalls.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_sysent.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_thread.c optional compat_cloudabi64 compat/freebsd32/freebsd32_capability.c optional compat_freebsd32 compat/freebsd32/freebsd32_ioctl.c optional compat_freebsd32 compat/freebsd32/freebsd32_misc.c optional compat_freebsd32 compat/freebsd32/freebsd32_syscalls.c optional compat_freebsd32 compat/freebsd32/freebsd32_sysent.c optional compat_freebsd32 contrib/ck/src/ck_array.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_barrier_centralized.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_barrier_combining.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_barrier_dissemination.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_barrier_mcs.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_barrier_tournament.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_epoch.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_hp.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_hs.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_ht.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/ck/src/ck_rhs.c standard compile-with "${NORMAL_C} -I$S/contrib/ck/include" contrib/dev/acpica/common/ahids.c optional acpi acpi_debug contrib/dev/acpica/common/ahuuids.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbcmds.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbconvert.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbdisply.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbexec.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbhistry.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbinput.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbmethod.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbnames.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbobject.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbstats.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbtest.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbutils.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbxface.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmbuffer.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmcstyle.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmdeferred.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmnames.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmopcode.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrc.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl2.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcs.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmutils.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmwalk.c optional acpi acpi_debug contrib/dev/acpica/components/dispatcher/dsargs.c optional acpi contrib/dev/acpica/components/dispatcher/dscontrol.c optional acpi contrib/dev/acpica/components/dispatcher/dsdebug.c optional acpi contrib/dev/acpica/components/dispatcher/dsfield.c optional acpi contrib/dev/acpica/components/dispatcher/dsinit.c optional acpi contrib/dev/acpica/components/dispatcher/dsmethod.c optional acpi contrib/dev/acpica/components/dispatcher/dsmthdat.c optional acpi contrib/dev/acpica/components/dispatcher/dsobject.c optional acpi contrib/dev/acpica/components/dispatcher/dsopcode.c optional acpi contrib/dev/acpica/components/dispatcher/dsutils.c optional acpi contrib/dev/acpica/components/dispatcher/dswexec.c optional acpi contrib/dev/acpica/components/dispatcher/dswload.c optional acpi contrib/dev/acpica/components/dispatcher/dswload2.c optional acpi contrib/dev/acpica/components/dispatcher/dswscope.c optional acpi contrib/dev/acpica/components/dispatcher/dswstate.c optional acpi contrib/dev/acpica/components/events/evevent.c optional acpi contrib/dev/acpica/components/events/evglock.c optional acpi contrib/dev/acpica/components/events/evgpe.c optional acpi contrib/dev/acpica/components/events/evgpeblk.c optional acpi contrib/dev/acpica/components/events/evgpeinit.c optional acpi contrib/dev/acpica/components/events/evgpeutil.c optional acpi contrib/dev/acpica/components/events/evhandler.c optional acpi contrib/dev/acpica/components/events/evmisc.c optional acpi contrib/dev/acpica/components/events/evregion.c optional acpi contrib/dev/acpica/components/events/evrgnini.c optional acpi contrib/dev/acpica/components/events/evsci.c optional acpi contrib/dev/acpica/components/events/evxface.c optional acpi contrib/dev/acpica/components/events/evxfevnt.c optional acpi contrib/dev/acpica/components/events/evxfgpe.c optional acpi contrib/dev/acpica/components/events/evxfregn.c optional acpi contrib/dev/acpica/components/executer/exconcat.c optional acpi contrib/dev/acpica/components/executer/exconfig.c optional acpi contrib/dev/acpica/components/executer/exconvrt.c optional acpi contrib/dev/acpica/components/executer/excreate.c optional acpi contrib/dev/acpica/components/executer/exdebug.c optional acpi contrib/dev/acpica/components/executer/exdump.c optional acpi contrib/dev/acpica/components/executer/exfield.c optional acpi contrib/dev/acpica/components/executer/exfldio.c optional acpi contrib/dev/acpica/components/executer/exmisc.c optional acpi contrib/dev/acpica/components/executer/exmutex.c optional acpi contrib/dev/acpica/components/executer/exnames.c optional acpi contrib/dev/acpica/components/executer/exoparg1.c optional acpi contrib/dev/acpica/components/executer/exoparg2.c optional acpi contrib/dev/acpica/components/executer/exoparg3.c optional acpi contrib/dev/acpica/components/executer/exoparg6.c optional acpi contrib/dev/acpica/components/executer/exprep.c optional acpi contrib/dev/acpica/components/executer/exregion.c optional acpi contrib/dev/acpica/components/executer/exresnte.c optional acpi contrib/dev/acpica/components/executer/exresolv.c optional acpi contrib/dev/acpica/components/executer/exresop.c optional acpi contrib/dev/acpica/components/executer/exstore.c optional acpi contrib/dev/acpica/components/executer/exstoren.c optional acpi contrib/dev/acpica/components/executer/exstorob.c optional acpi contrib/dev/acpica/components/executer/exsystem.c optional acpi contrib/dev/acpica/components/executer/extrace.c optional acpi contrib/dev/acpica/components/executer/exutils.c optional acpi contrib/dev/acpica/components/hardware/hwacpi.c optional acpi contrib/dev/acpica/components/hardware/hwesleep.c optional acpi contrib/dev/acpica/components/hardware/hwgpe.c optional acpi contrib/dev/acpica/components/hardware/hwpci.c optional acpi contrib/dev/acpica/components/hardware/hwregs.c optional acpi contrib/dev/acpica/components/hardware/hwsleep.c optional acpi contrib/dev/acpica/components/hardware/hwtimer.c optional acpi contrib/dev/acpica/components/hardware/hwvalid.c optional acpi contrib/dev/acpica/components/hardware/hwxface.c optional acpi contrib/dev/acpica/components/hardware/hwxfsleep.c optional acpi contrib/dev/acpica/components/namespace/nsaccess.c optional acpi contrib/dev/acpica/components/namespace/nsalloc.c optional acpi contrib/dev/acpica/components/namespace/nsarguments.c optional acpi contrib/dev/acpica/components/namespace/nsconvert.c optional acpi contrib/dev/acpica/components/namespace/nsdump.c optional acpi contrib/dev/acpica/components/namespace/nseval.c optional acpi contrib/dev/acpica/components/namespace/nsinit.c optional acpi contrib/dev/acpica/components/namespace/nsload.c optional acpi contrib/dev/acpica/components/namespace/nsnames.c optional acpi contrib/dev/acpica/components/namespace/nsobject.c optional acpi contrib/dev/acpica/components/namespace/nsparse.c optional acpi contrib/dev/acpica/components/namespace/nspredef.c optional acpi contrib/dev/acpica/components/namespace/nsprepkg.c optional acpi contrib/dev/acpica/components/namespace/nsrepair.c optional acpi contrib/dev/acpica/components/namespace/nsrepair2.c optional acpi contrib/dev/acpica/components/namespace/nssearch.c optional acpi contrib/dev/acpica/components/namespace/nsutils.c optional acpi contrib/dev/acpica/components/namespace/nswalk.c optional acpi contrib/dev/acpica/components/namespace/nsxfeval.c optional acpi contrib/dev/acpica/components/namespace/nsxfname.c optional acpi contrib/dev/acpica/components/namespace/nsxfobj.c optional acpi contrib/dev/acpica/components/parser/psargs.c optional acpi contrib/dev/acpica/components/parser/psloop.c optional acpi contrib/dev/acpica/components/parser/psobject.c optional acpi contrib/dev/acpica/components/parser/psopcode.c optional acpi contrib/dev/acpica/components/parser/psopinfo.c optional acpi contrib/dev/acpica/components/parser/psparse.c optional acpi contrib/dev/acpica/components/parser/psscope.c optional acpi contrib/dev/acpica/components/parser/pstree.c optional acpi contrib/dev/acpica/components/parser/psutils.c optional acpi contrib/dev/acpica/components/parser/pswalk.c optional acpi contrib/dev/acpica/components/parser/psxface.c optional acpi contrib/dev/acpica/components/resources/rsaddr.c optional acpi contrib/dev/acpica/components/resources/rscalc.c optional acpi contrib/dev/acpica/components/resources/rscreate.c optional acpi contrib/dev/acpica/components/resources/rsdump.c optional acpi acpi_debug contrib/dev/acpica/components/resources/rsdumpinfo.c optional acpi contrib/dev/acpica/components/resources/rsinfo.c optional acpi contrib/dev/acpica/components/resources/rsio.c optional acpi contrib/dev/acpica/components/resources/rsirq.c optional acpi contrib/dev/acpica/components/resources/rslist.c optional acpi contrib/dev/acpica/components/resources/rsmemory.c optional acpi contrib/dev/acpica/components/resources/rsmisc.c optional acpi contrib/dev/acpica/components/resources/rsserial.c optional acpi contrib/dev/acpica/components/resources/rsutils.c optional acpi contrib/dev/acpica/components/resources/rsxface.c optional acpi contrib/dev/acpica/components/tables/tbdata.c optional acpi contrib/dev/acpica/components/tables/tbfadt.c optional acpi contrib/dev/acpica/components/tables/tbfind.c optional acpi contrib/dev/acpica/components/tables/tbinstal.c optional acpi contrib/dev/acpica/components/tables/tbprint.c optional acpi contrib/dev/acpica/components/tables/tbutils.c optional acpi contrib/dev/acpica/components/tables/tbxface.c optional acpi contrib/dev/acpica/components/tables/tbxfload.c optional acpi contrib/dev/acpica/components/tables/tbxfroot.c optional acpi contrib/dev/acpica/components/utilities/utaddress.c optional acpi contrib/dev/acpica/components/utilities/utalloc.c optional acpi contrib/dev/acpica/components/utilities/utascii.c optional acpi contrib/dev/acpica/components/utilities/utbuffer.c optional acpi contrib/dev/acpica/components/utilities/utcache.c optional acpi contrib/dev/acpica/components/utilities/utcopy.c optional acpi contrib/dev/acpica/components/utilities/utdebug.c optional acpi contrib/dev/acpica/components/utilities/utdecode.c optional acpi contrib/dev/acpica/components/utilities/utdelete.c optional acpi contrib/dev/acpica/components/utilities/uterror.c optional acpi contrib/dev/acpica/components/utilities/uteval.c optional acpi contrib/dev/acpica/components/utilities/utexcep.c optional acpi contrib/dev/acpica/components/utilities/utglobal.c optional acpi contrib/dev/acpica/components/utilities/uthex.c optional acpi contrib/dev/acpica/components/utilities/utids.c optional acpi contrib/dev/acpica/components/utilities/utinit.c optional acpi contrib/dev/acpica/components/utilities/utlock.c optional acpi contrib/dev/acpica/components/utilities/utmath.c optional acpi contrib/dev/acpica/components/utilities/utmisc.c optional acpi contrib/dev/acpica/components/utilities/utmutex.c optional acpi contrib/dev/acpica/components/utilities/utnonansi.c optional acpi contrib/dev/acpica/components/utilities/utobject.c optional acpi contrib/dev/acpica/components/utilities/utosi.c optional acpi contrib/dev/acpica/components/utilities/utownerid.c optional acpi contrib/dev/acpica/components/utilities/utpredef.c optional acpi contrib/dev/acpica/components/utilities/utresrc.c optional acpi contrib/dev/acpica/components/utilities/utstate.c optional acpi contrib/dev/acpica/components/utilities/utstring.c optional acpi contrib/dev/acpica/components/utilities/utstrtoul64.c optional acpi contrib/dev/acpica/components/utilities/utuuid.c optional acpi acpi_debug contrib/dev/acpica/components/utilities/utxface.c optional acpi contrib/dev/acpica/components/utilities/utxferror.c optional acpi contrib/dev/acpica/components/utilities/utxfinit.c optional acpi contrib/dev/acpica/os_specific/service_layers/osgendbg.c optional acpi acpi_debug contrib/ipfilter/netinet/fil.c optional ipfilter inet \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_auth.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_fil_freebsd.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_frag.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_log.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_nat.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_proxy.c optional ipfilter inet \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_state.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_lookup.c optional ipfilter inet \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -Wno-error -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_pool.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_htable.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_sync.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/mlfk_ipl.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_nat6.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_rules.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_scan.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_dstlist.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/radix_ipf.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/libfdt/fdt.c optional fdt contrib/libfdt/fdt_ro.c optional fdt contrib/libfdt/fdt_rw.c optional fdt contrib/libfdt/fdt_strerror.c optional fdt contrib/libfdt/fdt_sw.c optional fdt contrib/libfdt/fdt_wip.c optional fdt contrib/libnv/cnvlist.c standard contrib/libnv/dnvlist.c standard contrib/libnv/nvlist.c standard contrib/libnv/nvpair.c standard contrib/ngatm/netnatm/api/cc_conn.c optional ngatm_ccatm \ compile-with "${NORMAL_C_NOWERROR} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_data.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_dump.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_port.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_sig.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_user.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/unisap.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/misc/straddr.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/misc/unimsg_common.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/msg/traffic.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/msg/uni_ie.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/msg/uni_msg.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/saal/saal_sscfu.c optional ngatm_sscfu \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/saal/saal_sscop.c optional ngatm_sscop \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_call.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_coord.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_party.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_print.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_reset.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_uni.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_unimsgcpy.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_verify.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" -crypto/blowfish/bf_ecb.c optional ipsec -crypto/blowfish/bf_skey.c optional crypto | ipsec -crypto/camellia/camellia.c optional crypto | ipsec -crypto/camellia/camellia-api.c optional crypto | ipsec -crypto/des/des_ecb.c optional crypto | ipsec | netsmb -crypto/des/des_setkey.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_ecb.c optional ipsec | ipsec_support +crypto/blowfish/bf_skey.c optional crypto | ipsec | ipsec_support +crypto/camellia/camellia.c optional crypto | ipsec | ipsec_support +crypto/camellia/camellia-api.c optional crypto | ipsec | ipsec_support +crypto/des/des_ecb.c optional crypto | ipsec | ipsec_support | netsmb +crypto/des/des_setkey.c optional crypto | ipsec | ipsec_support | netsmb crypto/rc4/rc4.c optional netgraph_mppc_encryption | kgssapi crypto/rijndael/rijndael-alg-fst.c optional crypto | ekcd | geom_bde | \ - ipsec | random !random_loadable | wlan_ccmp + ipsec | ipsec_support | random !random_loadable | wlan_ccmp crypto/rijndael/rijndael-api-fst.c optional ekcd | geom_bde | random !random_loadable -crypto/rijndael/rijndael-api.c optional crypto | ipsec | wlan_ccmp +crypto/rijndael/rijndael-api.c optional crypto | ipsec | ipsec_support | \ + wlan_ccmp crypto/sha1.c optional carp | crypto | ipsec | \ - netgraph_mppc_encryption | sctp -crypto/sha2/sha256c.c optional crypto | ekcd | geom_bde | ipsec | random !random_loadable | \ - sctp | zfs -crypto/sha2/sha512c.c optional crypto | geom_bde | ipsec | zfs + ipsec_support | netgraph_mppc_encryption | sctp +crypto/sha2/sha256c.c optional crypto | ekcd | geom_bde | ipsec | \ + ipsec_support | random !random_loadable | sctp | zfs +crypto/sha2/sha512c.c optional crypto | geom_bde | ipsec | \ + ipsec_support | zfs crypto/skein/skein.c optional crypto | zfs crypto/skein/skein_block.c optional crypto | zfs crypto/siphash/siphash.c optional inet | inet6 crypto/siphash/siphash_test.c optional inet | inet6 ddb/db_access.c optional ddb ddb/db_break.c optional ddb ddb/db_capture.c optional ddb ddb/db_command.c optional ddb ddb/db_examine.c optional ddb ddb/db_expr.c optional ddb ddb/db_input.c optional ddb ddb/db_lex.c optional ddb ddb/db_main.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_script.c optional ddb ddb/db_sym.c optional ddb ddb/db_thread.c optional ddb ddb/db_textdump.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb dev/aac/aac.c optional aac dev/aac/aac_cam.c optional aacp aac dev/aac/aac_debug.c optional aac dev/aac/aac_disk.c optional aac dev/aac/aac_linux.c optional aac compat_linux dev/aac/aac_pci.c optional aac pci dev/aacraid/aacraid.c optional aacraid dev/aacraid/aacraid_cam.c optional aacraid scbus dev/aacraid/aacraid_debug.c optional aacraid dev/aacraid/aacraid_linux.c optional aacraid compat_linux dev/aacraid/aacraid_pci.c optional aacraid pci dev/acpi_support/acpi_wmi.c optional acpi_wmi acpi dev/acpi_support/acpi_asus.c optional acpi_asus acpi dev/acpi_support/acpi_asus_wmi.c optional acpi_asus_wmi acpi dev/acpi_support/acpi_fujitsu.c optional acpi_fujitsu acpi dev/acpi_support/acpi_hp.c optional acpi_hp acpi dev/acpi_support/acpi_ibm.c optional acpi_ibm acpi dev/acpi_support/acpi_panasonic.c optional acpi_panasonic acpi dev/acpi_support/acpi_sony.c optional acpi_sony acpi dev/acpi_support/acpi_toshiba.c optional acpi_toshiba acpi dev/acpi_support/atk0110.c optional aibs acpi dev/acpica/Osd/OsdDebug.c optional acpi dev/acpica/Osd/OsdHardware.c optional acpi dev/acpica/Osd/OsdInterrupt.c optional acpi dev/acpica/Osd/OsdMemory.c optional acpi dev/acpica/Osd/OsdSchedule.c optional acpi dev/acpica/Osd/OsdStream.c optional acpi dev/acpica/Osd/OsdSynch.c optional acpi dev/acpica/Osd/OsdTable.c optional acpi dev/acpica/acpi.c optional acpi dev/acpica/acpi_acad.c optional acpi dev/acpica/acpi_battery.c optional acpi dev/acpica/acpi_button.c optional acpi dev/acpica/acpi_cmbat.c optional acpi dev/acpica/acpi_cpu.c optional acpi dev/acpica/acpi_ec.c optional acpi dev/acpica/acpi_isab.c optional acpi isa dev/acpica/acpi_lid.c optional acpi dev/acpica/acpi_package.c optional acpi dev/acpica/acpi_pci.c optional acpi pci dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pcib_acpi.c optional acpi pci dev/acpica/acpi_pcib_pci.c optional acpi pci dev/acpica/acpi_perf.c optional acpi dev/acpica/acpi_powerres.c optional acpi dev/acpica/acpi_quirk.c optional acpi dev/acpica/acpi_resource.c optional acpi dev/acpica/acpi_smbat.c optional acpi dev/acpica/acpi_thermal.c optional acpi dev/acpica/acpi_throttle.c optional acpi dev/acpica/acpi_video.c optional acpi_video acpi dev/acpica/acpi_dock.c optional acpi_dock acpi dev/adlink/adlink.c optional adlink dev/advansys/adv_eisa.c optional adv eisa dev/advansys/adv_pci.c optional adv pci dev/advansys/advansys.c optional adv dev/advansys/advlib.c optional adv dev/advansys/advmcode.c optional adv dev/advansys/adw_pci.c optional adw pci dev/advansys/adwcam.c optional adw dev/advansys/adwlib.c optional adw dev/advansys/adwmcode.c optional adw dev/ae/if_ae.c optional ae pci dev/age/if_age.c optional age pci dev/agp/agp.c optional agp pci dev/agp/agp_if.m optional agp pci dev/aha/aha.c optional aha dev/aha/aha_isa.c optional aha isa dev/aha/aha_mca.c optional aha mca dev/ahb/ahb.c optional ahb eisa dev/ahci/ahci.c optional ahci dev/ahci/ahciem.c optional ahci dev/ahci/ahci_pci.c optional ahci pci dev/aic/aic.c optional aic dev/aic/aic_pccard.c optional aic pccard dev/aic7xxx/ahc_eisa.c optional ahc eisa dev/aic7xxx/ahc_isa.c optional ahc isa dev/aic7xxx/ahc_pci.c optional ahc pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/ahd_pci.c optional ahd pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/aic7770.c optional ahc dev/aic7xxx/aic79xx.c optional ahd pci dev/aic7xxx/aic79xx_osm.c optional ahd pci dev/aic7xxx/aic79xx_pci.c optional ahd pci dev/aic7xxx/aic79xx_reg_print.c optional ahd pci ahd_reg_pretty_print dev/aic7xxx/aic7xxx.c optional ahc dev/aic7xxx/aic7xxx_93cx6.c optional ahc dev/aic7xxx/aic7xxx_osm.c optional ahc dev/aic7xxx/aic7xxx_pci.c optional ahc pci dev/aic7xxx/aic7xxx_reg_print.c optional ahc ahc_reg_pretty_print dev/al_eth/al_eth.c optional al_eth fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" dev/al_eth/al_init_eth_lm.c optional al_eth fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" dev/al_eth/al_init_eth_kr.c optional al_eth fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_iofic.c optional al_iofic \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_serdes_25g.c optional al_serdes \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_serdes_hssp.c optional al_serdes \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_udma_config.c optional al_udma \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_udma_debug.c optional al_udma \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_udma_iofic.c optional al_udma \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_hal_udma_main.c optional al_udma \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/al_serdes.c optional al_serdes \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/eth/al_hal_eth_kr.c optional al_eth \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" contrib/alpine-hal/eth/al_hal_eth_main.c optional al_eth \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" dev/alc/if_alc.c optional alc pci dev/ale/if_ale.c optional ale pci dev/alpm/alpm.c optional alpm pci dev/altera/avgen/altera_avgen.c optional altera_avgen dev/altera/avgen/altera_avgen_fdt.c optional altera_avgen fdt dev/altera/avgen/altera_avgen_nexus.c optional altera_avgen dev/altera/sdcard/altera_sdcard.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_disk.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_io.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_fdt.c optional altera_sdcard fdt dev/altera/sdcard/altera_sdcard_nexus.c optional altera_sdcard dev/altera/pio/pio.c optional altera_pio dev/altera/pio/pio_if.m optional altera_pio dev/amdpm/amdpm.c optional amdpm pci | nfpm pci dev/amdsmb/amdsmb.c optional amdsmb pci dev/amr/amr.c optional amr dev/amr/amr_cam.c optional amrp amr dev/amr/amr_disk.c optional amr dev/amr/amr_linux.c optional amr compat_linux dev/amr/amr_pci.c optional amr pci dev/an/if_an.c optional an dev/an/if_an_isa.c optional an isa dev/an/if_an_pccard.c optional an pccard dev/an/if_an_pci.c optional an pci # dev/ata/ata_if.m optional ata | atacore dev/ata/ata-all.c optional ata | atacore dev/ata/ata-dma.c optional ata | atacore dev/ata/ata-lowlevel.c optional ata | atacore dev/ata/ata-sata.c optional ata | atacore dev/ata/ata-card.c optional ata pccard | atapccard dev/ata/ata-isa.c optional ata isa | ataisa dev/ata/ata-pci.c optional ata pci | atapci dev/ata/chipsets/ata-acard.c optional ata pci | ataacard dev/ata/chipsets/ata-acerlabs.c optional ata pci | ataacerlabs dev/ata/chipsets/ata-amd.c optional ata pci | ataamd dev/ata/chipsets/ata-ati.c optional ata pci | ataati dev/ata/chipsets/ata-cenatek.c optional ata pci | atacenatek dev/ata/chipsets/ata-cypress.c optional ata pci | atacypress dev/ata/chipsets/ata-cyrix.c optional ata pci | atacyrix dev/ata/chipsets/ata-highpoint.c optional ata pci | atahighpoint dev/ata/chipsets/ata-intel.c optional ata pci | ataintel dev/ata/chipsets/ata-ite.c optional ata pci | ataite dev/ata/chipsets/ata-jmicron.c optional ata pci | atajmicron dev/ata/chipsets/ata-marvell.c optional ata pci | atamarvell dev/ata/chipsets/ata-micron.c optional ata pci | atamicron dev/ata/chipsets/ata-national.c optional ata pci | atanational dev/ata/chipsets/ata-netcell.c optional ata pci | atanetcell dev/ata/chipsets/ata-nvidia.c optional ata pci | atanvidia dev/ata/chipsets/ata-promise.c optional ata pci | atapromise dev/ata/chipsets/ata-serverworks.c optional ata pci | ataserverworks dev/ata/chipsets/ata-siliconimage.c optional ata pci | atasiliconimage | ataati dev/ata/chipsets/ata-sis.c optional ata pci | atasis dev/ata/chipsets/ata-via.c optional ata pci | atavia # dev/ath/if_ath_pci.c optional ath_pci pci \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/if_ath_ahb.c optional ath_ahb \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/if_ath.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_alq.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_beacon.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_btcoex.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_btcoex_mci.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_debug.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_descdma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_keycache.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_ioctl.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_led.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_lna_div.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_ht.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tdma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_sysctl.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_spectral.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ah_osdep.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/ath_hal/ah.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v1.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v3.c optional ath_hal | ath_ar5211 | ath_ar5212 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v14.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v4k.c \ optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_9287.c \ optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_regdomain.c optional ath \ compile-with "${NORMAL_C} ${NO_WSHIFT_COUNT_NEGATIVE} ${NO_WSHIFT_COUNT_OVERFLOW} -I$S/dev/ath" # ar5210 dev/ath/ath_hal/ar5210/ar5210_attach.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_beacon.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_interrupts.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_keycache.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_misc.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_phy.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_power.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_recv.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_reset.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_xmit.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5211 dev/ath/ath_hal/ar5211/ar5211_attach.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_beacon.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_interrupts.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_keycache.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_misc.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_phy.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_power.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_recv.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_reset.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_xmit.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5212 dev/ath/ath_hal/ar5212/ar5212_ani.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_attach.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_beacon.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_eeprom.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_gpio.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_interrupts.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_keycache.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_misc.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_phy.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_power.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_recv.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_reset.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_rfgain.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_xmit.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5416 (depends on ar5212) dev/ath/ath_hal/ar5416/ar5416_ani.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_attach.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_beacon.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_btcoex.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_eeprom.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_gpio.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_interrupts.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_keycache.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_misc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_phy.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_power.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_radar.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_recv.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_reset.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_spectral.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_xmit.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130 # # Since this is an embedded MAC SoC, there's no need to compile it into the # default HAL. dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_phy.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_eeprom.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9160 (depends on ar5416) dev/ath/ath_hal/ar9001/ar9160_attach.c optional ath_hal | ath_ar9160 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9280 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9280_attach.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280_olc.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9285 (depends on ar5416 and ar9280) dev/ath/ath_hal/ar9002/ar9285_attach.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_btcoex.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_reset.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_cal.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9287 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9300 contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_attach.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_eeprom.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WCONSTANT_CONVERSION}" contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_interrupts.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_keycache.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_mci.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_paprd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_phy.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_power.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radar.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WSOMETIMES_UNINITIALIZED} -Wno-unused-function" contrib/dev/ath/ath_hal/ar9300/ar9300_stub.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_stub_funcs.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_spectral.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_timer.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" # rf backends dev/ath/ath_hal/ar5212/ar2316.c optional ath_rf2316 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2317.c optional ath_rf2317 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2413.c optional ath_hal | ath_rf2413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2425.c optional ath_hal | ath_rf2425 | ath_rf2417 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5111.c optional ath_hal | ath_rf5111 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5112.c optional ath_hal | ath_rf5112 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5413.c optional ath_hal | ath_rf5413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar2133.c optional ath_hal | ath_ar5416 | \ ath_ar9130 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ath rate control algorithms dev/ath/ath_rate/amrr/amrr.c optional ath_rate_amrr \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/onoe/onoe.c optional ath_rate_onoe \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/sample/sample.c optional ath_rate_sample \ compile-with "${NORMAL_C} -I$S/dev/ath" # ath DFS modules dev/ath/ath_dfs/null/dfs_null.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/bce/if_bce.c optional bce dev/bfe/if_bfe.c optional bfe dev/bge/if_bge.c optional bge dev/bhnd/bhnd.c optional bhnd dev/bhnd/bhnd_erom.c optional bhnd dev/bhnd/bhnd_erom_if.m optional bhnd dev/bhnd/bhnd_nexus.c optional bhnd siba_nexus | \ bhnd bcma_nexus dev/bhnd/bhnd_subr.c optional bhnd dev/bhnd/bhnd_bus_if.m optional bhnd dev/bhnd/bhndb/bhnd_bhndb.c optional bhndb bhnd dev/bhnd/bhndb/bhndb.c optional bhndb bhnd dev/bhnd/bhndb/bhndb_bus_if.m optional bhndb bhnd dev/bhnd/bhndb/bhndb_hwdata.c optional bhndb bhnd dev/bhnd/bhndb/bhndb_if.m optional bhndb bhnd dev/bhnd/bhndb/bhndb_pci.c optional bhndb bhnd pci dev/bhnd/bhndb/bhndb_pci_hwdata.c optional bhndb bhnd pci dev/bhnd/bhndb/bhndb_pci_sprom.c optional bhndb bhnd pci dev/bhnd/bhndb/bhndb_subr.c optional bhndb bhnd dev/bhnd/bcma/bcma.c optional bcma bhnd dev/bhnd/bcma/bcma_bhndb.c optional bcma bhnd bhndb dev/bhnd/bcma/bcma_erom.c optional bcma bhnd dev/bhnd/bcma/bcma_nexus.c optional bcma_nexus bcma bhnd dev/bhnd/bcma/bcma_subr.c optional bcma bhnd dev/bhnd/cores/chipc/bhnd_chipc_if.m optional bhnd dev/bhnd/cores/chipc/bhnd_sprom_chipc.c optional bhnd dev/bhnd/cores/chipc/bhnd_pmu_chipc.c optional bhnd dev/bhnd/cores/chipc/chipc.c optional bhnd dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi dev/bhnd/cores/chipc/chipc_slicer.c optional bhnd cfi | bhnd spibus dev/bhnd/cores/chipc/chipc_spi.c optional bhnd spibus dev/bhnd/cores/chipc/chipc_subr.c optional bhnd dev/bhnd/cores/chipc/pwrctl/bhnd_pwrctl.c optional bhnd dev/bhnd/cores/chipc/pwrctl/bhnd_pwrctl_subr.c optional bhnd dev/bhnd/cores/pci/bhnd_pci.c optional bhnd pci dev/bhnd/cores/pci/bhnd_pci_hostb.c optional bhndb bhnd pci dev/bhnd/cores/pci/bhnd_pcib.c optional bhnd_pcib bhnd pci dev/bhnd/cores/pcie2/bhnd_pcie2.c optional bhnd pci dev/bhnd/cores/pcie2/bhnd_pcie2_hostb.c optional bhndb bhnd pci dev/bhnd/cores/pcie2/bhnd_pcie2b.c optional bhnd_pcie2b bhnd pci dev/bhnd/cores/pmu/bhnd_pmu.c optional bhnd dev/bhnd/cores/pmu/bhnd_pmu_core.c optional bhnd dev/bhnd/cores/pmu/bhnd_pmu_if.m optional bhnd dev/bhnd/cores/pmu/bhnd_pmu_subr.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data_bcm.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data_bcmraw.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data_btxt.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data_sprom.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data_sprom_subr.c optional bhnd dev/bhnd/nvram/bhnd_nvram_data_tlv.c optional bhnd dev/bhnd/nvram/bhnd_nvram_if.m optional bhnd dev/bhnd/nvram/bhnd_nvram_io.c optional bhnd dev/bhnd/nvram/bhnd_nvram_iobuf.c optional bhnd dev/bhnd/nvram/bhnd_nvram_ioptr.c optional bhnd dev/bhnd/nvram/bhnd_nvram_iores.c optional bhnd dev/bhnd/nvram/bhnd_nvram_plist.c optional bhnd dev/bhnd/nvram/bhnd_nvram_store.c optional bhnd dev/bhnd/nvram/bhnd_nvram_store_subr.c optional bhnd dev/bhnd/nvram/bhnd_nvram_subr.c optional bhnd dev/bhnd/nvram/bhnd_nvram_value.c optional bhnd dev/bhnd/nvram/bhnd_nvram_value_fmts.c optional bhnd dev/bhnd/nvram/bhnd_nvram_value_prf.c optional bhnd dev/bhnd/nvram/bhnd_nvram_value_subr.c optional bhnd dev/bhnd/nvram/bhnd_sprom.c optional bhnd dev/bhnd/siba/siba.c optional siba bhnd dev/bhnd/siba/siba_bhndb.c optional siba bhnd bhndb dev/bhnd/siba/siba_erom.c optional siba bhnd dev/bhnd/siba/siba_nexus.c optional siba_nexus siba bhnd dev/bhnd/siba/siba_subr.c optional siba bhnd # dev/bktr/bktr_audio.c optional bktr pci dev/bktr/bktr_card.c optional bktr pci dev/bktr/bktr_core.c optional bktr pci dev/bktr/bktr_i2c.c optional bktr pci smbus dev/bktr/bktr_os.c optional bktr pci dev/bktr/bktr_tuner.c optional bktr pci dev/bktr/msp34xx.c optional bktr pci dev/bnxt/bnxt_hwrm.c optional bnxt iflib pci dev/bnxt/bnxt_sysctl.c optional bnxt iflib pci dev/bnxt/bnxt_txrx.c optional bnxt iflib pci dev/bnxt/if_bnxt.c optional bnxt iflib pci dev/buslogic/bt.c optional bt dev/buslogic/bt_eisa.c optional bt eisa dev/buslogic/bt_isa.c optional bt isa dev/buslogic/bt_mca.c optional bt mca dev/buslogic/bt_pci.c optional bt pci dev/bwi/bwimac.c optional bwi dev/bwi/bwiphy.c optional bwi dev/bwi/bwirf.c optional bwi dev/bwi/if_bwi.c optional bwi dev/bwi/if_bwi_pci.c optional bwi pci # XXX Work around clang warnings, until maintainer approves fix. dev/bwn/if_bwn.c optional bwn siba_bwn \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/bwn/if_bwn_pci.c optional bwn pci bhnd dev/bwn/if_bwn_phy_common.c optional bwn siba_bwn dev/bwn/if_bwn_phy_g.c optional bwn siba_bwn \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED} ${NO_WCONSTANT_CONVERSION}" dev/bwn/if_bwn_phy_lp.c optional bwn siba_bwn \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/bwn/if_bwn_phy_n.c optional bwn siba_bwn dev/bwn/if_bwn_util.c optional bwn siba_bwn dev/bwn/bwn_mac.c optional bwn bhnd dev/cardbus/cardbus.c optional cardbus dev/cardbus/cardbus_cis.c optional cardbus dev/cardbus/cardbus_device.c optional cardbus dev/cas/if_cas.c optional cas dev/cfi/cfi_bus_fdt.c optional cfi fdt dev/cfi/cfi_bus_nexus.c optional cfi dev/cfi/cfi_core.c optional cfi dev/cfi/cfi_dev.c optional cfi dev/cfi/cfi_disk.c optional cfid dev/chromebook_platform/chromebook_platform.c optional chromebook_platform dev/ciss/ciss.c optional ciss dev/cm/smc90cx6.c optional cm dev/cmx/cmx.c optional cmx dev/cmx/cmx_pccard.c optional cmx pccard dev/cpufreq/ichss.c optional cpufreq pci dev/cs/if_cs.c optional cs dev/cs/if_cs_isa.c optional cs isa dev/cs/if_cs_pccard.c optional cs pccard dev/cxgb/cxgb_main.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_sge.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mc5.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc7323.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc8211.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_ael1002.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_aq100x.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mv88e1xxx.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_xgmac.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_t3_hw.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_tn1010.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/sys/uipc_mvec.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgbe/t4_if.m optional cxgbe pci dev/cxgbe/t4_iov.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_mp_ring.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_main.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_netmap.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_sge.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_l2t.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_tracer.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_vf.c optional cxgbev pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4_hw.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4vf_hw.c optional cxgbev pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" t4fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t4fw_cfg.c" t4fw_cfg.fwo optional cxgbe \ dependency "t4fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg.fwo" t4fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg.fw" t4fw_cfg_uwire.fwo optional cxgbe \ dependency "t4fw_cfg_uwire.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg_uwire.fwo" t4fw_cfg_uwire.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg_uwire.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg_uwire.fw" t4fw.fwo optional cxgbe \ dependency "t4fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw.fwo" t4fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw-1.16.26.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t4fw.fw" t5fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t5fw_cfg.fw:t5fw_cfg t5fw.fw:t5fw -mt5fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t5fw_cfg.c" t5fw_cfg.fwo optional cxgbe \ dependency "t5fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw_cfg.fwo" t5fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t5fw_cfg.fw" t5fw.fwo optional cxgbe \ dependency "t5fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw.fwo" t5fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw-1.16.26.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t5fw.fw" t6fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t6fw_cfg.fw:t6fw_cfg t6fw.fw:t6fw -mt6fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t6fw_cfg.c" t6fw_cfg.fwo optional cxgbe \ dependency "t6fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t6fw_cfg.fwo" t6fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t6fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t6fw_cfg.fw" t6fw.fwo optional cxgbe \ dependency "t6fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t6fw.fwo" t6fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t6fw-1.16.26.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t6fw.fw" dev/cy/cy.c optional cy dev/cy/cy_isa.c optional cy isa dev/cy/cy_pci.c optional cy pci dev/cyapa/cyapa.c optional cyapa iicbus dev/dc/if_dc.c optional dc pci dev/dc/dcphy.c optional dc pci dev/dc/pnphy.c optional dc pci dev/dcons/dcons.c optional dcons dev/dcons/dcons_crom.c optional dcons_crom dev/dcons/dcons_os.c optional dcons dev/de/if_de.c optional de pci dev/dme/if_dme.c optional dme dev/dpt/dpt_eisa.c optional dpt eisa dev/dpt/dpt_pci.c optional dpt pci dev/dpt/dpt_scsi.c optional dpt dev/drm/ati_pcigart.c optional drm dev/drm/drm_agpsupport.c optional drm dev/drm/drm_auth.c optional drm dev/drm/drm_bufs.c optional drm dev/drm/drm_context.c optional drm dev/drm/drm_dma.c optional drm dev/drm/drm_drawable.c optional drm dev/drm/drm_drv.c optional drm dev/drm/drm_fops.c optional drm dev/drm/drm_hashtab.c optional drm dev/drm/drm_ioctl.c optional drm dev/drm/drm_irq.c optional drm dev/drm/drm_lock.c optional drm dev/drm/drm_memory.c optional drm dev/drm/drm_mm.c optional drm dev/drm/drm_pci.c optional drm dev/drm/drm_scatter.c optional drm dev/drm/drm_sman.c optional drm dev/drm/drm_sysctl.c optional drm dev/drm/drm_vm.c optional drm dev/drm/i915_dma.c optional i915drm dev/drm/i915_drv.c optional i915drm dev/drm/i915_irq.c optional i915drm dev/drm/i915_mem.c optional i915drm dev/drm/i915_suspend.c optional i915drm dev/drm/mach64_dma.c optional mach64drm dev/drm/mach64_drv.c optional mach64drm dev/drm/mach64_irq.c optional mach64drm dev/drm/mach64_state.c optional mach64drm dev/drm/mga_dma.c optional mgadrm dev/drm/mga_drv.c optional mgadrm dev/drm/mga_irq.c optional mgadrm dev/drm/mga_state.c optional mgadrm dev/drm/mga_warp.c optional mgadrm dev/drm/r128_cce.c optional r128drm \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/drm/r128_drv.c optional r128drm dev/drm/r128_irq.c optional r128drm dev/drm/r128_state.c optional r128drm dev/drm/r300_cmdbuf.c optional radeondrm dev/drm/r600_blit.c optional radeondrm dev/drm/r600_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cs.c optional radeondrm dev/drm/radeon_drv.c optional radeondrm dev/drm/radeon_irq.c optional radeondrm dev/drm/radeon_mem.c optional radeondrm dev/drm/radeon_state.c optional radeondrm dev/drm/savage_bci.c optional savagedrm dev/drm/savage_drv.c optional savagedrm dev/drm/savage_state.c optional savagedrm dev/drm/sis_drv.c optional sisdrm dev/drm/sis_ds.c optional sisdrm dev/drm/sis_mm.c optional sisdrm dev/drm/tdfx_drv.c optional tdfxdrm dev/drm/via_dma.c optional viadrm dev/drm/via_dmablit.c optional viadrm dev/drm/via_drv.c optional viadrm dev/drm/via_irq.c optional viadrm dev/drm/via_map.c optional viadrm dev/drm/via_mm.c optional viadrm dev/drm/via_verifier.c optional viadrm dev/drm/via_video.c optional viadrm dev/drm2/drm_agpsupport.c optional drm2 dev/drm2/drm_auth.c optional drm2 dev/drm2/drm_bufs.c optional drm2 dev/drm2/drm_buffer.c optional drm2 dev/drm2/drm_context.c optional drm2 dev/drm2/drm_crtc.c optional drm2 dev/drm2/drm_crtc_helper.c optional drm2 dev/drm2/drm_dma.c optional drm2 dev/drm2/drm_dp_helper.c optional drm2 dev/drm2/drm_dp_iic_helper.c optional drm2 dev/drm2/drm_drv.c optional drm2 dev/drm2/drm_edid.c optional drm2 dev/drm2/drm_fb_helper.c optional drm2 dev/drm2/drm_fops.c optional drm2 dev/drm2/drm_gem.c optional drm2 dev/drm2/drm_gem_names.c optional drm2 dev/drm2/drm_global.c optional drm2 dev/drm2/drm_hashtab.c optional drm2 dev/drm2/drm_ioctl.c optional drm2 dev/drm2/drm_irq.c optional drm2 dev/drm2/drm_linux_list_sort.c optional drm2 dev/drm2/drm_lock.c optional drm2 dev/drm2/drm_memory.c optional drm2 dev/drm2/drm_mm.c optional drm2 dev/drm2/drm_modes.c optional drm2 dev/drm2/drm_pci.c optional drm2 dev/drm2/drm_platform.c optional drm2 dev/drm2/drm_scatter.c optional drm2 dev/drm2/drm_stub.c optional drm2 dev/drm2/drm_sysctl.c optional drm2 dev/drm2/drm_vm.c optional drm2 dev/drm2/drm_os_freebsd.c optional drm2 dev/drm2/ttm/ttm_agp_backend.c optional drm2 dev/drm2/ttm/ttm_lock.c optional drm2 dev/drm2/ttm/ttm_object.c optional drm2 dev/drm2/ttm/ttm_tt.c optional drm2 dev/drm2/ttm/ttm_bo_util.c optional drm2 dev/drm2/ttm/ttm_bo.c optional drm2 dev/drm2/ttm/ttm_bo_manager.c optional drm2 dev/drm2/ttm/ttm_execbuf_util.c optional drm2 dev/drm2/ttm/ttm_memory.c optional drm2 dev/drm2/ttm/ttm_page_alloc.c optional drm2 dev/drm2/ttm/ttm_bo_vm.c optional drm2 dev/drm2/ati_pcigart.c optional drm2 agp pci dev/ed/if_ed.c optional ed dev/ed/if_ed_novell.c optional ed dev/ed/if_ed_rtl80x9.c optional ed dev/ed/if_ed_pccard.c optional ed pccard dev/ed/if_ed_pci.c optional ed pci dev/efidev/efidev.c optional efirt dev/eisa/eisa_if.m standard dev/eisa/eisaconf.c optional eisa dev/e1000/if_em.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/em_txrx.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/igb_txrx.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_80003es2lan.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82540.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82541.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82542.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82543.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82571.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82575.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_ich8lan.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_i210.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_api.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mac.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_manage.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_nvm.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_phy.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_vf.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mbx.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_osdep.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/et/if_et.c optional et dev/en/if_en_pci.c optional en pci dev/en/midway.c optional en dev/ep/if_ep.c optional ep dev/ep/if_ep_eisa.c optional ep eisa dev/ep/if_ep_isa.c optional ep isa dev/ep/if_ep_mca.c optional ep mca dev/ep/if_ep_pccard.c optional ep pccard dev/esp/esp_pci.c optional esp pci dev/esp/ncr53c9x.c optional esp dev/etherswitch/arswitch/arswitch.c optional arswitch dev/etherswitch/arswitch/arswitch_reg.c optional arswitch dev/etherswitch/arswitch/arswitch_phy.c optional arswitch dev/etherswitch/arswitch/arswitch_8216.c optional arswitch dev/etherswitch/arswitch/arswitch_8226.c optional arswitch dev/etherswitch/arswitch/arswitch_8316.c optional arswitch dev/etherswitch/arswitch/arswitch_8327.c optional arswitch dev/etherswitch/arswitch/arswitch_7240.c optional arswitch dev/etherswitch/arswitch/arswitch_9340.c optional arswitch dev/etherswitch/arswitch/arswitch_vlans.c optional arswitch dev/etherswitch/etherswitch.c optional etherswitch dev/etherswitch/etherswitch_if.m optional etherswitch dev/etherswitch/ip17x/ip17x.c optional ip17x dev/etherswitch/ip17x/ip175c.c optional ip17x dev/etherswitch/ip17x/ip175d.c optional ip17x dev/etherswitch/ip17x/ip17x_phy.c optional ip17x dev/etherswitch/ip17x/ip17x_vlans.c optional ip17x dev/etherswitch/miiproxy.c optional miiproxy dev/etherswitch/rtl8366/rtl8366rb.c optional rtl8366rb dev/etherswitch/e6000sw/e6000sw.c optional e6000sw dev/etherswitch/ukswitch/ukswitch.c optional ukswitch dev/evdev/cdev.c optional evdev dev/evdev/evdev.c optional evdev dev/evdev/evdev_mt.c optional evdev dev/evdev/evdev_utils.c optional evdev dev/evdev/uinput.c optional evdev uinput dev/ex/if_ex.c optional ex dev/ex/if_ex_isa.c optional ex isa dev/ex/if_ex_pccard.c optional ex pccard dev/exca/exca.c optional cbb dev/extres/clk/clk.c optional ext_resources clk fdt dev/extres/clk/clkdev_if.m optional ext_resources clk fdt dev/extres/clk/clknode_if.m optional ext_resources clk fdt dev/extres/clk/clk_bus.c optional ext_resources clk fdt dev/extres/clk/clk_div.c optional ext_resources clk fdt dev/extres/clk/clk_fixed.c optional ext_resources clk fdt dev/extres/clk/clk_gate.c optional ext_resources clk fdt dev/extres/clk/clk_mux.c optional ext_resources clk fdt dev/extres/phy/phy.c optional ext_resources phy fdt dev/extres/phy/phy_if.m optional ext_resources phy fdt dev/extres/hwreset/hwreset.c optional ext_resources hwreset fdt dev/extres/hwreset/hwreset_if.m optional ext_resources hwreset fdt dev/extres/regulator/regdev_if.m optional ext_resources regulator fdt dev/extres/regulator/regnode_if.m optional ext_resources regulator fdt dev/extres/regulator/regulator.c optional ext_resources regulator fdt dev/extres/regulator/regulator_bus.c optional ext_resources regulator fdt dev/extres/regulator/regulator_fixed.c optional ext_resources regulator fdt dev/fatm/if_fatm.c optional fatm pci dev/fb/fbd.c optional fbd | vt dev/fb/fb_if.m standard dev/fb/splash.c optional sc splash dev/fdt/fdt_clock.c optional fdt fdt_clock dev/fdt/fdt_clock_if.m optional fdt fdt_clock dev/fdt/fdt_common.c optional fdt dev/fdt/fdt_pinctrl.c optional fdt fdt_pinctrl dev/fdt/fdt_pinctrl_if.m optional fdt fdt_pinctrl dev/fdt/fdt_slicer.c optional fdt cfi | fdt nand | fdt mx25l dev/fdt/fdt_static_dtb.S optional fdt fdt_dtb_static \ dependency "fdt_dtb_file" dev/fdt/simplebus.c optional fdt dev/fe/if_fe.c optional fe dev/fe/if_fe_pccard.c optional fe pccard dev/filemon/filemon.c optional filemon dev/firewire/firewire.c optional firewire dev/firewire/fwcrom.c optional firewire dev/firewire/fwdev.c optional firewire dev/firewire/fwdma.c optional firewire dev/firewire/fwmem.c optional firewire dev/firewire/fwohci.c optional firewire dev/firewire/fwohci_pci.c optional firewire pci dev/firewire/if_fwe.c optional fwe dev/firewire/if_fwip.c optional fwip dev/firewire/sbp.c optional sbp dev/firewire/sbp_targ.c optional sbp_targ dev/flash/at45d.c optional at45d dev/flash/mx25l.c optional mx25l dev/fxp/if_fxp.c optional fxp dev/fxp/inphy.c optional fxp dev/gem/if_gem.c optional gem dev/gem/if_gem_pci.c optional gem pci dev/gem/if_gem_sbus.c optional gem sbus dev/gpio/gpiobacklight.c optional gpiobacklight fdt dev/gpio/gpiokeys.c optional gpiokeys fdt dev/gpio/gpiokeys_codes.c optional gpiokeys fdt dev/gpio/gpiobus.c optional gpio \ dependency "gpiobus_if.h" dev/gpio/gpioc.c optional gpio \ dependency "gpio_if.h" dev/gpio/gpioiic.c optional gpioiic dev/gpio/gpioled.c optional gpioled !fdt dev/gpio/gpioled_fdt.c optional gpioled fdt dev/gpio/gpiopower.c optional gpiopower fdt dev/gpio/gpioregulator.c optional gpioregulator fdt ext_resources dev/gpio/gpiospi.c optional gpiospi dev/gpio/gpioths.c optional gpioths dev/gpio/gpio_if.m optional gpio dev/gpio/gpiobus_if.m optional gpio dev/gpio/gpiopps.c optional gpiopps dev/gpio/ofw_gpiobus.c optional fdt gpio dev/hatm/if_hatm.c optional hatm pci dev/hatm/if_hatm_intr.c optional hatm pci dev/hatm/if_hatm_ioctl.c optional hatm pci dev/hatm/if_hatm_rx.c optional hatm pci dev/hatm/if_hatm_tx.c optional hatm pci dev/hifn/hifn7751.c optional hifn dev/hme/if_hme.c optional hme dev/hme/if_hme_pci.c optional hme pci dev/hme/if_hme_sbus.c optional hme sbus dev/hptiop/hptiop.c optional hptiop scbus dev/hwpmc/hwpmc_logging.c optional hwpmc dev/hwpmc/hwpmc_mod.c optional hwpmc dev/hwpmc/hwpmc_soft.c optional hwpmc dev/ichiic/ig4_acpi.c optional ig4 acpi iicbus dev/ichiic/ig4_iic.c optional ig4 iicbus dev/ichiic/ig4_pci.c optional ig4 pci iicbus dev/ichsmb/ichsmb.c optional ichsmb dev/ichsmb/ichsmb_pci.c optional ichsmb pci dev/ida/ida.c optional ida dev/ida/ida_disk.c optional ida dev/ida/ida_eisa.c optional ida eisa dev/ida/ida_pci.c optional ida pci dev/iicbus/ad7418.c optional ad7418 dev/iicbus/ds1307.c optional ds1307 dev/iicbus/ds133x.c optional ds133x dev/iicbus/ds1374.c optional ds1374 dev/iicbus/ds1672.c optional ds1672 dev/iicbus/ds3231.c optional ds3231 dev/iicbus/icee.c optional icee dev/iicbus/if_ic.c optional ic dev/iicbus/iic.c optional iic dev/iicbus/iicbb.c optional iicbb dev/iicbus/iicbb_if.m optional iicbb dev/iicbus/iicbus.c optional iicbus dev/iicbus/iicbus_if.m optional iicbus dev/iicbus/iiconf.c optional iicbus dev/iicbus/iicsmb.c optional iicsmb \ dependency "iicbus_if.h" dev/iicbus/iicoc.c optional iicoc dev/iicbus/lm75.c optional lm75 dev/iicbus/ofw_iicbus.c optional fdt iicbus dev/iicbus/pcf8563.c optional pcf8563 dev/iicbus/s35390a.c optional s35390a dev/iir/iir.c optional iir dev/iir/iir_ctrl.c optional iir dev/iir/iir_pci.c optional iir pci dev/intpm/intpm.c optional intpm pci # XXX Work around clang warning, until maintainer approves fix. dev/ips/ips.c optional ips \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/ips/ips_commands.c optional ips dev/ips/ips_disk.c optional ips dev/ips/ips_ioctl.c optional ips dev/ips/ips_pci.c optional ips pci dev/ipw/if_ipw.c optional ipw ipwbssfw.c optional ipwbssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_bss.fw:ipw_bss:130 -lintel_ipw -mipw_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwbssfw.c" ipw_bss.fwo optional ipwbssfw | ipwfw \ dependency "ipw_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_bss.fwo" ipw_bss.fw optional ipwbssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_bss.fw" ipwibssfw.c optional ipwibssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_ibss.fw:ipw_ibss:130 -lintel_ipw -mipw_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwibssfw.c" ipw_ibss.fwo optional ipwibssfw | ipwfw \ dependency "ipw_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_ibss.fwo" ipw_ibss.fw optional ipwibssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-i.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_ibss.fw" ipwmonitorfw.c optional ipwmonitorfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_monitor.fw:ipw_monitor:130 -lintel_ipw -mipw_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwmonitorfw.c" ipw_monitor.fwo optional ipwmonitorfw | ipwfw \ dependency "ipw_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_monitor.fwo" ipw_monitor.fw optional ipwmonitorfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-p.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_monitor.fw" dev/iscsi/icl.c optional iscsi | ctl dev/iscsi/icl_conn_if.m optional iscsi | ctl dev/iscsi/icl_soft.c optional iscsi | ctl dev/iscsi/icl_soft_proxy.c optional iscsi | ctl dev/iscsi/iscsi.c optional iscsi scbus dev/iscsi_initiator/iscsi.c optional iscsi_initiator scbus dev/iscsi_initiator/iscsi_subr.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_cam.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_soc.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_sm.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_subr.c optional iscsi_initiator scbus dev/ismt/ismt.c optional ismt dev/isl/isl.c optional isl iicbus dev/isp/isp.c optional isp dev/isp/isp_freebsd.c optional isp dev/isp/isp_library.c optional isp dev/isp/isp_pci.c optional isp pci dev/isp/isp_sbus.c optional isp sbus dev/isp/isp_target.c optional isp dev/ispfw/ispfw.c optional ispfw dev/iwi/if_iwi.c optional iwi iwibssfw.c optional iwibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_bss.fw:iwi_bss:300 -lintel_iwi -miwi_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwibssfw.c" iwi_bss.fwo optional iwibssfw | iwifw \ dependency "iwi_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_bss.fwo" iwi_bss.fw optional iwibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-bss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_bss.fw" iwiibssfw.c optional iwiibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_ibss.fw:iwi_ibss:300 -lintel_iwi -miwi_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwiibssfw.c" iwi_ibss.fwo optional iwiibssfw | iwifw \ dependency "iwi_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_ibss.fwo" iwi_ibss.fw optional iwiibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-ibss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_ibss.fw" iwimonitorfw.c optional iwimonitorfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_monitor.fw:iwi_monitor:300 -lintel_iwi -miwi_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwimonitorfw.c" iwi_monitor.fwo optional iwimonitorfw | iwifw \ dependency "iwi_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_monitor.fwo" iwi_monitor.fw optional iwimonitorfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-sniffer.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_monitor.fw" dev/iwm/if_iwm.c optional iwm dev/iwm/if_iwm_binding.c optional iwm dev/iwm/if_iwm_led.c optional iwm dev/iwm/if_iwm_mac_ctxt.c optional iwm dev/netif/iwm/if_iwm_notif_wait.c optional iwm dev/iwm/if_iwm_pcie_trans.c optional iwm dev/iwm/if_iwm_phy_ctxt.c optional iwm dev/iwm/if_iwm_phy_db.c optional iwm dev/iwm/if_iwm_power.c optional iwm dev/iwm/if_iwm_scan.c optional iwm dev/iwm/if_iwm_time_event.c optional iwm dev/iwm/if_iwm_util.c optional iwm iwm3160fw.c optional iwm3160fw | iwmfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwm3160.fw:iwm3160fw -miwm3160fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwm3160fw.c" iwm3160fw.fwo optional iwm3160fw | iwmfw \ dependency "iwm3160.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwm3160fw.fwo" iwm3160.fw optional iwm3160fw | iwmfw \ dependency "$S/contrib/dev/iwm/iwm-3160-16.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwm3160.fw" iwm7260fw.c optional iwm7260fw | iwmfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwm7260.fw:iwm7260fw -miwm7260fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwm7260fw.c" iwm7260fw.fwo optional iwm7260fw | iwmfw \ dependency "iwm7260.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwm7260fw.fwo" iwm7260.fw optional iwm7260fw | iwmfw \ dependency "$S/contrib/dev/iwm/iwm-7260-16.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwm7260.fw" iwm7265fw.c optional iwm7265fw | iwmfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwm7265.fw:iwm7265fw -miwm7265fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwm7265fw.c" iwm7265fw.fwo optional iwm7265fw | iwmfw \ dependency "iwm7265.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwm7265fw.fwo" iwm7265.fw optional iwm7265fw | iwmfw \ dependency "$S/contrib/dev/iwm/iwm-7265-16.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwm7265.fw" iwm8000Cfw.c optional iwm8000Cfw | iwmfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwm8000C.fw:iwm8000Cfw -miwm8000Cfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwm8000Cfw.c" iwm8000Cfw.fwo optional iwm8000Cfw | iwmfw \ dependency "iwm8000C.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwm8000Cfw.fwo" iwm8000C.fw optional iwm8000Cfw | iwmfw \ dependency "$S/contrib/dev/iwm/iwm-8000C-16.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwm8000C.fw" dev/iwn/if_iwn.c optional iwn iwn1000fw.c optional iwn1000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn1000.fw:iwn1000fw -miwn1000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn1000fw.c" iwn1000fw.fwo optional iwn1000fw | iwnfw \ dependency "iwn1000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn1000fw.fwo" iwn1000.fw optional iwn1000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-1000-39.31.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn1000.fw" iwn100fw.c optional iwn100fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn100.fw:iwn100fw -miwn100fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn100fw.c" iwn100fw.fwo optional iwn100fw | iwnfw \ dependency "iwn100.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn100fw.fwo" iwn100.fw optional iwn100fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-100-39.31.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn100.fw" iwn105fw.c optional iwn105fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn105.fw:iwn105fw -miwn105fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn105fw.c" iwn105fw.fwo optional iwn105fw | iwnfw \ dependency "iwn105.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn105fw.fwo" iwn105.fw optional iwn105fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-105-6-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn105.fw" iwn135fw.c optional iwn135fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn135.fw:iwn135fw -miwn135fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn135fw.c" iwn135fw.fwo optional iwn135fw | iwnfw \ dependency "iwn135.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn135fw.fwo" iwn135.fw optional iwn135fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-135-6-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn135.fw" iwn2000fw.c optional iwn2000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn2000.fw:iwn2000fw -miwn2000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn2000fw.c" iwn2000fw.fwo optional iwn2000fw | iwnfw \ dependency "iwn2000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn2000fw.fwo" iwn2000.fw optional iwn2000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-2000-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn2000.fw" iwn2030fw.c optional iwn2030fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn2030.fw:iwn2030fw -miwn2030fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn2030fw.c" iwn2030fw.fwo optional iwn2030fw | iwnfw \ dependency "iwn2030.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn2030fw.fwo" iwn2030.fw optional iwn2030fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwnwifi-2030-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn2030.fw" iwn4965fw.c optional iwn4965fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn4965.fw:iwn4965fw -miwn4965fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn4965fw.c" iwn4965fw.fwo optional iwn4965fw | iwnfw \ dependency "iwn4965.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn4965fw.fwo" iwn4965.fw optional iwn4965fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-4965-228.61.2.24.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn4965.fw" iwn5000fw.c optional iwn5000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5000.fw:iwn5000fw -miwn5000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5000fw.c" iwn5000fw.fwo optional iwn5000fw | iwnfw \ dependency "iwn5000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5000fw.fwo" iwn5000.fw optional iwn5000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5000-8.83.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5000.fw" iwn5150fw.c optional iwn5150fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5150.fw:iwn5150fw -miwn5150fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5150fw.c" iwn5150fw.fwo optional iwn5150fw | iwnfw \ dependency "iwn5150.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5150fw.fwo" iwn5150.fw optional iwn5150fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5150-8.24.2.2.fw.uu"\ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5150.fw" iwn6000fw.c optional iwn6000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000.fw:iwn6000fw -miwn6000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000fw.c" iwn6000fw.fwo optional iwn6000fw | iwnfw \ dependency "iwn6000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000fw.fwo" iwn6000.fw optional iwn6000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000-9.221.4.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000.fw" iwn6000g2afw.c optional iwn6000g2afw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2a.fw:iwn6000g2afw -miwn6000g2afw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2afw.c" iwn6000g2afw.fwo optional iwn6000g2afw | iwnfw \ dependency "iwn6000g2a.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000g2afw.fwo" iwn6000g2a.fw optional iwn6000g2afw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2a-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2a.fw" iwn6000g2bfw.c optional iwn6000g2bfw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2b.fw:iwn6000g2bfw -miwn6000g2bfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2bfw.c" iwn6000g2bfw.fwo optional iwn6000g2bfw | iwnfw \ dependency "iwn6000g2b.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000g2bfw.fwo" iwn6000g2b.fw optional iwn6000g2bfw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2b-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2b.fw" iwn6050fw.c optional iwn6050fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6050.fw:iwn6050fw -miwn6050fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6050fw.c" iwn6050fw.fwo optional iwn6050fw | iwnfw \ dependency "iwn6050.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6050fw.fwo" iwn6050.fw optional iwn6050fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6050-41.28.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6050.fw" dev/ixgb/if_ixgb.c optional ixgb dev/ixgb/ixgb_ee.c optional ixgb dev/ixgb/ixgb_hw.c optional ixgb dev/ixgbe/if_ix.c optional ix inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/if_ixv.c optional ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/ix_txrx.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_osdep.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_phy.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_api.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_common.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_mbx.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_vf.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82598.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82599.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_x540.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_x550.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82598.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82599.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/jedec_ts/jedec_ts.c optional jedec_ts smbus dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy dev/joy/joy_isa.c optional joy isa dev/kbd/kbd.c optional atkbd | pckbd | sc | ukbd | vt dev/kbdmux/kbdmux.c optional kbdmux dev/ksyms/ksyms.c optional ksyms dev/le/am7990.c optional le dev/le/am79900.c optional le dev/le/if_le_pci.c optional le pci dev/le/lance.c optional le dev/led/led.c standard dev/lge/if_lge.c optional lge dev/lmc/if_lmc.c optional lmc dev/malo/if_malo.c optional malo dev/malo/if_malohal.c optional malo dev/malo/if_malo_pci.c optional malo pci dev/mc146818/mc146818.c optional mc146818 dev/mca/mca_bus.c optional mca dev/md/md.c optional md dev/mdio/mdio_if.m optional miiproxy | mdio dev/mdio/mdio.c optional miiproxy | mdio dev/mem/memdev.c optional mem dev/mem/memutil.c optional mem dev/mfi/mfi.c optional mfi dev/mfi/mfi_debug.c optional mfi dev/mfi/mfi_pci.c optional mfi pci dev/mfi/mfi_disk.c optional mfi dev/mfi/mfi_syspd.c optional mfi dev/mfi/mfi_tbolt.c optional mfi dev/mfi/mfi_linux.c optional mfi compat_linux dev/mfi/mfi_cam.c optional mfip scbus dev/mii/acphy.c optional miibus | acphy dev/mii/amphy.c optional miibus | amphy dev/mii/atphy.c optional miibus | atphy dev/mii/axphy.c optional miibus | axphy dev/mii/bmtphy.c optional miibus | bmtphy dev/mii/brgphy.c optional miibus | brgphy dev/mii/ciphy.c optional miibus | ciphy dev/mii/e1000phy.c optional miibus | e1000phy dev/mii/gentbi.c optional miibus | gentbi dev/mii/icsphy.c optional miibus | icsphy dev/mii/ip1000phy.c optional miibus | ip1000phy dev/mii/jmphy.c optional miibus | jmphy dev/mii/lxtphy.c optional miibus | lxtphy dev/mii/micphy.c optional miibus fdt | micphy fdt dev/mii/mii.c optional miibus | mii dev/mii/mii_bitbang.c optional miibus | mii_bitbang dev/mii/mii_physubr.c optional miibus | mii dev/mii/miibus_if.m optional miibus | mii dev/mii/mlphy.c optional miibus | mlphy dev/mii/nsgphy.c optional miibus | nsgphy dev/mii/nsphy.c optional miibus | nsphy dev/mii/nsphyter.c optional miibus | nsphyter dev/mii/pnaphy.c optional miibus | pnaphy dev/mii/qsphy.c optional miibus | qsphy dev/mii/rdcphy.c optional miibus | rdcphy dev/mii/rgephy.c optional miibus | rgephy dev/mii/rlphy.c optional miibus | rlphy dev/mii/rlswitch.c optional rlswitch dev/mii/smcphy.c optional miibus | smcphy dev/mii/smscphy.c optional miibus | smscphy dev/mii/tdkphy.c optional miibus | tdkphy dev/mii/tlphy.c optional miibus | tlphy dev/mii/truephy.c optional miibus | truephy dev/mii/ukphy.c optional miibus | mii dev/mii/ukphy_subr.c optional miibus | mii dev/mii/xmphy.c optional miibus | xmphy dev/mk48txx/mk48txx.c optional mk48txx dev/mlx/mlx.c optional mlx dev/mlx/mlx_disk.c optional mlx dev/mlx/mlx_pci.c optional mlx pci dev/mly/mly.c optional mly dev/mmc/mmc.c optional mmc dev/mmc/mmcbr_if.m standard dev/mmc/mmcbus_if.m standard dev/mmc/mmcsd.c optional mmcsd dev/mn/if_mn.c optional mn pci dev/mpr/mpr.c optional mpr dev/mpr/mpr_config.c optional mpr # XXX Work around clang warning, until maintainer approves fix. dev/mpr/mpr_mapping.c optional mpr \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/mpr/mpr_pci.c optional mpr pci dev/mpr/mpr_sas.c optional mpr \ compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}" dev/mpr/mpr_sas_lsi.c optional mpr dev/mpr/mpr_table.c optional mpr dev/mpr/mpr_user.c optional mpr dev/mps/mps.c optional mps dev/mps/mps_config.c optional mps # XXX Work around clang warning, until maintainer approves fix. dev/mps/mps_mapping.c optional mps \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/mps/mps_pci.c optional mps pci dev/mps/mps_sas.c optional mps \ compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}" dev/mps/mps_sas_lsi.c optional mps dev/mps/mps_table.c optional mps dev/mps/mps_user.c optional mps dev/mpt/mpt.c optional mpt dev/mpt/mpt_cam.c optional mpt dev/mpt/mpt_debug.c optional mpt dev/mpt/mpt_pci.c optional mpt pci dev/mpt/mpt_raid.c optional mpt dev/mpt/mpt_user.c optional mpt dev/mrsas/mrsas.c optional mrsas dev/mrsas/mrsas_cam.c optional mrsas dev/mrsas/mrsas_ioctl.c optional mrsas dev/mrsas/mrsas_fp.c optional mrsas dev/msk/if_msk.c optional msk dev/mvs/mvs.c optional mvs dev/mvs/mvs_if.m optional mvs dev/mvs/mvs_pci.c optional mvs pci dev/mwl/if_mwl.c optional mwl dev/mwl/if_mwl_pci.c optional mwl pci dev/mwl/mwlhal.c optional mwl mwlfw.c optional mwlfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk mw88W8363.fw:mw88W8363fw mwlboot.fw:mwlboot -mmwl -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "mwlfw.c" mw88W8363.fwo optional mwlfw \ dependency "mw88W8363.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mw88W8363.fwo" mw88W8363.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mw88W8363.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mw88W8363.fw" mwlboot.fwo optional mwlfw \ dependency "mwlboot.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mwlboot.fwo" mwlboot.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mwlboot.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mwlboot.fw" dev/mxge/if_mxge.c optional mxge pci dev/mxge/mxge_eth_z8e.c optional mxge pci dev/mxge/mxge_ethp_z8e.c optional mxge pci dev/mxge/mxge_rss_eth_z8e.c optional mxge pci dev/mxge/mxge_rss_ethp_z8e.c optional mxge pci dev/my/if_my.c optional my dev/nand/nand.c optional nand dev/nand/nand_bbt.c optional nand dev/nand/nand_cdev.c optional nand dev/nand/nand_generic.c optional nand dev/nand/nand_geom.c optional nand dev/nand/nand_id.c optional nand dev/nand/nandbus.c optional nand dev/nand/nandbus_if.m optional nand dev/nand/nand_if.m optional nand dev/nand/nandsim.c optional nandsim nand dev/nand/nandsim_chip.c optional nandsim nand dev/nand/nandsim_ctrl.c optional nandsim nand dev/nand/nandsim_log.c optional nandsim nand dev/nand/nandsim_swap.c optional nandsim nand dev/nand/nfc_if.m optional nand dev/ncr/ncr.c optional ncr pci dev/ncv/ncr53c500.c optional ncv dev/ncv/ncr53c500_pccard.c optional ncv pccard dev/netmap/if_ptnet.c optional netmap inet dev/netmap/netmap.c optional netmap dev/netmap/netmap_freebsd.c optional netmap dev/netmap/netmap_generic.c optional netmap dev/netmap/netmap_mbq.c optional netmap dev/netmap/netmap_mem2.c optional netmap dev/netmap/netmap_monitor.c optional netmap dev/netmap/netmap_offloadings.c optional netmap dev/netmap/netmap_pipe.c optional netmap dev/netmap/netmap_pt.c optional netmap dev/netmap/netmap_vale.c optional netmap # compile-with "${NORMAL_C} -Wconversion -Wextra" dev/nfsmb/nfsmb.c optional nfsmb pci dev/nge/if_nge.c optional nge dev/nxge/if_nxge.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-device.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-mm.c optional nxge dev/nxge/xgehal/xge-queue.c optional nxge dev/nxge/xgehal/xgehal-driver.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-ring.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-channel.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-fifo.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-stats.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-config.c optional nxge dev/nxge/xgehal/xgehal-mgmt.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nmdm/nmdm.c optional nmdm dev/nsp/nsp.c optional nsp dev/nsp/nsp_pccard.c optional nsp pccard dev/null/null.c standard dev/oce/oce_hw.c optional oce pci dev/oce/oce_if.c optional oce pci dev/oce/oce_mbox.c optional oce pci dev/oce/oce_queue.c optional oce pci dev/oce/oce_sysctl.c optional oce pci dev/oce/oce_util.c optional oce pci dev/ofw/ofw_bus_if.m optional fdt dev/ofw/ofw_bus_subr.c optional fdt dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofw_fdt.c optional fdt dev/ofw/ofw_if.m optional fdt dev/ofw/ofw_subr.c optional fdt dev/ofw/ofwbus.c optional fdt dev/ofw/openfirm.c optional fdt dev/ofw/openfirmio.c optional fdt dev/ow/ow.c optional ow \ dependency "owll_if.h" \ dependency "own_if.h" dev/ow/owll_if.m optional ow dev/ow/own_if.m optional ow dev/ow/ow_temp.c optional ow_temp dev/ow/owc_gpiobus.c optional owc gpio dev/patm/if_patm.c optional patm pci dev/patm/if_patm_attach.c optional patm pci dev/patm/if_patm_intr.c optional patm pci dev/patm/if_patm_ioctl.c optional patm pci dev/patm/if_patm_rtables.c optional patm pci dev/patm/if_patm_rx.c optional patm pci dev/patm/if_patm_tx.c optional patm pci dev/pbio/pbio.c optional pbio isa dev/pccard/card_if.m standard dev/pccard/pccard.c optional pccard dev/pccard/pccard_cis.c optional pccard dev/pccard/pccard_cis_quirks.c optional pccard dev/pccard/pccard_device.c optional pccard dev/pccard/power_if.m standard dev/pccbb/pccbb.c optional cbb dev/pccbb/pccbb_isa.c optional cbb isa dev/pccbb/pccbb_pci.c optional cbb pci dev/pcf/pcf.c optional pcf dev/pci/eisa_pci.c optional pci eisa dev/pci/fixup_pci.c optional pci dev/pci/hostb_pci.c optional pci dev/pci/ignore_pci.c optional pci dev/pci/isa_pci.c optional pci isa dev/pci/pci.c optional pci dev/pci/pci_if.m standard dev/pci/pci_iov.c optional pci pci_iov dev/pci/pci_iov_if.m standard dev/pci/pci_iov_schema.c optional pci pci_iov dev/pci/pci_pci.c optional pci dev/pci/pci_subr.c optional pci dev/pci/pci_user.c optional pci dev/pci/pcib_if.m standard dev/pci/pcib_support.c standard dev/pci/vga_pci.c optional pci dev/pcn/if_pcn.c optional pcn pci dev/pdq/if_fea.c optional fea eisa dev/pdq/if_fpa.c optional fpa pci dev/pdq/pdq.c optional nowerror fea eisa | fpa pci dev/pdq/pdq_ifsubr.c optional nowerror fea eisa | fpa pci dev/pms/freebsd/driver/ini/src/agtiapi.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sadisc.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/mpi.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/saframe.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sahw.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sainit.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/saint.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sampicmd.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sampirsp.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/saphy.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/saport.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sasata.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sasmp.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sassp.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/satimer.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/sautil.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/saioctlcmd.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sallsdk/spc/mpidebug.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/discovery/dm/dminit.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/discovery/dm/dmsmp.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/discovery/dm/dmdisc.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/discovery/dm/dmport.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/discovery/dm/dmtimer.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/discovery/dm/dmmisc.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sat/src/sminit.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sat/src/smmisc.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sat/src/smsat.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sat/src/smsatcb.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sat/src/smsathw.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/sat/src/smtimer.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdinit.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdmisc.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdesgl.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdport.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdint.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdioctl.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdhw.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/ossacmnapi.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tddmcmnapi.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdsmcmnapi.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/common/tdtimers.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sas/ini/itdio.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sas/ini/itdcb.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sas/ini/itdinit.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sas/ini/itddisc.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sata/host/sat.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sata/host/ossasat.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/pms/RefTisa/tisa/sassata/sata/host/sathw.c optional pmspcv \ compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w" dev/ppbus/if_plip.c optional plip dev/ppbus/immio.c optional vpo dev/ppbus/lpbb.c optional lpbb dev/ppbus/lpt.c optional lpt dev/ppbus/pcfclock.c optional pcfclock dev/ppbus/ppb_1284.c optional ppbus dev/ppbus/ppb_base.c optional ppbus dev/ppbus/ppb_msq.c optional ppbus dev/ppbus/ppbconf.c optional ppbus dev/ppbus/ppbus_if.m optional ppbus dev/ppbus/ppi.c optional ppi dev/ppbus/pps.c optional pps dev/ppbus/vpo.c optional vpo dev/ppbus/vpoio.c optional vpo dev/ppc/ppc.c optional ppc dev/ppc/ppc_acpi.c optional ppc acpi dev/ppc/ppc_isa.c optional ppc isa dev/ppc/ppc_pci.c optional ppc pci dev/ppc/ppc_puc.c optional ppc puc dev/proto/proto_bus_isa.c optional proto acpi | proto isa dev/proto/proto_bus_pci.c optional proto pci dev/proto/proto_busdma.c optional proto dev/proto/proto_core.c optional proto dev/pst/pst-iop.c optional pst dev/pst/pst-pci.c optional pst pci dev/pst/pst-raid.c optional pst dev/pty/pty.c optional pty dev/puc/puc.c optional puc dev/puc/puc_cfg.c optional puc dev/puc/puc_pccard.c optional puc pccard dev/puc/puc_pci.c optional puc pci dev/puc/pucdata.c optional puc pci dev/quicc/quicc_core.c optional quicc dev/ral/rt2560.c optional ral dev/ral/rt2661.c optional ral dev/ral/rt2860.c optional ral dev/ral/if_ral_pci.c optional ral pci rt2561fw.c optional rt2561fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561.fw:rt2561fw -mrt2561 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561fw.c" rt2561fw.fwo optional rt2561fw | ralfw \ dependency "rt2561.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561fw.fwo" rt2561.fw optional rt2561fw | ralfw \ dependency "$S/contrib/dev/ral/rt2561.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561.fw" rt2561sfw.c optional rt2561sfw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561s.fw:rt2561sfw -mrt2561s -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561sfw.c" rt2561sfw.fwo optional rt2561sfw | ralfw \ dependency "rt2561s.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561sfw.fwo" rt2561s.fw optional rt2561sfw | ralfw \ dependency "$S/contrib/dev/ral/rt2561s.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561s.fw" rt2661fw.c optional rt2661fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2661.fw:rt2661fw -mrt2661 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2661fw.c" rt2661fw.fwo optional rt2661fw | ralfw \ dependency "rt2661.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2661fw.fwo" rt2661.fw optional rt2661fw | ralfw \ dependency "$S/contrib/dev/ral/rt2661.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2661.fw" rt2860fw.c optional rt2860fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2860.fw:rt2860fw -mrt2860 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2860fw.c" rt2860fw.fwo optional rt2860fw | ralfw \ dependency "rt2860.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2860fw.fwo" rt2860.fw optional rt2860fw | ralfw \ dependency "$S/contrib/dev/ral/rt2860.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2860.fw" dev/random/random_infra.c optional random dev/random/random_harvestq.c optional random dev/random/randomdev.c optional random random_yarrow | \ random !random_yarrow !random_loadable dev/random/yarrow.c optional random random_yarrow dev/random/fortuna.c optional random !random_yarrow !random_loadable dev/random/hash.c optional random random_yarrow | \ random !random_yarrow !random_loadable dev/rc/rc.c optional rc dev/rccgpio/rccgpio.c optional rccgpio gpio dev/re/if_re.c optional re dev/rl/if_rl.c optional rl pci dev/rndtest/rndtest.c optional rndtest dev/rp/rp.c optional rp dev/rp/rp_isa.c optional rp isa dev/rp/rp_pci.c optional rp pci # dev/rtwn/if_rtwn.c optional rtwn dev/rtwn/if_rtwn_beacon.c optional rtwn dev/rtwn/if_rtwn_calib.c optional rtwn dev/rtwn/if_rtwn_cam.c optional rtwn dev/rtwn/if_rtwn_efuse.c optional rtwn dev/rtwn/if_rtwn_fw.c optional rtwn dev/rtwn/if_rtwn_rx.c optional rtwn dev/rtwn/if_rtwn_task.c optional rtwn dev/rtwn/if_rtwn_tx.c optional rtwn # dev/rtwn/pci/rtwn_pci_attach.c optional rtwn_pci pci dev/rtwn/pci/rtwn_pci_reg.c optional rtwn_pci pci dev/rtwn/pci/rtwn_pci_rx.c optional rtwn_pci pci dev/rtwn/pci/rtwn_pci_tx.c optional rtwn_pci pci # dev/rtwn/usb/rtwn_usb_attach.c optional rtwn_usb dev/rtwn/usb/rtwn_usb_ep.c optional rtwn_usb dev/rtwn/usb/rtwn_usb_reg.c optional rtwn_usb dev/rtwn/usb/rtwn_usb_rx.c optional rtwn_usb dev/rtwn/usb/rtwn_usb_tx.c optional rtwn_usb # RTL8188E dev/rtwn/rtl8188e/r88e_beacon.c optional rtwn dev/rtwn/rtl8188e/r88e_calib.c optional rtwn dev/rtwn/rtl8188e/r88e_chan.c optional rtwn dev/rtwn/rtl8188e/r88e_fw.c optional rtwn dev/rtwn/rtl8188e/r88e_init.c optional rtwn dev/rtwn/rtl8188e/r88e_led.c optional rtwn dev/rtwn/rtl8188e/r88e_tx.c optional rtwn dev/rtwn/rtl8188e/r88e_rf.c optional rtwn dev/rtwn/rtl8188e/r88e_rom.c optional rtwn dev/rtwn/rtl8188e/r88e_rx.c optional rtwn dev/rtwn/rtl8188e/usb/r88eu_attach.c optional rtwn_usb dev/rtwn/rtl8188e/usb/r88eu_init.c optional rtwn_usb dev/rtwn/rtl8188e/usb/r88eu_rx.c optional rtwn_usb # RTL8192C dev/rtwn/rtl8192c/r92c_attach.c optional rtwn dev/rtwn/rtl8192c/r92c_beacon.c optional rtwn dev/rtwn/rtl8192c/r92c_calib.c optional rtwn dev/rtwn/rtl8192c/r92c_chan.c optional rtwn dev/rtwn/rtl8192c/r92c_fw.c optional rtwn dev/rtwn/rtl8192c/r92c_init.c optional rtwn dev/rtwn/rtl8192c/r92c_llt.c optional rtwn dev/rtwn/rtl8192c/r92c_rf.c optional rtwn dev/rtwn/rtl8192c/r92c_rom.c optional rtwn dev/rtwn/rtl8192c/r92c_rx.c optional rtwn dev/rtwn/rtl8192c/r92c_tx.c optional rtwn dev/rtwn/rtl8192c/pci/r92ce_attach.c optional rtwn_pci pci dev/rtwn/rtl8192c/pci/r92ce_calib.c optional rtwn_pci pci dev/rtwn/rtl8192c/pci/r92ce_fw.c optional rtwn_pci pci dev/rtwn/rtl8192c/pci/r92ce_init.c optional rtwn_pci pci dev/rtwn/rtl8192c/pci/r92ce_led.c optional rtwn_pci pci dev/rtwn/rtl8192c/pci/r92ce_rx.c optional rtwn_pci pci dev/rtwn/rtl8192c/pci/r92ce_tx.c optional rtwn_pci pci dev/rtwn/rtl8192c/usb/r92cu_attach.c optional rtwn_usb dev/rtwn/rtl8192c/usb/r92cu_init.c optional rtwn_usb dev/rtwn/rtl8192c/usb/r92cu_led.c optional rtwn_usb dev/rtwn/rtl8192c/usb/r92cu_rx.c optional rtwn_usb dev/rtwn/rtl8192c/usb/r92cu_tx.c optional rtwn_usb # RTL8192E dev/rtwn/rtl8192e/r92e_chan.c optional rtwn dev/rtwn/rtl8192e/r92e_fw.c optional rtwn dev/rtwn/rtl8192e/r92e_init.c optional rtwn dev/rtwn/rtl8192e/r92e_led.c optional rtwn dev/rtwn/rtl8192e/r92e_rf.c optional rtwn dev/rtwn/rtl8192e/r92e_rom.c optional rtwn dev/rtwn/rtl8192e/r92e_rx.c optional rtwn dev/rtwn/rtl8192e/usb/r92eu_attach.c optional rtwn_usb dev/rtwn/rtl8192e/usb/r92eu_init.c optional rtwn_usb # RTL8812A dev/rtwn/rtl8812a/r12a_beacon.c optional rtwn dev/rtwn/rtl8812a/r12a_calib.c optional rtwn dev/rtwn/rtl8812a/r12a_caps.c optional rtwn dev/rtwn/rtl8812a/r12a_chan.c optional rtwn dev/rtwn/rtl8812a/r12a_fw.c optional rtwn dev/rtwn/rtl8812a/r12a_init.c optional rtwn dev/rtwn/rtl8812a/r12a_led.c optional rtwn dev/rtwn/rtl8812a/r12a_rf.c optional rtwn dev/rtwn/rtl8812a/r12a_rom.c optional rtwn dev/rtwn/rtl8812a/r12a_rx.c optional rtwn dev/rtwn/rtl8812a/r12a_tx.c optional rtwn dev/rtwn/rtl8812a/usb/r12au_attach.c optional rtwn_usb dev/rtwn/rtl8812a/usb/r12au_init.c optional rtwn_usb dev/rtwn/rtl8812a/usb/r12au_rx.c optional rtwn_usb dev/rtwn/rtl8812a/usb/r12au_tx.c optional rtwn_usb # RTL8821A dev/rtwn/rtl8821a/r21a_beacon.c optional rtwn dev/rtwn/rtl8821a/r21a_calib.c optional rtwn dev/rtwn/rtl8821a/r21a_chan.c optional rtwn dev/rtwn/rtl8821a/r21a_fw.c optional rtwn dev/rtwn/rtl8821a/r21a_init.c optional rtwn dev/rtwn/rtl8821a/r21a_led.c optional rtwn dev/rtwn/rtl8821a/r21a_rom.c optional rtwn dev/rtwn/rtl8821a/r21a_rx.c optional rtwn dev/rtwn/rtl8821a/usb/r21au_attach.c optional rtwn_usb dev/rtwn/rtl8821a/usb/r21au_dfs.c optional rtwn_usb dev/rtwn/rtl8821a/usb/r21au_init.c optional rtwn_usb rtwn-rtl8188eufw.c optional rtwn-rtl8188eufw | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8188eufw.fw:rtwn-rtl8188eufw:111 -mrtwn-rtl8188eufw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8188eufw.c" rtwn-rtl8188eufw.fwo optional rtwn-rtl8188eufw | rtwnfw \ dependency "rtwn-rtl8188eufw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8188eufw.fwo" rtwn-rtl8188eufw.fw optional rtwn-rtl8188eufw | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8188eufw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8188eufw.fw" rtwn-rtl8192cfwE.c optional rtwn-rtl8192cfwE | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8192cfwE.fw:rtwn-rtl8192cfwE:111 -mrtwn-rtl8192cfwE -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8192cfwE.c" rtwn-rtl8192cfwE.fwo optional rtwn-rtl8192cfwE | rtwnfw \ dependency "rtwn-rtl8192cfwE.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8192cfwE.fwo" rtwn-rtl8192cfwE.fw optional rtwn-rtl8192cfwE | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8192cfwE.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8192cfwE.fw" rtwn-rtl8192cfwE_B.c optional rtwn-rtl8192cfwE_B | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8192cfwE_B.fw:rtwn-rtl8192cfwE_B:111 -mrtwn-rtl8192cfwE_B -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8192cfwE_B.c" rtwn-rtl8192cfwE_B.fwo optional rtwn-rtl8192cfwE_B | rtwnfw \ dependency "rtwn-rtl8192cfwE_B.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8192cfwE_B.fwo" rtwn-rtl8192cfwE_B.fw optional rtwn-rtl8192cfwE_B | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8192cfwE_B.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8192cfwE_B.fw" rtwn-rtl8192cfwT.c optional rtwn-rtl8192cfwT | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8192cfwT.fw:rtwn-rtl8192cfwT:111 -mrtwn-rtl8192cfwT -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8192cfwT.c" rtwn-rtl8192cfwT.fwo optional rtwn-rtl8192cfwT | rtwnfw \ dependency "rtwn-rtl8192cfwT.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8192cfwT.fwo" rtwn-rtl8192cfwT.fw optional rtwn-rtl8192cfwT | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8192cfwT.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8192cfwT.fw" rtwn-rtl8192cfwU.c optional rtwn-rtl8192cfwU | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8192cfwU.fw:rtwn-rtl8192cfwU:111 -mrtwn-rtl8192cfwU -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8192cfwU.c" rtwn-rtl8192cfwU.fwo optional rtwn-rtl8192cfwU | rtwnfw \ dependency "rtwn-rtl8192cfwU.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8192cfwU.fwo" rtwn-rtl8192cfwU.fw optional rtwn-rtl8192cfwU | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8192cfwU.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8192cfwU.fw" rtwn-rtl8192eufw.c optional rtwn-rtl8192eufw | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8192eufw.fw:rtwn-rtl8192eufw:111 -mrtwn-rtl8192eufw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8192eufw.c" rtwn-rtl8192eufw.fwo optional rtwn-rtl8192eufw | rtwnfw \ dependency "rtwn-rtl8192eufw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8192eufw.fwo" rtwn-rtl8192eufw.fw optional rtwn-rtl8192eufw | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8192eufw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8192eufw.fw" rtwn-rtl8812aufw.c optional rtwn-rtl8812aufw | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8812aufw.fw:rtwn-rtl8812aufw:111 -mrtwn-rtl8812aufw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8812aufw.c" rtwn-rtl8812aufw.fwo optional rtwn-rtl8812aufw | rtwnfw \ dependency "rtwn-rtl8812aufw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8812aufw.fwo" rtwn-rtl8812aufw.fw optional rtwn-rtl8812aufw | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8812aufw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8812aufw.fw" rtwn-rtl8821aufw.c optional rtwn-rtl8821aufw | rtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rtwn-rtl8821aufw.fw:rtwn-rtl8821aufw:111 -mrtwn-rtl8821aufw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rtwn-rtl8821aufw.c" rtwn-rtl8821aufw.fwo optional rtwn-rtl8821aufw | rtwnfw \ dependency "rtwn-rtl8821aufw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rtwn-rtl8821aufw.fwo" rtwn-rtl8821aufw.fw optional rtwn-rtl8821aufw | rtwnfw \ dependency "$S/contrib/dev/rtwn/rtwn-rtl8821aufw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rtwn-rtl8821aufw.fw" dev/safe/safe.c optional safe dev/scc/scc_if.m optional scc dev/scc/scc_bfe_ebus.c optional scc ebus dev/scc/scc_bfe_quicc.c optional scc quicc dev/scc/scc_bfe_sbus.c optional scc fhc | scc sbus dev/scc/scc_core.c optional scc dev/scc/scc_dev_quicc.c optional scc quicc dev/scc/scc_dev_sab82532.c optional scc dev/scc/scc_dev_z8530.c optional scc dev/sdhci/sdhci.c optional sdhci dev/sdhci/sdhci_fdt_gpio.c optional sdhci fdt gpio dev/sdhci/sdhci_if.m optional sdhci dev/sdhci/sdhci_acpi.c optional sdhci acpi dev/sdhci/sdhci_pci.c optional sdhci pci dev/sf/if_sf.c optional sf pci dev/sge/if_sge.c optional sge pci dev/siba/siba_bwn.c optional siba_bwn pci dev/siba/siba_core.c optional siba_bwn pci dev/siis/siis.c optional siis pci dev/sis/if_sis.c optional sis pci dev/sk/if_sk.c optional sk pci dev/smbus/smb.c optional smb dev/smbus/smbconf.c optional smbus dev/smbus/smbus.c optional smbus dev/smbus/smbus_if.m optional smbus dev/smc/if_smc.c optional smc dev/smc/if_smc_fdt.c optional smc fdt dev/sn/if_sn.c optional sn dev/sn/if_sn_isa.c optional sn isa dev/sn/if_sn_pccard.c optional sn pccard dev/snp/snp.c optional snp dev/sound/clone.c optional sound dev/sound/unit.c optional sound dev/sound/isa/ad1816.c optional snd_ad1816 isa dev/sound/isa/ess.c optional snd_ess isa dev/sound/isa/gusc.c optional snd_gusc isa dev/sound/isa/mss.c optional snd_mss isa dev/sound/isa/sb16.c optional snd_sb16 isa dev/sound/isa/sb8.c optional snd_sb8 isa dev/sound/isa/sbc.c optional snd_sbc isa dev/sound/isa/sndbuf_dma.c optional sound isa dev/sound/pci/als4000.c optional snd_als4000 pci dev/sound/pci/atiixp.c optional snd_atiixp pci dev/sound/pci/cmi.c optional snd_cmi pci dev/sound/pci/cs4281.c optional snd_cs4281 pci dev/sound/pci/csa.c optional snd_csa pci dev/sound/pci/csapcm.c optional snd_csa pci dev/sound/pci/ds1.c optional snd_ds1 pci dev/sound/pci/emu10k1.c optional snd_emu10k1 pci dev/sound/pci/emu10kx.c optional snd_emu10kx pci dev/sound/pci/emu10kx-pcm.c optional snd_emu10kx pci dev/sound/pci/emu10kx-midi.c optional snd_emu10kx pci dev/sound/pci/envy24.c optional snd_envy24 pci dev/sound/pci/envy24ht.c optional snd_envy24ht pci dev/sound/pci/es137x.c optional snd_es137x pci dev/sound/pci/fm801.c optional snd_fm801 pci dev/sound/pci/ich.c optional snd_ich pci dev/sound/pci/maestro.c optional snd_maestro pci dev/sound/pci/maestro3.c optional snd_maestro3 pci dev/sound/pci/neomagic.c optional snd_neomagic pci dev/sound/pci/solo.c optional snd_solo pci dev/sound/pci/spicds.c optional snd_spicds pci dev/sound/pci/t4dwave.c optional snd_t4dwave pci dev/sound/pci/via8233.c optional snd_via8233 pci dev/sound/pci/via82c686.c optional snd_via82c686 pci dev/sound/pci/vibes.c optional snd_vibes pci dev/sound/pci/hda/hdaa.c optional snd_hda pci dev/sound/pci/hda/hdaa_patches.c optional snd_hda pci dev/sound/pci/hda/hdac.c optional snd_hda pci dev/sound/pci/hda/hdac_if.m optional snd_hda pci dev/sound/pci/hda/hdacc.c optional snd_hda pci dev/sound/pci/hdspe.c optional snd_hdspe pci dev/sound/pci/hdspe-pcm.c optional snd_hdspe pci dev/sound/pcm/ac97.c optional sound dev/sound/pcm/ac97_if.m optional sound dev/sound/pcm/ac97_patch.c optional sound dev/sound/pcm/buffer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/channel.c optional sound dev/sound/pcm/channel_if.m optional sound dev/sound/pcm/dsp.c optional sound dev/sound/pcm/feeder.c optional sound dev/sound/pcm/feeder_chain.c optional sound dev/sound/pcm/feeder_eq.c optional sound \ dependency "feeder_eq_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_if.m optional sound dev/sound/pcm/feeder_format.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_matrix.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_mixer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_rate.c optional sound \ dependency "feeder_rate_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_volume.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/mixer.c optional sound dev/sound/pcm/mixer_if.m optional sound dev/sound/pcm/sndstat.c optional sound dev/sound/pcm/sound.c optional sound dev/sound/pcm/vchan.c optional sound dev/sound/usb/uaudio.c optional snd_uaudio usb dev/sound/usb/uaudio_pcm.c optional snd_uaudio usb dev/sound/midi/midi.c optional sound dev/sound/midi/mpu401.c optional sound dev/sound/midi/mpu_if.m optional sound dev/sound/midi/mpufoi_if.m optional sound dev/sound/midi/sequencer.c optional sound dev/sound/midi/synth_if.m optional sound dev/spibus/ofw_spibus.c optional fdt spibus dev/spibus/spibus.c optional spibus \ dependency "spibus_if.h" dev/spibus/spigen.c optional spigen dev/spibus/spibus_if.m optional spibus dev/ste/if_ste.c optional ste pci dev/stg/tmc18c30.c optional stg dev/stg/tmc18c30_isa.c optional stg isa dev/stg/tmc18c30_pccard.c optional stg pccard dev/stg/tmc18c30_pci.c optional stg pci dev/stg/tmc18c30_subr.c optional stg dev/stge/if_stge.c optional stge dev/streams/streams.c optional streams dev/sym/sym_hipd.c optional sym \ dependency "$S/dev/sym/sym_{conf,defs}.h" dev/syscons/blank/blank_saver.c optional blank_saver dev/syscons/daemon/daemon_saver.c optional daemon_saver dev/syscons/dragon/dragon_saver.c optional dragon_saver dev/syscons/fade/fade_saver.c optional fade_saver dev/syscons/fire/fire_saver.c optional fire_saver dev/syscons/green/green_saver.c optional green_saver dev/syscons/logo/logo.c optional logo_saver dev/syscons/logo/logo_saver.c optional logo_saver dev/syscons/rain/rain_saver.c optional rain_saver dev/syscons/schistory.c optional sc dev/syscons/scmouse.c optional sc dev/syscons/scterm.c optional sc dev/syscons/scvidctl.c optional sc dev/syscons/snake/snake_saver.c optional snake_saver dev/syscons/star/star_saver.c optional star_saver dev/syscons/syscons.c optional sc dev/syscons/sysmouse.c optional sc dev/syscons/warp/warp_saver.c optional warp_saver dev/tdfx/tdfx_linux.c optional tdfx_linux tdfx compat_linux dev/tdfx/tdfx_pci.c optional tdfx pci dev/ti/if_ti.c optional ti pci dev/tl/if_tl.c optional tl pci dev/trm/trm.c optional trm dev/twa/tw_cl_init.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_intr.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_io.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_misc.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_cam.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_freebsd.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twe/twe.c optional twe dev/twe/twe_freebsd.c optional twe dev/tws/tws.c optional tws dev/tws/tws_cam.c optional tws dev/tws/tws_hdm.c optional tws dev/tws/tws_services.c optional tws dev/tws/tws_user.c optional tws dev/tx/if_tx.c optional tx dev/txp/if_txp.c optional txp dev/uart/uart_bus_acpi.c optional uart acpi dev/uart/uart_bus_ebus.c optional uart ebus dev/uart/uart_bus_fdt.c optional uart fdt dev/uart/uart_bus_isa.c optional uart isa dev/uart/uart_bus_pccard.c optional uart pccard dev/uart/uart_bus_pci.c optional uart pci dev/uart/uart_bus_puc.c optional uart puc dev/uart/uart_bus_scc.c optional uart scc dev/uart/uart_core.c optional uart dev/uart/uart_dbg.c optional uart gdb dev/uart/uart_dev_ns8250.c optional uart uart_ns8250 | uart uart_snps dev/uart/uart_dev_pl011.c optional uart pl011 dev/uart/uart_dev_quicc.c optional uart quicc dev/uart/uart_dev_sab82532.c optional uart uart_sab82532 dev/uart/uart_dev_sab82532.c optional uart scc dev/uart/uart_dev_snps.c optional uart uart_snps fdt dev/uart/uart_dev_z8530.c optional uart uart_z8530 dev/uart/uart_dev_z8530.c optional uart scc dev/uart/uart_if.m optional uart dev/uart/uart_subr.c optional uart dev/uart/uart_tty.c optional uart dev/ubsec/ubsec.c optional ubsec # # USB controller drivers # dev/usb/controller/at91dci.c optional at91dci dev/usb/controller/at91dci_atmelarm.c optional at91dci at91rm9200 dev/usb/controller/musb_otg.c optional musb dev/usb/controller/musb_otg_atmelarm.c optional musb at91rm9200 dev/usb/controller/dwc_otg.c optional dwcotg dev/usb/controller/dwc_otg_fdt.c optional dwcotg fdt dev/usb/controller/ehci.c optional ehci dev/usb/controller/ehci_pci.c optional ehci pci dev/usb/controller/ohci.c optional ohci dev/usb/controller/ohci_pci.c optional ohci pci dev/usb/controller/uhci.c optional uhci dev/usb/controller/uhci_pci.c optional uhci pci dev/usb/controller/xhci.c optional xhci dev/usb/controller/xhci_pci.c optional xhci pci dev/usb/controller/saf1761_otg.c optional saf1761otg dev/usb/controller/saf1761_otg_fdt.c optional saf1761otg fdt dev/usb/controller/uss820dci.c optional uss820dci dev/usb/controller/uss820dci_atmelarm.c optional uss820dci at91rm9200 dev/usb/controller/usb_controller.c optional usb # # USB storage drivers # dev/usb/storage/umass.c optional umass dev/usb/storage/urio.c optional urio dev/usb/storage/ustorage_fs.c optional usfs # # USB core # dev/usb/usb_busdma.c optional usb dev/usb/usb_core.c optional usb dev/usb/usb_debug.c optional usb dev/usb/usb_dev.c optional usb dev/usb/usb_device.c optional usb dev/usb/usb_dynamic.c optional usb dev/usb/usb_error.c optional usb dev/usb/usb_generic.c optional usb dev/usb/usb_handle_request.c optional usb dev/usb/usb_hid.c optional usb dev/usb/usb_hub.c optional usb dev/usb/usb_if.m optional usb dev/usb/usb_lookup.c optional usb dev/usb/usb_mbuf.c optional usb dev/usb/usb_msctest.c optional usb dev/usb/usb_parse.c optional usb dev/usb/usb_pf.c optional usb dev/usb/usb_process.c optional usb dev/usb/usb_request.c optional usb dev/usb/usb_transfer.c optional usb dev/usb/usb_util.c optional usb # # USB network drivers # dev/usb/net/if_aue.c optional aue dev/usb/net/if_axe.c optional axe dev/usb/net/if_axge.c optional axge dev/usb/net/if_cdce.c optional cdce dev/usb/net/if_cue.c optional cue dev/usb/net/if_ipheth.c optional ipheth dev/usb/net/if_kue.c optional kue dev/usb/net/if_mos.c optional mos dev/usb/net/if_rue.c optional rue dev/usb/net/if_smsc.c optional smsc dev/usb/net/if_udav.c optional udav dev/usb/net/if_ure.c optional ure dev/usb/net/if_usie.c optional usie dev/usb/net/if_urndis.c optional urndis dev/usb/net/ruephy.c optional rue dev/usb/net/usb_ethernet.c optional uether | aue | axe | axge | cdce | \ cue | ipheth | kue | mos | rue | \ smsc | udav | ure | urndis dev/usb/net/uhso.c optional uhso # # USB WLAN drivers # dev/usb/wlan/if_rsu.c optional rsu rsu-rtl8712fw.c optional rsu-rtl8712fw | rsufw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rsu-rtl8712fw.fw:rsu-rtl8712fw:120 -mrsu-rtl8712fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rsu-rtl8712fw.c" rsu-rtl8712fw.fwo optional rsu-rtl8712fw | rsufw \ dependency "rsu-rtl8712fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rsu-rtl8712fw.fwo" rsu-rtl8712fw.fw optional rsu-rtl8712.fw | rsufw \ dependency "$S/contrib/dev/rsu/rsu-rtl8712fw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rsu-rtl8712fw.fw" dev/usb/wlan/if_rum.c optional rum dev/usb/wlan/if_run.c optional run runfw.c optional runfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk run.fw:runfw -mrunfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "runfw.c" runfw.fwo optional runfw \ dependency "run.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "runfw.fwo" run.fw optional runfw \ dependency "$S/contrib/dev/run/rt2870.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "run.fw" dev/usb/wlan/if_uath.c optional uath dev/usb/wlan/if_upgt.c optional upgt dev/usb/wlan/if_ural.c optional ural dev/usb/wlan/if_urtw.c optional urtw dev/usb/wlan/if_zyd.c optional zyd # # USB serial and parallel port drivers # dev/usb/serial/u3g.c optional u3g dev/usb/serial/uark.c optional uark dev/usb/serial/ubsa.c optional ubsa dev/usb/serial/ubser.c optional ubser dev/usb/serial/uchcom.c optional uchcom dev/usb/serial/ucycom.c optional ucycom dev/usb/serial/ufoma.c optional ufoma dev/usb/serial/uftdi.c optional uftdi dev/usb/serial/ugensa.c optional ugensa dev/usb/serial/uipaq.c optional uipaq dev/usb/serial/ulpt.c optional ulpt dev/usb/serial/umcs.c optional umcs dev/usb/serial/umct.c optional umct dev/usb/serial/umodem.c optional umodem dev/usb/serial/umoscom.c optional umoscom dev/usb/serial/uplcom.c optional uplcom dev/usb/serial/uslcom.c optional uslcom dev/usb/serial/uvisor.c optional uvisor dev/usb/serial/uvscom.c optional uvscom dev/usb/serial/usb_serial.c optional ucom | u3g | uark | ubsa | ubser | \ uchcom | ucycom | ufoma | uftdi | \ ugensa | uipaq | umcs | umct | \ umodem | umoscom | uplcom | usie | \ uslcom | uvisor | uvscom # # USB misc drivers # dev/usb/misc/ufm.c optional ufm dev/usb/misc/udbp.c optional udbp dev/usb/misc/ugold.c optional ugold dev/usb/misc/uled.c optional uled # # USB input drivers # dev/usb/input/atp.c optional atp dev/usb/input/uep.c optional uep dev/usb/input/uhid.c optional uhid dev/usb/input/ukbd.c optional ukbd dev/usb/input/ums.c optional ums dev/usb/input/wsp.c optional wsp # # USB quirks # dev/usb/quirk/usb_quirk.c optional usb # # USB templates # dev/usb/template/usb_template.c optional usb_template dev/usb/template/usb_template_audio.c optional usb_template dev/usb/template/usb_template_cdce.c optional usb_template dev/usb/template/usb_template_kbd.c optional usb_template dev/usb/template/usb_template_modem.c optional usb_template dev/usb/template/usb_template_mouse.c optional usb_template dev/usb/template/usb_template_msc.c optional usb_template dev/usb/template/usb_template_mtp.c optional usb_template dev/usb/template/usb_template_phone.c optional usb_template dev/usb/template/usb_template_serialnet.c optional usb_template dev/usb/template/usb_template_midi.c optional usb_template # # USB video drivers # dev/usb/video/udl.c optional udl # # USB END # dev/videomode/videomode.c optional videomode dev/videomode/edid.c optional videomode dev/videomode/pickmode.c optional videomode dev/videomode/vesagtf.c optional videomode dev/utopia/idtphy.c optional utopia dev/utopia/suni.c optional utopia dev/utopia/utopia.c optional utopia dev/vge/if_vge.c optional vge dev/viapm/viapm.c optional viapm pci dev/virtio/virtio.c optional virtio dev/virtio/virtqueue.c optional virtio dev/virtio/virtio_bus_if.m optional virtio dev/virtio/virtio_if.m optional virtio dev/virtio/pci/virtio_pci.c optional virtio_pci dev/virtio/mmio/virtio_mmio.c optional virtio_mmio fdt dev/virtio/mmio/virtio_mmio_if.m optional virtio_mmio fdt dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/block/virtio_blk.c optional virtio_blk dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/scsi/virtio_scsi.c optional virtio_scsi dev/virtio/random/virtio_random.c optional virtio_random dev/virtio/console/virtio_console.c optional virtio_console dev/vkbd/vkbd.c optional vkbd dev/vr/if_vr.c optional vr pci dev/vt/colors/vt_termcolors.c optional vt dev/vt/font/vt_font_default.c optional vt dev/vt/font/vt_mouse_cursor.c optional vt dev/vt/hw/efifb/efifb.c optional vt_efifb dev/vt/hw/fb/vt_fb.c optional vt dev/vt/hw/vga/vt_vga.c optional vt vt_vga dev/vt/logo/logo_freebsd.c optional vt splash dev/vt/logo/logo_beastie.c optional vt splash dev/vt/vt_buf.c optional vt dev/vt/vt_consolectl.c optional vt dev/vt/vt_core.c optional vt dev/vt/vt_cpulogos.c optional vt splash dev/vt/vt_font.c optional vt dev/vt/vt_sysmouse.c optional vt dev/vte/if_vte.c optional vte pci dev/vx/if_vx.c optional vx dev/vx/if_vx_eisa.c optional vx eisa dev/vx/if_vx_pci.c optional vx pci dev/vxge/vxge.c optional vxge dev/vxge/vxgehal/vxgehal-ifmsg.c optional vxge dev/vxge/vxgehal/vxgehal-mrpcim.c optional vxge dev/vxge/vxgehal/vxge-queue.c optional vxge dev/vxge/vxgehal/vxgehal-ring.c optional vxge dev/vxge/vxgehal/vxgehal-swapper.c optional vxge dev/vxge/vxgehal/vxgehal-mgmt.c optional vxge dev/vxge/vxgehal/vxgehal-srpcim.c optional vxge dev/vxge/vxgehal/vxgehal-config.c optional vxge dev/vxge/vxgehal/vxgehal-blockpool.c optional vxge dev/vxge/vxgehal/vxgehal-doorbells.c optional vxge dev/vxge/vxgehal/vxgehal-mgmtaux.c optional vxge dev/vxge/vxgehal/vxgehal-device.c optional vxge dev/vxge/vxgehal/vxgehal-mm.c optional vxge dev/vxge/vxgehal/vxgehal-driver.c optional vxge dev/vxge/vxgehal/vxgehal-virtualpath.c optional vxge dev/vxge/vxgehal/vxgehal-channel.c optional vxge dev/vxge/vxgehal/vxgehal-fifo.c optional vxge dev/watchdog/watchdog.c standard dev/wb/if_wb.c optional wb pci dev/wi/if_wi.c optional wi dev/wi/if_wi_pccard.c optional wi pccard dev/wi/if_wi_pci.c optional wi pci dev/wpi/if_wpi.c optional wpi pci wpifw.c optional wpifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk wpi.fw:wpifw:153229 -mwpi -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "wpifw.c" wpifw.fwo optional wpifw \ dependency "wpi.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "wpifw.fwo" wpi.fw optional wpifw \ dependency "$S/contrib/dev/wpi/iwlwifi-3945-15.32.2.9.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "wpi.fw" dev/xdma/xdma.c optional xdma dev/xdma/xdma_if.m optional xdma dev/xdma/xdma_fdt_test.c optional xdma xdma_test fdt dev/xe/if_xe.c optional xe dev/xe/if_xe_pccard.c optional xe pccard dev/xen/balloon/balloon.c optional xenhvm dev/xen/blkfront/blkfront.c optional xenhvm dev/xen/blkback/blkback.c optional xenhvm dev/xen/console/xen_console.c optional xenhvm dev/xen/control/control.c optional xenhvm dev/xen/grant_table/grant_table.c optional xenhvm dev/xen/netback/netback.c optional xenhvm dev/xen/netfront/netfront.c optional xenhvm dev/xen/xenpci/xenpci.c optional xenpci dev/xen/timer/timer.c optional xenhvm dev/xen/pvcpu/pvcpu.c optional xenhvm dev/xen/xenstore/xenstore.c optional xenhvm dev/xen/xenstore/xenstore_dev.c optional xenhvm dev/xen/xenstore/xenstored_dev.c optional xenhvm dev/xen/evtchn/evtchn_dev.c optional xenhvm dev/xen/privcmd/privcmd.c optional xenhvm dev/xen/gntdev/gntdev.c optional xenhvm dev/xen/debug/debug.c optional xenhvm dev/xl/if_xl.c optional xl pci dev/xl/xlphy.c optional xl pci fs/autofs/autofs.c optional autofs fs/autofs/autofs_vfsops.c optional autofs fs/autofs/autofs_vnops.c optional autofs fs/deadfs/dead_vnops.c standard fs/devfs/devfs_devs.c standard fs/devfs/devfs_dir.c standard fs/devfs/devfs_rule.c standard fs/devfs/devfs_vfsops.c standard fs/devfs/devfs_vnops.c standard fs/fdescfs/fdesc_vfsops.c optional fdescfs fs/fdescfs/fdesc_vnops.c optional fdescfs fs/fifofs/fifo_vnops.c standard fs/cuse/cuse.c optional cuse fs/fuse/fuse_device.c optional fuse fs/fuse/fuse_file.c optional fuse fs/fuse/fuse_internal.c optional fuse fs/fuse/fuse_io.c optional fuse fs/fuse/fuse_ipc.c optional fuse fs/fuse/fuse_main.c optional fuse fs/fuse/fuse_node.c optional fuse fs/fuse/fuse_vfsops.c optional fuse fs/fuse/fuse_vnops.c optional fuse fs/msdosfs/msdosfs_conv.c optional msdosfs fs/msdosfs/msdosfs_denode.c optional msdosfs fs/msdosfs/msdosfs_fat.c optional msdosfs fs/msdosfs/msdosfs_fileno.c optional msdosfs fs/msdosfs/msdosfs_iconv.c optional msdosfs_iconv fs/msdosfs/msdosfs_lookup.c optional msdosfs fs/msdosfs/msdosfs_vfsops.c optional msdosfs fs/msdosfs/msdosfs_vnops.c optional msdosfs fs/nandfs/bmap.c optional nandfs fs/nandfs/nandfs_alloc.c optional nandfs fs/nandfs/nandfs_bmap.c optional nandfs fs/nandfs/nandfs_buffer.c optional nandfs fs/nandfs/nandfs_cleaner.c optional nandfs fs/nandfs/nandfs_cpfile.c optional nandfs fs/nandfs/nandfs_dat.c optional nandfs fs/nandfs/nandfs_dir.c optional nandfs fs/nandfs/nandfs_ifile.c optional nandfs fs/nandfs/nandfs_segment.c optional nandfs fs/nandfs/nandfs_subr.c optional nandfs fs/nandfs/nandfs_sufile.c optional nandfs fs/nandfs/nandfs_vfsops.c optional nandfs fs/nandfs/nandfs_vnops.c optional nandfs fs/nfs/nfs_commonkrpc.c optional nfscl | nfsd fs/nfs/nfs_commonsubs.c optional nfscl | nfsd fs/nfs/nfs_commonport.c optional nfscl | nfsd fs/nfs/nfs_commonacl.c optional nfscl | nfsd fs/nfsclient/nfs_clcomsubs.c optional nfscl fs/nfsclient/nfs_clsubs.c optional nfscl fs/nfsclient/nfs_clstate.c optional nfscl fs/nfsclient/nfs_clkrpc.c optional nfscl fs/nfsclient/nfs_clrpcops.c optional nfscl fs/nfsclient/nfs_clvnops.c optional nfscl fs/nfsclient/nfs_clnode.c optional nfscl fs/nfsclient/nfs_clvfsops.c optional nfscl fs/nfsclient/nfs_clport.c optional nfscl fs/nfsclient/nfs_clbio.c optional nfscl fs/nfsclient/nfs_clnfsiod.c optional nfscl fs/nfsserver/nfs_fha_new.c optional nfsd inet fs/nfsserver/nfs_nfsdsocket.c optional nfsd inet fs/nfsserver/nfs_nfsdsubs.c optional nfsd inet fs/nfsserver/nfs_nfsdstate.c optional nfsd inet fs/nfsserver/nfs_nfsdkrpc.c optional nfsd inet fs/nfsserver/nfs_nfsdserv.c optional nfsd inet fs/nfsserver/nfs_nfsdport.c optional nfsd inet fs/nfsserver/nfs_nfsdcache.c optional nfsd inet fs/nullfs/null_subr.c optional nullfs fs/nullfs/null_vfsops.c optional nullfs fs/nullfs/null_vnops.c optional nullfs fs/procfs/procfs.c optional procfs fs/procfs/procfs_ctl.c optional procfs fs/procfs/procfs_dbregs.c optional procfs fs/procfs/procfs_fpregs.c optional procfs fs/procfs/procfs_ioctl.c optional procfs fs/procfs/procfs_map.c optional procfs fs/procfs/procfs_mem.c optional procfs fs/procfs/procfs_note.c optional procfs fs/procfs/procfs_osrel.c optional procfs fs/procfs/procfs_regs.c optional procfs fs/procfs/procfs_rlimit.c optional procfs fs/procfs/procfs_status.c optional procfs fs/procfs/procfs_type.c optional procfs fs/pseudofs/pseudofs.c optional pseudofs fs/pseudofs/pseudofs_fileno.c optional pseudofs fs/pseudofs/pseudofs_vncache.c optional pseudofs fs/pseudofs/pseudofs_vnops.c optional pseudofs fs/smbfs/smbfs_io.c optional smbfs fs/smbfs/smbfs_node.c optional smbfs fs/smbfs/smbfs_smb.c optional smbfs fs/smbfs/smbfs_subr.c optional smbfs fs/smbfs/smbfs_vfsops.c optional smbfs fs/smbfs/smbfs_vnops.c optional smbfs fs/udf/osta.c optional udf fs/udf/udf_iconv.c optional udf_iconv fs/udf/udf_vfsops.c optional udf fs/udf/udf_vnops.c optional udf fs/unionfs/union_subr.c optional unionfs fs/unionfs/union_vfsops.c optional unionfs fs/unionfs/union_vnops.c optional unionfs fs/tmpfs/tmpfs_vnops.c optional tmpfs fs/tmpfs/tmpfs_fifoops.c optional tmpfs fs/tmpfs/tmpfs_vfsops.c optional tmpfs fs/tmpfs/tmpfs_subr.c optional tmpfs gdb/gdb_cons.c optional gdb gdb/gdb_main.c optional gdb gdb/gdb_packet.c optional gdb geom/bde/g_bde.c optional geom_bde geom/bde/g_bde_crypt.c optional geom_bde geom/bde/g_bde_lock.c optional geom_bde geom/bde/g_bde_work.c optional geom_bde geom/cache/g_cache.c optional geom_cache geom/concat/g_concat.c optional geom_concat geom/eli/g_eli.c optional geom_eli geom/eli/g_eli_crypto.c optional geom_eli geom/eli/g_eli_ctl.c optional geom_eli geom/eli/g_eli_hmac.c optional geom_eli geom/eli/g_eli_integrity.c optional geom_eli geom/eli/g_eli_key.c optional geom_eli geom/eli/g_eli_key_cache.c optional geom_eli geom/eli/g_eli_privacy.c optional geom_eli geom/eli/pkcs5v2.c optional geom_eli geom/gate/g_gate.c optional geom_gate geom/geom_aes.c optional geom_aes geom/geom_bsd.c optional geom_bsd geom/geom_bsd_enc.c optional geom_bsd | geom_part_bsd geom/geom_ccd.c optional ccd | geom_ccd geom/geom_ctl.c standard geom/geom_dev.c standard geom/geom_disk.c standard geom/geom_dump.c standard geom/geom_event.c standard geom/geom_fox.c optional geom_fox geom/geom_flashmap.c optional fdt cfi | fdt nand | fdt mx25l geom/geom_io.c standard geom/geom_kern.c standard geom/geom_map.c optional geom_map geom/geom_mbr.c optional geom_mbr geom/geom_mbr_enc.c optional geom_mbr geom/geom_redboot.c optional geom_redboot geom/geom_slice.c standard geom/geom_subr.c standard geom/geom_sunlabel.c optional geom_sunlabel geom/geom_sunlabel_enc.c optional geom_sunlabel geom/geom_vfs.c standard geom/geom_vol_ffs.c optional geom_vol geom/journal/g_journal.c optional geom_journal geom/journal/g_journal_ufs.c optional geom_journal geom/label/g_label.c optional geom_label | geom_label_gpt geom/label/g_label_ext2fs.c optional geom_label geom/label/g_label_iso9660.c optional geom_label geom/label/g_label_msdosfs.c optional geom_label geom/label/g_label_ntfs.c optional geom_label geom/label/g_label_reiserfs.c optional geom_label geom/label/g_label_ufs.c optional geom_label geom/label/g_label_gpt.c optional geom_label | geom_label_gpt geom/label/g_label_disk_ident.c optional geom_label geom/linux_lvm/g_linux_lvm.c optional geom_linux_lvm geom/mirror/g_mirror.c optional geom_mirror geom/mirror/g_mirror_ctl.c optional geom_mirror geom/mountver/g_mountver.c optional geom_mountver geom/multipath/g_multipath.c optional geom_multipath geom/nop/g_nop.c optional geom_nop geom/part/g_part.c standard geom/part/g_part_if.m standard geom/part/g_part_apm.c optional geom_part_apm geom/part/g_part_bsd.c optional geom_part_bsd geom/part/g_part_bsd64.c optional geom_part_bsd64 geom/part/g_part_ebr.c optional geom_part_ebr geom/part/g_part_gpt.c optional geom_part_gpt geom/part/g_part_ldm.c optional geom_part_ldm geom/part/g_part_mbr.c optional geom_part_mbr geom/part/g_part_vtoc8.c optional geom_part_vtoc8 geom/raid/g_raid.c optional geom_raid geom/raid/g_raid_ctl.c optional geom_raid geom/raid/g_raid_md_if.m optional geom_raid geom/raid/g_raid_tr_if.m optional geom_raid geom/raid/md_ddf.c optional geom_raid geom/raid/md_intel.c optional geom_raid geom/raid/md_jmicron.c optional geom_raid geom/raid/md_nvidia.c optional geom_raid geom/raid/md_promise.c optional geom_raid geom/raid/md_sii.c optional geom_raid geom/raid/tr_concat.c optional geom_raid geom/raid/tr_raid0.c optional geom_raid geom/raid/tr_raid1.c optional geom_raid geom/raid/tr_raid1e.c optional geom_raid geom/raid/tr_raid5.c optional geom_raid geom/raid3/g_raid3.c optional geom_raid3 geom/raid3/g_raid3_ctl.c optional geom_raid3 geom/shsec/g_shsec.c optional geom_shsec geom/stripe/g_stripe.c optional geom_stripe contrib/xz-embedded/freebsd/xz_malloc.c \ optional xz_embedded | geom_uzip \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_crc32.c \ optional xz_embedded | geom_uzip \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c \ optional xz_embedded | geom_uzip \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c \ optional xz_embedded | geom_uzip \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_stream.c \ optional xz_embedded | geom_uzip \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" geom/uzip/g_uzip.c optional geom_uzip geom/uzip/g_uzip_lzma.c optional geom_uzip geom/uzip/g_uzip_wrkthr.c optional geom_uzip geom/uzip/g_uzip_zlib.c optional geom_uzip geom/vinum/geom_vinum.c optional geom_vinum geom/vinum/geom_vinum_create.c optional geom_vinum geom/vinum/geom_vinum_drive.c optional geom_vinum geom/vinum/geom_vinum_plex.c optional geom_vinum geom/vinum/geom_vinum_volume.c optional geom_vinum geom/vinum/geom_vinum_subr.c optional geom_vinum geom/vinum/geom_vinum_raid5.c optional geom_vinum geom/vinum/geom_vinum_share.c optional geom_vinum geom/vinum/geom_vinum_list.c optional geom_vinum geom/vinum/geom_vinum_rm.c optional geom_vinum geom/vinum/geom_vinum_init.c optional geom_vinum geom/vinum/geom_vinum_state.c optional geom_vinum geom/vinum/geom_vinum_rename.c optional geom_vinum geom/vinum/geom_vinum_move.c optional geom_vinum geom/vinum/geom_vinum_events.c optional geom_vinum geom/virstor/binstream.c optional geom_virstor geom/virstor/g_virstor.c optional geom_virstor geom/virstor/g_virstor_md.c optional geom_virstor geom/zero/g_zero.c optional geom_zero fs/ext2fs/ext2_alloc.c optional ext2fs fs/ext2fs/ext2_balloc.c optional ext2fs fs/ext2fs/ext2_bmap.c optional ext2fs fs/ext2fs/ext2_extents.c optional ext2fs fs/ext2fs/ext2_inode.c optional ext2fs fs/ext2fs/ext2_inode_cnv.c optional ext2fs fs/ext2fs/ext2_hash.c optional ext2fs fs/ext2fs/ext2_htree.c optional ext2fs fs/ext2fs/ext2_lookup.c optional ext2fs fs/ext2fs/ext2_subr.c optional ext2fs fs/ext2fs/ext2_vfsops.c optional ext2fs fs/ext2fs/ext2_vnops.c optional ext2fs # isa/isa_if.m standard isa/isa_common.c optional isa isa/isahint.c optional isa isa/pnp.c optional isa isapnp isa/pnpparse.c optional isa isapnp fs/cd9660/cd9660_bmap.c optional cd9660 fs/cd9660/cd9660_lookup.c optional cd9660 fs/cd9660/cd9660_node.c optional cd9660 fs/cd9660/cd9660_rrip.c optional cd9660 fs/cd9660/cd9660_util.c optional cd9660 fs/cd9660/cd9660_vfsops.c optional cd9660 fs/cd9660/cd9660_vnops.c optional cd9660 fs/cd9660/cd9660_iconv.c optional cd9660_iconv kern/bus_if.m standard kern/clock_if.m standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_binmisc.c optional imagact_binmisc kern/imgact_elf.c standard kern/imgact_elf32.c optional compat_freebsd32 kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/ksched.c optional _kposix_priority_scheduling kern/kern_acct.c standard kern/kern_alq.c optional alq kern/kern_clock.c standard kern/kern_condvar.c standard kern/kern_conf.c standard kern/kern_cons.c standard kern/kern_cpu.c standard kern/kern_cpuset.c standard kern/kern_context.c standard kern/kern_descrip.c standard kern/kern_dtrace.c optional kdtrace_hooks kern/kern_dump.c standard kern/kern_environment.c standard kern/kern_et.c standard kern/kern_event.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fail.c standard kern/kern_ffclock.c standard kern/kern_fork.c standard kern/kern_gzio.c optional gzio kern/kern_hhook.c standard kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr kern/kern_ktrace.c standard kern/kern_linker.c standard kern/kern_lock.c standard kern/kern_lockf.c standard kern/kern_lockstat.c optional kdtrace_hooks kern/kern_loginclass.c standard kern/kern_malloc.c standard kern/kern_mbuf.c standard kern/kern_mib.c standard kern/kern_module.c standard kern/kern_mtxpool.c standard kern/kern_mutex.c standard kern/kern_ntptime.c standard kern/kern_numa.c standard kern/kern_osd.c standard kern/kern_physio.c standard kern/kern_pmc.c standard kern/kern_poll.c optional device_polling kern/kern_priv.c standard kern/kern_proc.c standard kern/kern_procctl.c standard kern/kern_prot.c standard kern/kern_racct.c standard kern/kern_rangelock.c standard kern/kern_rctl.c standard kern/kern_resource.c standard kern/kern_rmlock.c standard kern/kern_rwlock.c standard kern/kern_sdt.c optional kdtrace_hooks kern/kern_sema.c standard kern/kern_sendfile.c standard kern/kern_sharedpage.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_switch.c standard kern/kern_sx.c standard kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard kern/kern_tc.c standard kern/kern_thr.c standard kern/kern_thread.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_xxx.c standard kern/link_elf.c standard kern/linker_if.m standard kern/md4c.c optional netsmb kern/md5c.c standard kern/p1003_1b.c standard kern/posix4_mib.c standard kern/sched_4bsd.c optional sched_4bsd kern/sched_ule.c optional sched_ule kern/serdev_if.m standard kern/stack_protector.c standard \ compile-with "${NORMAL_C:N-fstack-protector*}" kern/subr_acl_nfs4.c optional ufs_acl | zfs kern/subr_acl_posix1e.c optional ufs_acl kern/subr_autoconf.c standard kern/subr_blist.c standard kern/subr_bus.c standard kern/subr_bus_dma.c standard kern/subr_bufring.c standard kern/subr_capability.c standard kern/subr_clock.c standard kern/subr_counter.c standard kern/subr_devstat.c standard kern/subr_disk.c standard kern/subr_eventhandler.c standard kern/subr_fattime.c standard kern/subr_firmware.c optional firmware kern/subr_gtaskqueue.c standard kern/subr_hash.c standard kern/subr_hints.c standard kern/subr_kdb.c standard kern/subr_kobj.c standard kern/subr_lock.c standard kern/subr_log.c standard kern/subr_mbpool.c optional libmbpool kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_msgbuf.c standard kern/subr_param.c standard kern/subr_pcpu.c standard kern/subr_pctrie.c standard kern/subr_power.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rman.c standard kern/subr_rtc.c standard kern/subr_sbuf.c standard kern/subr_scanf.c standard kern/subr_sglist.c standard kern/subr_sleepqueue.c standard kern/subr_smp.c standard kern/subr_stack.c optional ddb | stack | ktr kern/subr_taskqueue.c standard kern/subr_terminal.c optional vt kern/subr_trap.c standard kern/subr_turnstile.c standard kern/subr_uio.c standard kern/subr_unit.c standard kern/subr_vmem.c standard kern/subr_witness.c optional witness kern/sys_capability.c standard kern/sys_generic.c standard kern/sys_pipe.c standard kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/syscalls.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c optional compat_43tty kern/tty_info.c standard kern/tty_inq.c standard kern/tty_outq.c standard kern/tty_pts.c standard kern/tty_tty.c standard kern/tty_ttydisc.c standard kern/uipc_accf.c standard kern/uipc_debug.c optional ddb kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_mbuf2.c standard kern/uipc_mbufhash.c standard kern/uipc_mqueue.c optional p1003_1b_mqueue kern/uipc_sem.c optional p1003_1b_semaphores kern/uipc_shm.c standard kern/uipc_sockbuf.c standard kern/uipc_socket.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_acl.c standard kern/vfs_aio.c standard kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_default.c standard kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard # # Kernel GSS-API # gssd.h optional kgssapi \ dependency "$S/kgssapi/gssd.x" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -hM $S/kgssapi/gssd.x | grep -v pthread.h > gssd.h" \ no-obj no-implicit-rule before-depend local \ clean "gssd.h" gssd_xdr.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -c $S/kgssapi/gssd.x -o gssd_xdr.c" \ no-implicit-rule before-depend local \ clean "gssd_xdr.c" gssd_clnt.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -lM $S/kgssapi/gssd.x | grep -v string.h > gssd_clnt.c" \ no-implicit-rule before-depend local \ clean "gssd_clnt.c" kgssapi/gss_accept_sec_context.c optional kgssapi kgssapi/gss_add_oid_set_member.c optional kgssapi kgssapi/gss_acquire_cred.c optional kgssapi kgssapi/gss_canonicalize_name.c optional kgssapi kgssapi/gss_create_empty_oid_set.c optional kgssapi kgssapi/gss_delete_sec_context.c optional kgssapi kgssapi/gss_display_status.c optional kgssapi kgssapi/gss_export_name.c optional kgssapi kgssapi/gss_get_mic.c optional kgssapi kgssapi/gss_init_sec_context.c optional kgssapi kgssapi/gss_impl.c optional kgssapi kgssapi/gss_import_name.c optional kgssapi kgssapi/gss_names.c optional kgssapi kgssapi/gss_pname_to_uid.c optional kgssapi kgssapi/gss_release_buffer.c optional kgssapi kgssapi/gss_release_cred.c optional kgssapi kgssapi/gss_release_name.c optional kgssapi kgssapi/gss_release_oid_set.c optional kgssapi kgssapi/gss_set_cred_option.c optional kgssapi kgssapi/gss_test_oid_set_member.c optional kgssapi kgssapi/gss_unwrap.c optional kgssapi kgssapi/gss_verify_mic.c optional kgssapi kgssapi/gss_wrap.c optional kgssapi kgssapi/gss_wrap_size_limit.c optional kgssapi kgssapi/gssd_prot.c optional kgssapi kgssapi/krb5/krb5_mech.c optional kgssapi kgssapi/krb5/kcrypto.c optional kgssapi kgssapi/krb5/kcrypto_aes.c optional kgssapi kgssapi/krb5/kcrypto_arcfour.c optional kgssapi kgssapi/krb5/kcrypto_des.c optional kgssapi kgssapi/krb5/kcrypto_des3.c optional kgssapi kgssapi/kgss_if.m optional kgssapi kgssapi/gsstest.c optional kgssapi_debug # These files in libkern/ are those needed by all architectures. Some # of the files in libkern/ are only needed on some architectures, e.g., # libkern/divdi3.c is needed by i386 but not alpha. Also, some of these # routines may be optimized for a particular platform. In either case, # the file should be moved to conf/files. from here. # libkern/arc4random.c standard libkern/asprintf.c standard libkern/bcd.c standard libkern/bsearch.c standard libkern/crc32.c standard libkern/explicit_bzero.c standard libkern/fnmatch.c standard libkern/iconv.c optional libiconv libkern/iconv_converter_if.m optional libiconv libkern/iconv_ucs.c optional libiconv libkern/iconv_xlat.c optional libiconv libkern/iconv_xlat16.c optional libiconv libkern/inet_aton.c standard libkern/inet_ntoa.c standard libkern/inet_ntop.c standard libkern/inet_pton.c standard libkern/jenkins_hash.c standard libkern/murmur3_32.c standard libkern/mcount.c optional profiling-routine libkern/memcchr.c standard libkern/memchr.c standard libkern/memcmp.c standard libkern/memmem.c optional gdb libkern/qsort.c standard libkern/qsort_r.c standard libkern/random.c standard libkern/scanc.c standard libkern/strcasecmp.c standard libkern/strcat.c standard libkern/strchr.c standard libkern/strcmp.c standard libkern/strcpy.c standard libkern/strcspn.c standard libkern/strdup.c standard libkern/strndup.c standard libkern/strlcat.c standard libkern/strlcpy.c standard libkern/strlen.c standard libkern/strncat.c standard libkern/strncmp.c standard libkern/strncpy.c standard libkern/strnlen.c standard libkern/strrchr.c standard libkern/strsep.c standard libkern/strspn.c standard libkern/strstr.c standard libkern/strtol.c standard libkern/strtoq.c standard libkern/strtoul.c standard libkern/strtouq.c standard libkern/strvalid.c standard libkern/timingsafe_bcmp.c standard libkern/zlib.c optional crypto | geom_uzip | ipsec | \ - mxge | netgraph_deflate | \ - ddb_ctf | gzio + ipsec_support | mxge | netgraph_deflate | ddb_ctf | gzio net/altq/altq_cbq.c optional altq net/altq/altq_cdnr.c optional altq net/altq/altq_codel.c optional altq net/altq/altq_hfsc.c optional altq net/altq/altq_fairq.c optional altq net/altq/altq_priq.c optional altq net/altq/altq_red.c optional altq net/altq/altq_rio.c optional altq net/altq/altq_rmclass.c optional altq net/altq/altq_subr.c optional altq net/bpf.c standard net/bpf_buffer.c optional bpf net/bpf_jitter.c optional bpf_jitter net/bpf_filter.c optional bpf | netgraph_bpf net/bpf_zerocopy.c optional bpf net/bridgestp.c optional bridge | if_bridge net/flowtable.c optional flowtable inet | flowtable inet6 net/ieee8023ad_lacp.c optional lagg net/if.c standard net/if_arcsubr.c optional arcnet net/if_atmsubr.c optional atm net/if_bridge.c optional bridge inet | if_bridge inet net/if_clone.c standard net/if_dead.c standard net/if_debug.c optional ddb net/if_disc.c optional disc net/if_edsc.c optional edsc net/if_enc.c optional enc inet | enc inet6 net/if_epair.c optional epair net/if_ethersubr.c optional ether net/if_fddisubr.c optional fddi net/if_fwsubr.c optional fwip net/if_gif.c optional gif inet | gif inet6 | \ netgraph_gif inet | netgraph_gif inet6 net/if_gre.c optional gre inet | gre inet6 +net/if_ipsec.c optional inet ipsec | inet6 ipsec net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop net/if_llatbl.c standard net/if_me.c optional me inet net/if_media.c standard net/if_mib.c standard net/if_spppfr.c optional sppp | netgraph_sppp net/if_spppsubr.c optional sppp | netgraph_sppp net/if_stf.c optional stf inet inet6 net/if_tun.c optional tun net/if_tap.c optional tap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 net/ifdi_if.m optional ether pci net/iflib.c optional ether pci net/mp_ring.c optional ether net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard net/pfil.c optional ether | inet net/radix.c standard net/radix_mpath.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rss_config.c optional inet rss | inet6 rss net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp net/toeplitz.c optional inet rss | inet6 rss net/vnet.c optional vimage net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl net80211/ieee80211_action.c optional wlan net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_adhoc.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_amrr.c optional wlan | wlan_amrr net80211/ieee80211_crypto.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_crypto_ccmp.c optional wlan wlan_ccmp net80211/ieee80211_crypto_none.c optional wlan net80211/ieee80211_crypto_tkip.c optional wlan wlan_tkip net80211/ieee80211_crypto_wep.c optional wlan wlan_wep net80211/ieee80211_ddb.c optional wlan ddb net80211/ieee80211_dfs.c optional wlan net80211/ieee80211_freebsd.c optional wlan net80211/ieee80211_hostap.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_ht.c optional wlan net80211/ieee80211_hwmp.c optional wlan ieee80211_support_mesh net80211/ieee80211_input.c optional wlan net80211/ieee80211_ioctl.c optional wlan net80211/ieee80211_mesh.c optional wlan ieee80211_support_mesh \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_monitor.c optional wlan net80211/ieee80211_node.c optional wlan net80211/ieee80211_output.c optional wlan net80211/ieee80211_phy.c optional wlan net80211/ieee80211_power.c optional wlan net80211/ieee80211_proto.c optional wlan net80211/ieee80211_radiotap.c optional wlan net80211/ieee80211_ratectl.c optional wlan net80211/ieee80211_ratectl_none.c optional wlan net80211/ieee80211_regdomain.c optional wlan net80211/ieee80211_rssadapt.c optional wlan wlan_rssadapt net80211/ieee80211_scan.c optional wlan net80211/ieee80211_scan_sta.c optional wlan net80211/ieee80211_sta.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_superg.c optional wlan ieee80211_support_superg net80211/ieee80211_scan_sw.c optional wlan net80211/ieee80211_tdma.c optional wlan ieee80211_support_tdma net80211/ieee80211_vht.c optional wlan net80211/ieee80211_wds.c optional wlan net80211/ieee80211_xauth.c optional wlan wlan_xauth net80211/ieee80211_alq.c optional wlan ieee80211_alq netgraph/atm/ccatm/ng_ccatm.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/ng_atm.c optional ngatm_atm netgraph/atm/ngatmbase.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscfu/ng_sscfu.c optional ngatm_sscfu \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscop/ng_sscop.c optional ngatm_sscop \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/uni/ng_uni.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/bluetooth/common/ng_bluetooth.c optional netgraph_bluetooth netgraph/bluetooth/drivers/bt3c/ng_bt3c_pccard.c optional netgraph_bluetooth_bt3c netgraph/bluetooth/drivers/h4/ng_h4.c optional netgraph_bluetooth_h4 netgraph/bluetooth/drivers/ubt/ng_ubt.c optional netgraph_bluetooth_ubt usb netgraph/bluetooth/drivers/ubtbcmfw/ubtbcmfw.c optional netgraph_bluetooth_ubtbcmfw usb netgraph/bluetooth/hci/ng_hci_cmds.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_evnt.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_main.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_misc.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_ulpi.c optional netgraph_bluetooth_hci netgraph/bluetooth/l2cap/ng_l2cap_cmds.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_evnt.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_llpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_main.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_misc.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/socket/ng_btsocket.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_hci_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket netgraph/netflow/netflow.c optional netgraph_netflow netgraph/netflow/netflow_v9.c optional netgraph_netflow netgraph/netflow/ng_netflow.c optional netgraph_netflow netgraph/ng_UI.c optional netgraph_UI netgraph/ng_async.c optional netgraph_async netgraph/ng_atmllc.c optional netgraph_atmllc netgraph/ng_base.c optional netgraph netgraph/ng_bpf.c optional netgraph_bpf netgraph/ng_bridge.c optional netgraph_bridge netgraph/ng_car.c optional netgraph_car netgraph/ng_cisco.c optional netgraph_cisco netgraph/ng_deflate.c optional netgraph_deflate netgraph/ng_device.c optional netgraph_device netgraph/ng_echo.c optional netgraph_echo netgraph/ng_eiface.c optional netgraph_eiface netgraph/ng_ether.c optional netgraph_ether netgraph/ng_ether_echo.c optional netgraph_ether_echo netgraph/ng_frame_relay.c optional netgraph_frame_relay netgraph/ng_gif.c optional netgraph_gif inet6 | netgraph_gif inet netgraph/ng_gif_demux.c optional netgraph_gif_demux netgraph/ng_hole.c optional netgraph_hole netgraph/ng_iface.c optional netgraph_iface netgraph/ng_ip_input.c optional netgraph_ip_input netgraph/ng_ipfw.c optional netgraph_ipfw inet ipfirewall netgraph/ng_ksocket.c optional netgraph_ksocket netgraph/ng_l2tp.c optional netgraph_l2tp netgraph/ng_lmi.c optional netgraph_lmi netgraph/ng_mppc.c optional netgraph_mppc_compression | \ netgraph_mppc_encryption netgraph/ng_nat.c optional netgraph_nat inet libalias netgraph/ng_one2many.c optional netgraph_one2many netgraph/ng_parse.c optional netgraph netgraph/ng_patch.c optional netgraph_patch netgraph/ng_pipe.c optional netgraph_pipe netgraph/ng_ppp.c optional netgraph_ppp netgraph/ng_pppoe.c optional netgraph_pppoe netgraph/ng_pptpgre.c optional netgraph_pptpgre netgraph/ng_pred1.c optional netgraph_pred1 netgraph/ng_rfc1490.c optional netgraph_rfc1490 netgraph/ng_socket.c optional netgraph_socket netgraph/ng_split.c optional netgraph_split netgraph/ng_sppp.c optional netgraph_sppp netgraph/ng_tag.c optional netgraph_tag netgraph/ng_tcpmss.c optional netgraph_tcpmss netgraph/ng_tee.c optional netgraph_tee netgraph/ng_tty.c optional netgraph_tty netgraph/ng_vjc.c optional netgraph_vjc netgraph/ng_vlan.c optional netgraph_vlan netinet/accf_data.c optional accept_filter_data inet netinet/accf_dns.c optional accept_filter_dns inet netinet/accf_http.c optional accept_filter_http inet netinet/if_atm.c optional atm netinet/if_ether.c optional inet ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_debug.c optional inet ddb netinet/in_kdtrace.c optional inet | inet6 netinet/ip_carp.c optional inet carp | inet6 carp netinet/in_fib.c optional inet netinet/in_gif.c optional gif inet | netgraph_gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_jail.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup netinet/in_prot.c optional inet | inet6 netinet/in_proto.c optional inet | inet6 netinet/in_rmx.c optional inet netinet/in_rss.c optional inet rss netinet/ip_divert.c optional inet ipdivert ipfirewall netinet/ip_ecn.c optional inet | inet6 netinet/ip_encap.c optional inet | inet6 netinet/ip_fastfwd.c optional inet netinet/ip_icmp.c optional inet | inet6 netinet/ip_input.c optional inet -netinet/ip_ipsec.c optional inet ipsec netinet/ip_mroute.c optional mrouting inet netinet/ip_options.c optional inet netinet/ip_output.c optional inet netinet/ip_reass.c optional inet netinet/raw_ip.c optional inet | inet6 netinet/cc/cc.c optional inet | inet6 netinet/cc/cc_newreno.c optional inet | inet6 netinet/sctp_asconf.c optional inet sctp | inet6 sctp netinet/sctp_auth.c optional inet sctp | inet6 sctp netinet/sctp_bsd_addr.c optional inet sctp | inet6 sctp netinet/sctp_cc_functions.c optional inet sctp | inet6 sctp netinet/sctp_crc32.c optional inet sctp | inet6 sctp netinet/sctp_indata.c optional inet sctp | inet6 sctp netinet/sctp_input.c optional inet sctp | inet6 sctp netinet/sctp_output.c optional inet sctp | inet6 sctp netinet/sctp_pcb.c optional inet sctp | inet6 sctp netinet/sctp_peeloff.c optional inet sctp | inet6 sctp netinet/sctp_ss_functions.c optional inet sctp | inet6 sctp netinet/sctp_syscalls.c optional inet sctp | inet6 sctp netinet/sctp_sysctl.c optional inet sctp | inet6 sctp netinet/sctp_timer.c optional inet sctp | inet6 sctp netinet/sctp_usrreq.c optional inet sctp | inet6 sctp netinet/sctputil.c optional inet sctp | inet6 sctp netinet/siftr.c optional inet siftr alq | inet6 siftr alq netinet/tcp_debug.c optional tcpdebug netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413 netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 netinet/tcp_lro.c optional inet | inet6 netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap netinet/tcp_reass.c optional inet | inet6 netinet/tcp_sack.c optional inet | inet6 netinet/tcp_subr.c optional inet | inet6 netinet/tcp_syncache.c optional inet | inet6 netinet/tcp_timer.c optional inet | inet6 netinet/tcp_timewait.c optional inet | inet6 netinet/tcp_usrreq.c optional inet | inet6 netinet/udp_usrreq.c optional inet | inet6 netinet/libalias/alias.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_db.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_mod.c optional libalias | netgraph_nat netinet/libalias/alias_proxy.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_util.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_sctp.c optional libalias inet | netgraph_nat inet netinet6/dest6.c optional inet6 netinet6/frag6.c optional inet6 netinet6/icmp6.c optional inet6 netinet6/in6.c optional inet6 netinet6/in6_cksum.c optional inet6 netinet6/in6_fib.c optional inet6 netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 netinet6/in6_jail.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 netinet6/in6_pcbgroup.c optional inet6 pcbgroup netinet6/in6_proto.c optional inet6 netinet6/in6_rmx.c optional inet6 netinet6/in6_rss.c optional inet6 rss netinet6/in6_src.c optional inet6 netinet6/ip6_fastfwd.c optional inet6 netinet6/ip6_forward.c optional inet6 netinet6/ip6_gre.c optional gre inet6 netinet6/ip6_id.c optional inet6 netinet6/ip6_input.c optional inet6 netinet6/ip6_mroute.c optional mrouting inet6 netinet6/ip6_output.c optional inet6 -netinet6/ip6_ipsec.c optional inet6 ipsec netinet6/mld6.c optional inet6 netinet6/nd6.c optional inet6 netinet6/nd6_nbr.c optional inet6 netinet6/nd6_rtr.c optional inet6 netinet6/raw_ip6.c optional inet6 netinet6/route6.c optional inet6 netinet6/scope6.c optional inet6 netinet6/sctp6_usrreq.c optional inet6 sctp netinet6/udp6_usrreq.c optional inet6 netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_mod.c optional ipsec inet | ipsec inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 -netipsec/key.c optional ipsec inet | ipsec inet6 -netipsec/key_debug.c optional ipsec inet | ipsec inet6 -netipsec/keysock.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_pcb.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/key.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/key_debug.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/keysock.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/subr_ipsec.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/udpencap.c optional ipsec inet netipsec/xform_ah.c optional ipsec inet | ipsec inet6 netipsec/xform_esp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipcomp.c optional ipsec inet | ipsec inet6 netipsec/xform_tcp.c optional ipsec inet tcp_signature | \ - ipsec inet6 tcp_signature + ipsec inet6 tcp_signature | ipsec_support inet tcp_signature | \ + ipsec_support inet6 tcp_signature netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm netpfil/ipfw/dn_aqm_codel.c optional inet dummynet netpfil/ipfw/dn_aqm_pie.c optional inet dummynet netpfil/ipfw/dn_heap.c optional inet dummynet netpfil/ipfw/dn_sched_fifo.c optional inet dummynet netpfil/ipfw/dn_sched_fq_codel.c optional inet dummynet netpfil/ipfw/dn_sched_fq_pie.c optional inet dummynet netpfil/ipfw/dn_sched_prio.c optional inet dummynet netpfil/ipfw/dn_sched_qfq.c optional inet dummynet netpfil/ipfw/dn_sched_rr.c optional inet dummynet netpfil/ipfw/dn_sched_wf2q.c optional inet dummynet netpfil/ipfw/ip_dummynet.c optional inet dummynet netpfil/ipfw/ip_dn_io.c optional inet dummynet netpfil/ipfw/ip_dn_glue.c optional inet dummynet netpfil/ipfw/ip_fw2.c optional inet ipfirewall netpfil/ipfw/ip_fw_bpf.c optional inet ipfirewall netpfil/ipfw/ip_fw_dynamic.c optional inet ipfirewall netpfil/ipfw/ip_fw_eaction.c optional inet ipfirewall netpfil/ipfw/ip_fw_log.c optional inet ipfirewall netpfil/ipfw/ip_fw_pfil.c optional inet ipfirewall netpfil/ipfw/ip_fw_sockopt.c optional inet ipfirewall netpfil/ipfw/ip_fw_table.c optional inet ipfirewall netpfil/ipfw/ip_fw_table_algo.c optional inet ipfirewall netpfil/ipfw/ip_fw_table_value.c optional inet ipfirewall netpfil/ipfw/ip_fw_iface.c optional inet ipfirewall netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat netpfil/ipfw/nat64/ip_fw_nat64.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nat64/nat64_translate.c optional inet inet6 ipfirewall \ ipfirewall_nat64 netpfil/ipfw/nptv6/ip_fw_nptv6.c optional inet inet6 ipfirewall \ ipfirewall_nptv6 netpfil/ipfw/nptv6/nptv6.c optional inet inet6 ipfirewall \ ipfirewall_nptv6 netpfil/pf/if_pflog.c optional pflog pf inet netpfil/pf/if_pfsync.c optional pfsync pf inet netpfil/pf/pf.c optional pf inet netpfil/pf/pf_if.c optional pf inet netpfil/pf/pf_ioctl.c optional pf inet netpfil/pf/pf_lb.c optional pf inet netpfil/pf/pf_norm.c optional pf inet netpfil/pf/pf_osfp.c optional pf inet netpfil/pf/pf_ruleset.c optional pf inet netpfil/pf/pf_table.c optional pf inet netpfil/pf/in4_cksum.c optional pf inet netsmb/smb_conn.c optional netsmb netsmb/smb_crypt.c optional netsmb netsmb/smb_dev.c optional netsmb netsmb/smb_iod.c optional netsmb netsmb/smb_rq.c optional netsmb netsmb/smb_smb.c optional netsmb netsmb/smb_subr.c optional netsmb netsmb/smb_trantcp.c optional netsmb netsmb/smb_usr.c optional netsmb nfs/bootp_subr.c optional bootp nfscl nfs/krpc_subr.c optional bootp nfscl nfs/nfs_diskless.c optional nfscl nfs_root nfs/nfs_fha.c optional nfsd nfs/nfs_lock.c optional nfscl | nfslockd | nfsd nfs/nfs_nfssvc.c optional nfscl | nfsd nlm/nlm_advlock.c optional nfslockd | nfsd nlm/nlm_prot_clnt.c optional nfslockd | nfsd nlm/nlm_prot_impl.c optional nfslockd | nfsd nlm/nlm_prot_server.c optional nfslockd | nfsd nlm/nlm_prot_svc.c optional nfslockd | nfsd nlm/nlm_prot_xdr.c optional nfslockd | nfsd nlm/sm_inter_xdr.c optional nfslockd | nfsd # Linux Kernel Programming Interface compat/linuxkpi/common/src/linux_kmod.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_compat.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_pci.c optional compat_linuxkpi pci \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_idr.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_radix.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_usb.c optional compat_linuxkpi usb \ compile-with "${LINUXKPI_C}" # OpenFabrics Enterprise Distribution (Infiniband) ofed/drivers/infiniband/core/addr.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/agent.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cache.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" # XXX Mad.c must be ordered before cm.c for sysinit sets to occur in # the correct order. ofed/drivers/infiniband/core/mad.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cm.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/ -Wno-unused-function" ofed/drivers/infiniband/core/cma.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/device.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/fmr_pool.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/iwcm.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/multicast.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/packer.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/peer_mem.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sa_query.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/smi.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sysfs.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucm.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucma.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ud_header.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/umem.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/user_mad.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_cmd.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_main.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_marshall.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/verbs.c optional ofed \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c optional ipoib \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_fs.c optional ipoib \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c optional ipoib \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c optional ipoib \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c optional ipoib \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c optional ipoib \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c optional ipoib \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c optional sdp inet \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_main.c optional sdp inet \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_rx.c optional sdp inet \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_cma.c optional sdp inet \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_mcg.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_sysfs.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_cm.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_ah.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_cq.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_doorbell.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_mad.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_main.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_exp.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_mr.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_qp.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_srq.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_ib/mlx4_ib_wc.c optional mlx4ib pci ofed \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_alloc.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_catas.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_cmd.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_cq.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_eq.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_fw.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_icm.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_intf.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_main.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_mcg.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_mr.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_pd.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_port.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_profile.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_qp.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_reset.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_sense.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_srq.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_resource_tracker.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_core/mlx4_sys_tune.c optional mlx4 pci \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_cq.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_main.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_netdev.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_port.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_resources.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_rx.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx4/mlx4_en/mlx4_en_tx.c optional mlx4en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_alloc.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_cmd.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_cq.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_diagnostics.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_eq.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_flow_table.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_fw.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_health.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_mad.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_main.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_mcg.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_mr.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_pagealloc.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_pd.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_port.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_qp.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_srq.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_transobj.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_uar.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_vport.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_wq.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_ethtool.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_main.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_tx.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_flow_table.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_rx.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_txrx.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \ compile-with "${OFED_C}" # crypto support -opencrypto/cast.c optional crypto | ipsec -opencrypto/criov.c optional crypto | ipsec -opencrypto/crypto.c optional crypto | ipsec +opencrypto/cast.c optional crypto | ipsec | ipsec_support +opencrypto/criov.c optional crypto | ipsec | ipsec_support +opencrypto/crypto.c optional crypto | ipsec | ipsec_support opencrypto/cryptodev.c optional cryptodev -opencrypto/cryptodev_if.m optional crypto | ipsec -opencrypto/cryptosoft.c optional crypto | ipsec -opencrypto/cryptodeflate.c optional crypto | ipsec -opencrypto/gmac.c optional crypto | ipsec -opencrypto/gfmult.c optional crypto | ipsec -opencrypto/rmd160.c optional crypto | ipsec -opencrypto/skipjack.c optional crypto | ipsec -opencrypto/xform.c optional crypto | ipsec +opencrypto/cryptodev_if.m optional crypto | ipsec | ipsec_support +opencrypto/cryptosoft.c optional crypto | ipsec | ipsec_support +opencrypto/cryptodeflate.c optional crypto | ipsec | ipsec_support +opencrypto/gmac.c optional crypto | ipsec | ipsec_support +opencrypto/gfmult.c optional crypto | ipsec | ipsec_support +opencrypto/rmd160.c optional crypto | ipsec | ipsec_support +opencrypto/skipjack.c optional crypto | ipsec | ipsec_support +opencrypto/xform.c optional crypto | ipsec | ipsec_support rpc/auth_none.c optional krpc | nfslockd | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_bck.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_dg.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_rc.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_vc.c optional krpc | nfslockd | nfscl | nfsd rpc/getnetconfig.c optional krpc | nfslockd | nfscl | nfsd rpc/replay.c optional krpc | nfslockd | nfscl | nfsd rpc/rpc_callmsg.c optional krpc | nfslockd | nfscl | nfsd rpc/rpc_generic.c optional krpc | nfslockd | nfscl | nfsd rpc/rpc_prot.c optional krpc | nfslockd | nfscl | nfsd rpc/rpcb_clnt.c optional krpc | nfslockd | nfscl | nfsd rpc/rpcb_prot.c optional krpc | nfslockd | nfscl | nfsd rpc/svc.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_auth.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_dg.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_generic.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_vc.c optional krpc | nfslockd | nfscl | nfsd rpc/rpcsec_gss/rpcsec_gss.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_conf.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_misc.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_prot.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/svc_rpcsec_gss.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi security/audit/audit.c optional audit security/audit/audit_arg.c optional audit security/audit/audit_bsm.c optional audit security/audit/audit_bsm_klib.c optional audit security/audit/audit_pipe.c optional audit security/audit/audit_syscalls.c standard security/audit/audit_trigger.c optional audit security/audit/audit_worker.c optional audit security/audit/bsm_domain.c optional audit security/audit/bsm_errno.c optional audit security/audit/bsm_fcntl.c optional audit security/audit/bsm_socket_type.c optional audit security/audit/bsm_token.c optional audit security/mac/mac_audit.c optional mac audit security/mac/mac_cred.c optional mac security/mac/mac_framework.c optional mac security/mac/mac_inet.c optional mac inet | mac inet6 security/mac/mac_inet6.c optional mac inet6 security/mac/mac_label.c optional mac security/mac/mac_net.c optional mac security/mac/mac_pipe.c optional mac security/mac/mac_posix_sem.c optional mac security/mac/mac_posix_shm.c optional mac security/mac/mac_priv.c optional mac security/mac/mac_process.c optional mac security/mac/mac_socket.c optional mac security/mac/mac_syscalls.c standard security/mac/mac_system.c optional mac security/mac/mac_sysv_msg.c optional mac security/mac/mac_sysv_sem.c optional mac security/mac/mac_sysv_shm.c optional mac security/mac/mac_vfs.c optional mac security/mac_biba/mac_biba.c optional mac_biba security/mac_bsdextended/mac_bsdextended.c optional mac_bsdextended security/mac_bsdextended/ugidfw_system.c optional mac_bsdextended security/mac_bsdextended/ugidfw_vnode.c optional mac_bsdextended security/mac_ifoff/mac_ifoff.c optional mac_ifoff security/mac_lomac/mac_lomac.c optional mac_lomac security/mac_mls/mac_mls.c optional mac_mls security/mac_none/mac_none.c optional mac_none security/mac_partition/mac_partition.c optional mac_partition security/mac_portacl/mac_portacl.c optional mac_portacl security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids security/mac_stub/mac_stub.c optional mac_stub security/mac_test/mac_test.c optional mac_test teken/teken.c optional sc | vt ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_snapshot.c optional ffs ufs/ffs/ffs_softdep.c optional ffs ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_vfsops.c optional ffs ufs/ffs/ffs_vnops.c optional ffs ufs/ffs/ffs_rawread.c optional ffs directio ufs/ffs/ffs_suspend.c optional ffs ufs/ufs/ufs_acl.c optional ffs ufs/ufs/ufs_bmap.c optional ffs ufs/ufs/ufs_dirhash.c optional ffs ufs/ufs/ufs_extattr.c optional ffs ufs/ufs/ufs_gjournal.c optional ffs UFS_GJOURNAL ufs/ufs/ufs_inode.c optional ffs ufs/ufs/ufs_lookup.c optional ffs ufs/ufs/ufs_quota.c optional ffs ufs/ufs/ufs_vfsops.c optional ffs ufs/ufs/ufs_vnops.c optional ffs vm/default_pager.c standard vm/device_pager.c standard vm/phys_pager.c standard vm/redzone.c optional DEBUG_REDZONE vm/sg_pager.c standard vm/swap_pager.c standard vm/uma_core.c standard vm/uma_dbg.c standard vm/memguard.c optional DEBUG_MEMGUARD vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard vm/vm_kern.c standard vm/vm_map.c standard vm/vm_meter.c standard vm/vm_mmap.c standard vm/vm_object.c standard vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard vm/vm_radix.c standard vm/vm_reserv.c standard vm/vm_domain.c standard vm/vm_unix.c standard vm/vnode_pager.c standard xen/features.c optional xenhvm xen/xenbus/xenbus_if.m optional xenhvm xen/xenbus/xenbus.c optional xenhvm xen/xenbus/xenbusb_if.m optional xenhvm xen/xenbus/xenbusb.c optional xenhvm xen/xenbus/xenbusb_front.c optional xenhvm xen/xenbus/xenbusb_back.c optional xenhvm xen/xenmem/xenmem_if.m optional xenhvm xdr/xdr.c optional krpc | nfslockd | nfscl | nfsd xdr/xdr_array.c optional krpc | nfslockd | nfscl | nfsd xdr/xdr_mbuf.c optional krpc | nfslockd | nfscl | nfsd xdr/xdr_mem.c optional krpc | nfslockd | nfscl | nfsd xdr/xdr_reference.c optional krpc | nfslockd | nfscl | nfsd xdr/xdr_sizeof.c optional krpc | nfslockd | nfscl | nfsd diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 741731d0f9ef..d03602207bbe 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -1,678 +1,679 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # # cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_i686_on_64bit.S" \ compile-with "${CC} -x assembler-with-cpp -m32 -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_i686_on_64bit.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-x86-64-freebsd --binary-architecture i386 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # cloudabi64_vdso.o optional compat_cloudabi64 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_x86_64.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_x86_64.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi64_vdso.o" # cloudabi64_vdso_blob.o optional compat_cloudabi64 \ dependency "cloudabi64_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-x86-64-freebsd --binary-architecture i386 cloudabi64_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi64_vdso_blob.o" # linux32_genassym.o optional compat_linux32 \ dependency "$S/amd64/linux32/linux32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux32_genassym.o" # linux32_assym.h optional compat_linux32 \ dependency "$S/kern/genassym.sh linux32_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux32_assym.h" # linux32_locore.o optional compat_linux32 \ dependency "linux32_assym.h $S/amd64/linux32/linux32_locore.s" \ compile-with "${CC} -x assembler-with-cpp -DLOCORE -m32 -shared -s -pipe -I. -I$S -Werror -Wall -fno-common -nostdinc -nostdlib -Wl,-T$S/amd64/linux32/linux32_vdso.lds.s -Wl,-soname=linux32_vdso.so,--eh-frame-hdr,-fPIC,-warn-common ${.IMPSRC} -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "linux32_locore.o" # linux32_vdso.so optional compat_linux32 \ dependency "linux32_locore.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-x86-64-freebsd --binary-architecture i386 linux32_locore.o ${.TARGET}" \ no-implicit-rule \ clean "linux32_vdso.so" # ia32_genassym.o standard \ dependency "$S/compat/ia32/ia32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "ia32_genassym.o" # ia32_assym.h standard \ dependency "$S/kern/genassym.sh ia32_genassym.o" \ compile-with "env NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "ia32_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/amd64-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/amd64-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ no-implicit-rule # amd64/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # amd64/amd64/amd64_mem.c optional mem #amd64/amd64/apic_vector.S standard amd64/amd64/atomic.c standard amd64/amd64/bios.c standard amd64/amd64/bpf_jit_machdep.c optional bpf_jitter amd64/amd64/cpu_switch.S standard amd64/amd64/db_disasm.c optional ddb amd64/amd64/db_interface.c optional ddb amd64/amd64/db_trace.c optional ddb amd64/amd64/efirt.c optional efirt amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard amd64/amd64/fpu.c standard amd64/amd64/gdb_machdep.c optional gdb amd64/amd64/in_cksum.c optional inet | inet6 amd64/amd64/initcpu.c standard amd64/amd64/io.c optional io amd64/amd64/locore.S standard no-obj amd64/amd64/xen-locore.S optional xenhvm amd64/amd64/machdep.c standard amd64/amd64/mem.c optional mem amd64/amd64/minidump_machdep.c standard amd64/amd64/mp_machdep.c optional smp amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine amd64/amd64/ptrace_machdep.c standard amd64/amd64/sigtramp.S standard amd64/amd64/support.S standard amd64/amd64/sys_machdep.c standard amd64/amd64/trap.c standard amd64/amd64/uio_machdep.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 amd64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs | dtrace compile-with "${ZFS_S}" cddl/dev/dtrace/amd64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/amd64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/x86/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/dtrace/x86/dis_tables.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" cddl/dev/dtrace/x86/instr_size.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" crypto/aesni/aeskeys_amd64.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | \ + ipsec_support | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/coretemp/coretemp.c optional coretemp dev/cpuctl/cpuctl.c optional cpuctl dev/dpms/dpms.c optional dpms # There are no systems with isa slots, so all ed isa entries should go.. dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/intel/spi.c optional intelspi dev/io/iodev.c optional io dev/ioat/ioat.c optional ioat pci dev/ioat/ioat_test.c optional ioat pci dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux32 dev/ixl/if_ixl.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_main.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_qmgr.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_pf_iov.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/if_ixlv.c optional ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixlvc.c optional ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_txrx.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_osdep.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_lan_hmc.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_hmc.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_common.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_nvm.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_adminq.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/gpio/bytgpio.c optional bytgpio dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_snapshot.c optional hyperv dev/hyperv/utilities/vmbus_heartbeat.c optional hyperv dev/hyperv/utilities/vmbus_ic.c optional hyperv dev/hyperv/utilities/vmbus_shutdown.c optional hyperv dev/hyperv/utilities/vmbus_timesync.c optional hyperv dev/hyperv/vmbus/hyperv.c optional hyperv dev/hyperv/vmbus/hyperv_busdma.c optional hyperv dev/hyperv/vmbus/vmbus.c optional hyperv pci dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_transport.c optional if_ntb dev/ntb/ntb.c optional if_ntb | ntb_hw dev/ntb/ntb_if.m optional if_ntb | ntb_hw dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sim.c optional nvme scbus !nvd dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/qlxge/qls_dbg.c optional qlxge pci dev/qlxge/qls_dump.c optional qlxge pci dev/qlxge/qls_hw.c optional qlxge pci dev/qlxge/qls_ioctl.c optional qlxge pci dev/qlxge/qls_isr.c optional qlxge pci dev/qlxge/qls_os.c optional qlxge pci dev/qlxgb/qla_dbg.c optional qlxgb pci dev/qlxgb/qla_hw.c optional qlxgb pci dev/qlxgb/qla_ioctl.c optional qlxgb pci dev/qlxgb/qla_isr.c optional qlxgb pci dev/qlxgb/qla_misc.c optional qlxgb pci dev/qlxgb/qla_os.c optional qlxgb pci dev/qlxgbe/ql_dbg.c optional qlxgbe pci dev/qlxgbe/ql_hw.c optional qlxgbe pci dev/qlxgbe/ql_ioctl.c optional qlxgbe pci dev/qlxgbe/ql_isr.c optional qlxgbe pci dev/qlxgbe/ql_misc.c optional qlxgbe pci dev/qlxgbe/ql_os.c optional qlxgbe pci dev/qlxgbe/ql_reset.c optional qlxgbe pci dev/sfxge/common/ef10_ev.c optional sfxge pci dev/sfxge/common/ef10_filter.c optional sfxge pci dev/sfxge/common/ef10_intr.c optional sfxge pci dev/sfxge/common/ef10_mac.c optional sfxge pci dev/sfxge/common/ef10_mcdi.c optional sfxge pci dev/sfxge/common/ef10_nic.c optional sfxge pci dev/sfxge/common/ef10_nvram.c optional sfxge pci dev/sfxge/common/ef10_phy.c optional sfxge pci dev/sfxge/common/ef10_rx.c optional sfxge pci dev/sfxge/common/ef10_tx.c optional sfxge pci dev/sfxge/common/ef10_vpd.c optional sfxge pci dev/sfxge/common/efx_bootcfg.c optional sfxge pci dev/sfxge/common/efx_crc32.c optional sfxge pci dev/sfxge/common/efx_ev.c optional sfxge pci dev/sfxge/common/efx_filter.c optional sfxge pci dev/sfxge/common/efx_hash.c optional sfxge pci dev/sfxge/common/efx_intr.c optional sfxge pci dev/sfxge/common/efx_lic.c optional sfxge pci dev/sfxge/common/efx_mac.c optional sfxge pci dev/sfxge/common/efx_mcdi.c optional sfxge pci dev/sfxge/common/efx_mon.c optional sfxge pci dev/sfxge/common/efx_nic.c optional sfxge pci dev/sfxge/common/efx_nvram.c optional sfxge pci dev/sfxge/common/efx_phy.c optional sfxge pci dev/sfxge/common/efx_port.c optional sfxge pci dev/sfxge/common/efx_rx.c optional sfxge pci dev/sfxge/common/efx_sram.c optional sfxge pci dev/sfxge/common/efx_tx.c optional sfxge pci dev/sfxge/common/efx_vpd.c optional sfxge pci dev/sfxge/common/hunt_nic.c optional sfxge pci dev/sfxge/common/mcdi_mon.c optional sfxge pci dev/sfxge/common/medford_nic.c optional sfxge pci dev/sfxge/common/siena_mac.c optional sfxge pci dev/sfxge/common/siena_mcdi.c optional sfxge pci dev/sfxge/common/siena_nic.c optional sfxge pci dev/sfxge/common/siena_nvram.c optional sfxge pci dev/sfxge/common/siena_phy.c optional sfxge pci dev/sfxge/common/siena_sram.c optional sfxge pci dev/sfxge/common/siena_vpd.c optional sfxge pci dev/sfxge/sfxge.c optional sfxge pci dev/sfxge/sfxge_dma.c optional sfxge pci dev/sfxge/sfxge_ev.c optional sfxge pci dev/sfxge/sfxge_intr.c optional sfxge pci dev/sfxge/sfxge_mcdi.c optional sfxge pci dev/sfxge/sfxge_nvram.c optional sfxge pci dev/sfxge/sfxge_port.c optional sfxge pci dev/sfxge/sfxge_rx.c optional sfxge pci dev/sfxge/sfxge_tx.c optional sfxge pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/xen/pci/xen_acpi_pci.c optional xenhvm dev/xen/pci/xen_pci.c optional xenhvm dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/link_elf_obj.c standard # # IA32 binary support # #amd64/ia32/ia32_exception.S optional compat_freebsd32 amd64/ia32/ia32_reg.c optional compat_freebsd32 amd64/ia32/ia32_signal.c optional compat_freebsd32 amd64/ia32/ia32_sigtramp.S optional compat_freebsd32 amd64/ia32/ia32_syscall.c optional compat_freebsd32 amd64/ia32/ia32_misc.c optional compat_freebsd32 compat/ia32/ia32_sysvec.c optional compat_freebsd32 compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs # # Linux/i386 binary support # amd64/linux32/linux32_dummy.c optional compat_linux32 amd64/linux32/linux32_machdep.c optional compat_linux32 amd64/linux32/linux32_support.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_sysent.c optional compat_linux32 amd64/linux32/linux32_sysvec.c optional compat_linux32 compat/linux/linux_emul.c optional compat_linux32 compat/linux/linux_file.c optional compat_linux32 compat/linux/linux_fork.c optional compat_linux32 compat/linux/linux_futex.c optional compat_linux32 compat/linux/linux_getcwd.c optional compat_linux32 compat/linux/linux_ioctl.c optional compat_linux32 compat/linux/linux_ipc.c optional compat_linux32 compat/linux/linux_mib.c optional compat_linux32 compat/linux/linux_misc.c optional compat_linux32 compat/linux/linux_mmap.c optional compat_linux32 compat/linux/linux_signal.c optional compat_linux32 compat/linux/linux_socket.c optional compat_linux32 compat/linux/linux_stats.c optional compat_linux32 compat/linux/linux_sysctl.c optional compat_linux32 compat/linux/linux_time.c optional compat_linux32 compat/linux/linux_timer.c optional compat_linux32 compat/linux/linux_uid16.c optional compat_linux32 compat/linux/linux_util.c optional compat_linux32 compat/linux/linux_vdso.c optional compat_linux32 compat/linux/linux_common.c optional compat_linux32 compat/linux/linux_event.c optional compat_linux32 compat/linux/linux.c optional compat_linux32 dev/amr/amr_linux.c optional compat_linux32 amr dev/mfi/mfi_linux.c optional compat_linux32 mfi # # Windows NDIS driver support # compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx64_wrap.S optional ndisapi pci # crc32_sse42.o standard \ dependency "$S/libkern/x86/crc32_sse42.c" \ compile-with "${CC} -c ${CFLAGS:N-nostdinc} ${WERROR} ${PROF} -msse4 ${.IMPSRC}" \ no-implicit-rule \ clean "crc32_sse42.o" libkern/memmove.c standard libkern/memset.c standard # # x86 real mode BIOS emulator, required by dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa contrib/x86emu/x86emu.c optional x86bios | dpms | pci | vesa # # bvm console # dev/bvm/bvm_console.c optional bvmconsole dev/bvm/bvm_dbg.c optional bvmdebug # # x86 shared code between IA32 and AMD64 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci x86/iommu/intel_ctx.c optional acpi acpi_dmar pci x86/iommu/intel_drv.c optional acpi acpi_dmar pci x86/iommu/intel_fault.c optional acpi acpi_dmar pci x86/iommu/intel_gas.c optional acpi acpi_dmar pci x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci x86/iommu/intel_qi.c optional acpi acpi_dmar pci x86/iommu/intel_quirks.c optional acpi acpi_dmar pci x86/iommu/intel_utils.c optional acpi acpi_dmar pci x86/isa/atpic.c optional atpic isa x86/isa/atrtc.c standard x86/isa/clock.c standard x86/isa/elcr.c optional atpic isa | mptable x86/isa/isa.c standard x86/isa/isa_dma.c standard x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/autoconf.c standard x86/x86/bus_machdep.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c standard x86/x86/legacy.c standard x86/x86/local_apic.c standard x86/x86/mca.c standard x86/x86/mptable.c optional mptable x86/x86/mptable_pci.c optional mptable pci x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/msi.c optional pci x86/x86/nexus.c standard x86/x86/pvclock.c standard x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xenhvm x86/xen/pv.c optional xenhvm x86/xen/pvcpu_enum.c optional xenhvm x86/xen/xen_apic.c optional xenhvm x86/xen/xenpv.c optional xenhvm x86/xen/xen_nexus.c optional xenhvm x86/xen/xen_msi.c optional xenhvm x86/xen/xen_pci_bus.c optional xenhvm diff --git a/sys/conf/files.arm b/sys/conf/files.arm index 366ca243302b..e81b6120f7b3 100644 --- a/sys/conf/files.arm +++ b/sys/conf/files.arm @@ -1,167 +1,167 @@ # $FreeBSD$ cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_armv6.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_armv6.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-littlearm --binary-architecture arm cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # arm/annapurna/alpine/alpine_ccu.c optional al_ccu fdt arm/annapurna/alpine/alpine_nb_service.c optional al_nb_service fdt arm/annapurna/alpine/alpine_pci.c optional al_pci fdt arm/annapurna/alpine/alpine_pci_msix.c optional al_pci fdt arm/annapurna/alpine/alpine_serdes.c optional al_serdes fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" arm/arm/autoconf.c standard arm/arm/bcopy_page.S standard arm/arm/bcopyinout.S standard arm/arm/blockio.S standard arm/arm/bus_space_asm_generic.S standard arm/arm/bus_space_base.c optional fdt arm/arm/bus_space_generic.c standard arm/arm/busdma_machdep-v4.c optional !armv6 arm/arm/busdma_machdep-v6.c optional armv6 arm/arm/copystr.S standard arm/arm/cpufunc.c standard arm/arm/cpufunc_asm.S standard arm/arm/cpufunc_asm_arm9.S optional cpu_arm9 | cpu_arm9e arm/arm/cpufunc_asm_arm11.S optional cpu_arm1176 arm/arm/cpufunc_asm_arm11x6.S optional cpu_arm1176 arm/arm/cpufunc_asm_armv4.S optional cpu_arm9 | cpu_arm9e | cpu_fa526 | cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_armv5_ec.S optional cpu_arm9e arm/arm/cpufunc_asm_armv6.S optional cpu_arm1176 arm/arm/cpufunc_asm_armv7.S optional cpu_cortexa | cpu_krait | cpu_mv_pj4b arm/arm/cpufunc_asm_fa526.S optional cpu_fa526 arm/arm/cpufunc_asm_pj4b.S optional cpu_mv_pj4b arm/arm/cpufunc_asm_sheeva.S optional cpu_arm9e arm/arm/cpufunc_asm_xscale.S optional cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_xscale_c3.S optional cpu_xscale_81342 arm/arm/cpuinfo.c standard arm/arm/cpu_asm-v6.S optional armv6 arm/arm/db_disasm.c optional ddb arm/arm/db_interface.c optional ddb arm/arm/db_trace.c optional ddb arm/arm/debug_monitor.c optional ddb armv6 arm/arm/disassem.c optional ddb arm/arm/dump_machdep.c standard arm/arm/elf_machdep.c standard arm/arm/elf_note.S standard arm/arm/exception.S standard arm/arm/fiq.c standard arm/arm/fiq_subr.S standard arm/arm/fusu.S standard arm/arm/gdb_machdep.c optional gdb arm/arm/generic_timer.c optional generic_timer arm/arm/gic.c optional gic arm/arm/gic_fdt.c optional gic fdt arm/arm/identcpu-v4.c optional !armv6 arm/arm/identcpu-v6.c optional armv6 arm/arm/in_cksum.c optional inet | inet6 arm/arm/in_cksum_arm.S optional inet | inet6 arm/arm/intr.c optional !intrng kern/subr_intr.c optional intrng arm/arm/locore.S standard no-obj arm/arm/machdep.c standard arm/arm/machdep_intr.c standard arm/arm/mem.c optional mem arm/arm/minidump_machdep.c optional mem arm/arm/mp_machdep.c optional smp arm/arm/mpcore_timer.c optional mpcore_timer arm/arm/nexus.c standard arm/arm/ofw_machdep.c optional fdt arm/arm/physmem.c standard arm/arm/pl190.c optional pl190 arm/arm/pl310.c optional pl310 arm/arm/platform.c optional platform arm/arm/platform_if.m optional platform arm/arm/pmap-v4.c optional !armv6 arm/arm/pmap-v6.c optional armv6 arm/arm/pmu.c optional pmu | fdt hwpmc arm/arm/sc_machdep.c optional sc arm/arm/setcpsr.S standard arm/arm/setstack.s standard arm/arm/stack_machdep.c optional ddb | stack arm/arm/stdatomic.c standard \ compile-with "${NORMAL_C:N-Wmissing-prototypes}" arm/arm/support.S standard arm/arm/swtch.S standard arm/arm/swtch-v4.S optional !armv6 arm/arm/swtch-v6.S optional armv6 arm/arm/sys_machdep.c standard arm/arm/syscall.c standard arm/arm/trap-v4.c optional !armv6 arm/arm/trap-v6.c optional armv6 arm/arm/uio_machdep.c standard arm/arm/undefined.c standard arm/arm/unwind.c optional ddb | kdtrace_hooks arm/arm/vm_machdep.c standard arm/arm/vfp.c standard arm/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 board_id.h standard \ dependency "$S/arm/conf/genboardid.awk $S/arm/conf/mach-types" \ compile-with "${AWK} -f $S/arm/conf/genboardid.awk $S/arm/conf/mach-types > board_id.h" \ no-obj no-implicit-rule before-depend \ clean "board_id.h" cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/arm/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/arm/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/arm/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/cpufreq/cpufreq_dt.c optional cpufreq fdt dev/dwc/if_dwc.c optional dwc dev/dwc/if_dwc_if.m optional dwc dev/fb/fb.c optional sc dev/fdt/fdt_arm_platform.c optional platform fdt dev/hdmi/hdmi_if.m optional hdmi dev/hwpmc/hwpmc_arm.c optional hwpmc dev/hwpmc/hwpmc_armv7.c optional hwpmc armv6 dev/iicbus/twsi/twsi.c optional twsi dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci_host_generic pci dev/pci/pci_host_generic_fdt.c optional pci_host_generic pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm.S optional psci dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc dev/uart/uart_cpu_fdt.c optional uart fdt font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_busdma_bufalloc.c standard kern/subr_devmap.c standard kern/subr_sfbuf.c standard libkern/arm/aeabi_unwind.c standard libkern/arm/divsi3.S standard libkern/arm/ffs.S standard libkern/arm/ldivmod.S standard libkern/arm/ldivmod_helper.c standard libkern/arm/memclr.S standard libkern/arm/memcpy.S standard libkern/arm/memset.S standard libkern/arm/muldi3.c standard libkern/ashldi3.c standard libkern/ashrdi3.c standard libkern/divdi3.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 2ff24eb1ef94..7e5dfd2e8a7b 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -1,192 +1,192 @@ # $FreeBSD$ cloudabi64_vdso.o optional compat_cloudabi64 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_aarch64.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_aarch64.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi64_vdso.o" # cloudabi64_vdso_blob.o optional compat_cloudabi64 \ dependency "cloudabi64_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi64_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi64_vdso_blob.o" # arm/allwinner/a10_ehci.c optional ehci aw_ehci fdt arm/allwinner/a10_gpio.c optional gpio aw_gpio fdt arm/allwinner/a10_mmc.c optional mmc aw_mmc fdt arm/allwinner/a64/a64_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/a64/a64_r_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/aw_ccu.c optional aw_ccu fdt arm/allwinner/aw_nmi.c optional aw_nmi fdt \ compile-with "${NORMAL_C} -I$S/gnu/dts/include" arm/allwinner/aw_reset.c optional aw_ccu fdt arm/allwinner/aw_rsb.c optional aw_rsb fdt arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_sid.c optional aw_sid fdt arm/allwinner/aw_thermal.c optional aw_thermal fdt arm/allwinner/aw_usbphy.c optional ehci aw_usbphy fdt arm/allwinner/aw_wdog.c optional aw_wdog fdt arm/allwinner/axp81x.c optional axp81x fdt arm/allwinner/clk/aw_ahbclk.c optional aw_ccu fdt arm/allwinner/clk/aw_apbclk.c optional aw_ccu fdt arm/allwinner/clk/aw_axiclk.c optional aw_ccu fdt arm/allwinner/clk/aw_cpuclk.c optional aw_ccu fdt arm/allwinner/clk/aw_gate.c optional aw_ccu fdt arm/allwinner/clk/aw_modclk.c optional aw_ccu fdt arm/allwinner/clk/aw_pll.c optional aw_ccu fdt \ compile-with "${NORMAL_C} -I$S/gnu/dts/include" arm/allwinner/clk/aw_thsclk.c optional aw_ccu fdt arm/allwinner/clk/aw_usbclk.c optional aw_ccu fdt arm/allwinner/if_awg.c optional awg fdt arm/annapurna/alpine/alpine_ccu.c optional al_ccu fdt arm/annapurna/alpine/alpine_nb_service.c optional al_nb_service fdt arm/annapurna/alpine/alpine_pci.c optional al_pci fdt arm/annapurna/alpine/alpine_pci_msix.c optional al_pci fdt arm/annapurna/alpine/alpine_serdes.c optional al_serdes fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" arm/arm/generic_timer.c standard arm/arm/gic.c standard arm/arm/gic_fdt.c optional fdt arm/arm/pmu.c standard arm/broadcom/bcm2835/bcm2835_audio.c optional sound vchiq fdt \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" arm/broadcom/bcm2835/bcm2835_bsc.c optional bcm2835_bsc soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_cpufreq.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_dma.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_fbd.c optional vt soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_ft5406.c optional evdev bcm2835_ft5406 soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_gpio.c optional gpio soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_intr.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_mbox.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_rng.c optional random soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_sdhci.c optional sdhci soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_spi.c optional bcm2835_spi soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_vcio.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_wdog.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2836.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm283x_dwc_fdt.c optional dwcotg fdt soc_brcm_bcm2837 arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/bzero.S standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/copystr.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c optional ddb arm64/arm64/disassem.c optional ddb arm64/arm64/dump_machdep.c standard arm64/arm64/elf_machdep.c standard arm64/arm64/exception.S standard arm64/arm64/gicv3_its.c optional intrng arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/identcpu.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/mem.c standard arm64/arm64/memcpy.S standard arm64/arm64/memmove.S standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pmap.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/uma_machdep.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/cavium/thunder_pcie_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_pem_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci arm64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 contrib/vchiq/interface/compat/vchi_bsd.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_connected.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_core.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_shim.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_util.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" crypto/armv8/armv8_crypto.c optional armv8crypto armv8_crypto_wrap.o optional armv8crypto \ dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8a+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "armv8_crypto_wrap.o" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/acpica/acpi_if.m optional acpi dev/ahci/ahci_generic.c optional ahci dev/cpufreq/cpufreq_dt.c optional cpufreq fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/mbox/mbox_if.m optional soc_brcm_bcm2837 dev/mmc/host/dwmmc.c optional dwmmc fdt dev/mmc/host/dwmmc_hisi.c optional dwmmc fdt soc_hisi_hi6220 dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_fdt.c optional pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm64.S optional psci dev/uart/uart_cpu_arm64.c optional uart dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg fdt soc_hisi_hi6220 dev/usb/controller/generic_ehci.c optional ehci acpi dev/usb/controller/generic_ohci.c optional ohci fdt dev/usb/controller/generic_usb_if.m optional ohci fdt dev/vnic/mrml_bridge.c optional vnic fdt dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional vnic fdt dev/vnic/thunder_bgx.c optional vnic pci dev/vnic/thunder_mdio_fdt.c optional vnic fdt dev/vnic/thunder_mdio.c optional vnic dev/vnic/lmac_if.m optional inet | inet6 | vnic kern/kern_clocksource.c standard kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memset.c standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 7b3cfb0a24e7..d553d24737ba 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -1,648 +1,648 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_i686.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_i686.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # linux_locore.o optional compat_linux \ dependency "linux_assym.h $S/i386/linux/linux_locore.s" \ compile-with "${CC} -x assembler-with-cpp -DLOCORE -shared -s -pipe -I. -I$S -Werror -Wall -fno-common -nostdinc -nostdlib -Wl,-T$S/i386/linux/linux_vdso.lds.s -Wl,-soname=linux_vdso.so,--eh-frame-hdr,-fPIC,-warn-common ${.IMPSRC} -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "linux_locore.o" # linux_vdso.so optional compat_linux \ dependency "linux_locore.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 linux_locore.o ${.TARGET}" \ no-implicit-rule \ clean "linux_vdso.so" # svr4_genassym.o optional compat_svr4 \ dependency "$S/i386/svr4/svr4_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "svr4_genassym.o" # svr4_assym.h optional compat_svr4 \ dependency "$S/kern/genassym.sh svr4_genassym.o" \ compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "svr4_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs | dtrace compile-with "${ZFS_S}" cddl/dev/dtrace/i386/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/i386/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/x86/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/dtrace/x86/dis_tables.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" cddl/dev/dtrace/x86/instr_size.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_event.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_mmap.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_timer.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/linux/linux_vdso.c optional compat_linux compat/linux/linux.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci compat/svr4/imgact_svr4.c optional compat_svr4 compat/svr4/svr4_fcntl.c optional compat_svr4 compat/svr4/svr4_filio.c optional compat_svr4 compat/svr4/svr4_ioctl.c optional compat_svr4 compat/svr4/svr4_ipc.c optional compat_svr4 compat/svr4/svr4_misc.c optional compat_svr4 compat/svr4/svr4_resource.c optional compat_svr4 compat/svr4/svr4_signal.c optional compat_svr4 compat/svr4/svr4_socket.c optional compat_svr4 compat/svr4/svr4_sockio.c optional compat_svr4 compat/svr4/svr4_stat.c optional compat_svr4 compat/svr4/svr4_stream.c optional compat_svr4 compat/svr4/svr4_syscallnames.c optional compat_svr4 compat/svr4/svr4_sysent.c optional compat_svr4 compat/svr4/svr4_sysvec.c optional compat_svr4 compat/svr4/svr4_termios.c optional compat_svr4 -bf_enc.o optional crypto | ipsec \ +bf_enc.o optional crypto | ipsec | ipsec_support \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" -crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb +crypto/des/arch/i386/des_enc.S optional crypto | ipsec | ipsec_support | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/advansys/adv_isa.c optional adv isa dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/aic/aic_isa.c optional aic isa dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/cm/if_cm_isa.c optional cm isa dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/gpio/bytgpio.c optional bytgpio dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_pentium.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_ppro.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_snapshot.c optional hyperv dev/hyperv/utilities/vmbus_heartbeat.c optional hyperv dev/hyperv/utilities/vmbus_ic.c optional hyperv dev/hyperv/utilities/vmbus_shutdown.c optional hyperv dev/hyperv/utilities/vmbus_timesync.c optional hyperv dev/hyperv/vmbus/hyperv.c optional hyperv dev/hyperv/vmbus/hyperv_busdma.c optional hyperv dev/hyperv/vmbus/vmbus.c optional hyperv pci dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/intel/spi.c optional intelspi dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/le/if_le_isa.c optional le isa dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_transport.c optional if_ntb dev/ntb/ntb.c optional if_ntb | ntb_hw dev/ntb/ntb_if.m optional if_ntb | ntb_hw dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/ofw/ofwpci.c optional fdt pci dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # i386/bios/apm.c optional apm i386/bios/mca_machdep.c optional mca i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi i386/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 #i386/i386/apic_vector.s optional apic i386/i386/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard i386/i386/exception.s standard i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/i686_mem.c optional mem i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem i386/i386/locore.s standard no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional smp i386/i386/mpboot.s optional smp i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard i386/i386/ptrace_machdep.c standard i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard i386/ibcs2/ibcs2_errno.c optional ibcs2 i386/ibcs2/ibcs2_fcntl.c optional ibcs2 i386/ibcs2/ibcs2_ioctl.c optional ibcs2 i386/ibcs2/ibcs2_ipc.c optional ibcs2 i386/ibcs2/ibcs2_isc.c optional ibcs2 i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 i386/ibcs2/ibcs2_misc.c optional ibcs2 i386/ibcs2/ibcs2_msg.c optional ibcs2 i386/ibcs2/ibcs2_other.c optional ibcs2 i386/ibcs2/ibcs2_signal.c optional ibcs2 i386/ibcs2/ibcs2_socksys.c optional ibcs2 i386/ibcs2/ibcs2_stat.c optional ibcs2 i386/ibcs2/ibcs2_sysent.c optional ibcs2 i386/ibcs2/ibcs2_sysi86.c optional ibcs2 i386/ibcs2/ibcs2_sysvec.c optional ibcs2 i386/ibcs2/ibcs2_util.c optional ibcs2 i386/ibcs2/ibcs2_xenix.c optional ibcs2 i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 i386/ibcs2/imgact_coff.c optional ibcs2 i386/isa/elink.c optional ep | ie i386/isa/npx.c standard i386/isa/pmtimer.c optional pmtimer i386/isa/prof_machdep.c optional profiling-routine i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_support.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci i386/svr4/svr4_locore.s optional compat_svr4 \ dependency "svr4_assym.h" \ warning "COMPAT_SVR4 is broken and should be avoided" i386/svr4/svr4_machdep.c optional compat_svr4 # isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip kern/subr_sfbuf.c standard crc32_sse42.o standard \ dependency "$S/libkern/x86/crc32_sse42.c" \ compile-with "${CC} -c ${CFLAGS:N-nostdinc} ${WERROR} ${PROF} -msse4 ${.IMPSRC}" \ no-implicit-rule \ clean "crc32_sse42.o" libkern/divdi3.c standard libkern/ffsll.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard i386/xbox/xbox.c optional xbox i386/xbox/xboxfb.c optional xboxfb dev/fb/boot_font.c optional xboxfb i386/xbox/pic16l.s optional xbox # # x86 real mode BIOS support, required by dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa # # bvm console # dev/bvm/bvm_console.c optional bvmconsole dev/bvm/bvm_dbg.c optional bvmdebug # # x86 shared code between IA32 and AMD64 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci x86/iommu/intel_ctx.c optional acpi acpi_dmar pci x86/iommu/intel_drv.c optional acpi acpi_dmar pci x86/iommu/intel_fault.c optional acpi acpi_dmar pci x86/iommu/intel_gas.c optional acpi acpi_dmar pci x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci x86/iommu/intel_qi.c optional acpi acpi_dmar pci x86/iommu/intel_quirks.c optional acpi acpi_dmar pci x86/iommu/intel_utils.c optional acpi acpi_dmar pci x86/isa/atpic.c optional atpic x86/isa/atrtc.c standard x86/isa/clock.c standard x86/isa/elcr.c optional atpic | apic x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/autoconf.c standard x86/x86/bus_machdep.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/legacy.c standard x86/x86/local_apic.c optional apic x86/x86/mca.c standard x86/x86/mptable.c optional apic x86/x86/mptable_pci.c optional apic pci x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/pvclock.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xenhvm x86/xen/xen_apic.c optional xenhvm x86/xen/xenpv.c optional xenhvm x86/xen/xen_nexus.c optional xenhvm x86/xen/xen_msi.c optional xenhvm diff --git a/sys/conf/files.mips b/sys/conf/files.mips index 0fbdd1c3bd3e..f09010d59c45 100644 --- a/sys/conf/files.mips +++ b/sys/conf/files.mips @@ -1,112 +1,114 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # Arch dependent files mips/mips/autoconf.c standard mips/mips/bus_space_generic.c standard mips/mips/busdma_machdep.c standard mips/mips/cache.c standard mips/mips/cache_mipsNN.c standard mips/mips/cpu.c standard mips/mips/db_disasm.c optional ddb mips/mips/db_interface.c optional ddb mips/mips/db_trace.c optional ddb mips/mips/dump_machdep.c standard mips/mips/elf_machdep.c standard mips/mips/exception.S standard mips/mips/fp.S standard mips/mips/freebsd32_machdep.c optional compat_freebsd32 mips/mips/gdb_machdep.c standard mips/mips/in_cksum.c optional inet mips/mips/libkern_machdep.c standard mips/mips/locore.S standard no-obj mips/mips/machdep.c standard mips/mips/mem.c optional mem mips/mips/minidump_machdep.c standard mips/mips/mp_machdep.c optional smp mips/mips/mpboot.S optional smp mips/mips/nexus.c standard mips/mips/ofw_machdep.c optional fdt mips/mips/pm_machdep.c standard mips/mips/pmap.c standard mips/mips/ptrace_machdep.c standard mips/mips/sc_machdep.c standard mips/mips/stack_machdep.c optional ddb | stack mips/mips/stdatomic.c standard \ compile-with "${NORMAL_C:N-Wmissing-prototypes}" mips/mips/support.S standard mips/mips/bcopy.S standard mips/mips/swtch.S standard mips/mips/sys_machdep.c standard mips/mips/tlb.c standard mips/mips/trap.c standard mips/mips/uio_machdep.c standard mips/mips/uma_machdep.c standard mips/mips/vm_machdep.c standard # misc opt-in bits kern/kern_clocksource.c standard kern/link_elf_obj.c standard kern/subr_busdma_bufalloc.c standard kern/subr_dummy_vdso_tc.c standard kern/subr_sfbuf.c optional mips | mipsel | mipsn32 kern/subr_sfbuf.c optional mipshf | mipselhf # gcc/clang runtime libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/cmpdi2.c optional mips | mipshf | mipsel | mipselhf libkern/ucmpdi2.c optional mips | mipshf | mipsel | mipselhf libkern/ashldi3.c standard libkern/ashrdi3.c standard # cfe support dev/cfe/cfe_api.c optional cfe dev/cfe/cfe_console.c optional cfe_console dev/cfe/cfe_env.c optional cfe_env # syscons support dev/fb/fb.c optional sc dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc mips/mips/sc_machdep.c optional sc # FDT support dev/uart/uart_cpu_fdt.c optional uart fdt # crypto support -- use generic -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | \ + ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | \ + ipsec_support | netsmb # AP common nvram interface MIPS specific, but maybe should be more generic dev/nvram2env/nvram2env_mips.c optional nvram2env dev/nvram2env/nvram2env.c optional nvram2env # hwpmc support dev/hwpmc/hwpmc_mips.c optional hwpmc dev/hwpmc/hwpmc_mips24k.c optional hwpmc_mips24k dev/hwpmc/hwpmc_mips74k.c optional hwpmc_mips74k # ofw support dev/ofw/ofwpci.c optional fdt pci # INTRNG support code kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_intr.c optional intrng # INTRNG compatible MIPS32 interrupt controller mips/mips/mips_pic.c optional intrng # DTrace cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/mips/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/mips/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/mips/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index e34dcf525b59..65c81ad27e84 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -1,258 +1,258 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # # font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # # There is only an asm version on ppc64. cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs powerpc | dtrace powerpc compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/atomic/powerpc64/opensolaris_atomic.S optional zfs powerpc64 | dtrace powerpc64 compile-with "${ZFS_S}" cddl/dev/dtrace/powerpc/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/powerpc/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/powerpc/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/bm/if_bm.c optional bm powermac dev/adb/adb_bus.c optional adb dev/adb/adb_kbd.c optional adb dev/adb/adb_mouse.c optional adb dev/adb/adb_hb_if.m optional adb dev/adb/adb_if.m optional adb dev/adb/adb_buttons.c optional adb dev/agp/agp_apple.c optional agp powermac dev/fb/fb.c optional sc dev/fdt/fdt_powerpc.c optional fdt # ofwbus depends on simplebus. dev/fdt/simplebus.c optional aim | fdt dev/hwpmc/hwpmc_e500.c optional hwpmc dev/hwpmc/hwpmc_mpc7xxx.c optional hwpmc dev/hwpmc/hwpmc_powerpc.c optional hwpmc dev/hwpmc/hwpmc_ppc970.c optional hwpmc dev/iicbus/ad7417.c optional ad7417 powermac dev/iicbus/adm1030.c optional powermac windtunnel | adm1030 powermac dev/iicbus/adt746x.c optional adt746x powermac dev/iicbus/ds1631.c optional ds1631 powermac dev/iicbus/ds1775.c optional ds1775 powermac dev/iicbus/max6690.c optional max6690 powermac dev/iicbus/ofw_iicbus.c optional iicbus aim dev/nand/nfc_fsl.c optional nand mpc85xx dev/nand/nfc_rb.c optional nand mpc85xx # ofw can be either aim or fdt: fdt case handled in files. aim only powerpc specific. dev/ofw/openfirm.c optional aim dev/ofw/openfirmio.c optional aim dev/ofw/ofw_bus_if.m optional aim dev/ofw/ofw_cpu.c optional aim dev/ofw/ofw_if.m optional aim dev/ofw/ofw_bus_subr.c optional aim dev/ofw/ofw_console.c optional aim dev/ofw/ofw_disk.c optional ofwd aim dev/ofw/ofwbus.c optional aim | fdt dev/ofw/ofwpci.c optional pci dev/ofw/ofw_standard.c optional aim powerpc dev/ofw/ofw_subr.c optional aim powerpc dev/powermac_nvram/powermac_nvram.c optional powermac_nvram powermac dev/quicc/quicc_bfe_fdt.c optional quicc mpc85xx dev/scc/scc_bfe_macio.c optional scc powermac dev/sdhci/fsl_sdhci.c optional mpc85xx sdhci dev/sec/sec.c optional sec mpc85xx dev/sound/macio/aoa.c optional snd_davbus | snd_ai2s powermac dev/sound/macio/davbus.c optional snd_davbus powermac dev/sound/macio/i2s.c optional snd_ai2s powermac dev/sound/macio/onyx.c optional snd_ai2s iicbus powermac dev/sound/macio/snapper.c optional snd_ai2s iicbus powermac dev/sound/macio/tumbler.c optional snd_ai2s iicbus powermac dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc dev/tsec/if_tsec.c optional tsec dev/tsec/if_tsec_fdt.c optional tsec fdt dev/uart/uart_cpu_powerpc.c optional uart dev/usb/controller/ehci_fsl.c optional ehci mpc85xx dev/vt/hw/ofwfb/ofwfb.c optional vt aim kern/kern_clocksource.c standard kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr kern/subr_sfbuf.c standard libkern/ashldi3.c optional powerpc | powerpcspe libkern/ashrdi3.c optional powerpc | powerpcspe libkern/bcmp.c standard libkern/cmpdi2.c optional powerpc | powerpcspe libkern/divdi3.c optional powerpc | powerpcspe libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c optional powerpc | powerpcspe libkern/memmove.c standard libkern/memset.c standard libkern/moddi3.c optional powerpc | powerpcspe libkern/qdivrem.c optional powerpc | powerpcspe libkern/ucmpdi2.c optional powerpc | powerpcspe libkern/udivdi3.c optional powerpc | powerpcspe libkern/umoddi3.c optional powerpc | powerpcspe powerpc/aim/locore.S optional aim no-obj powerpc/aim/aim_machdep.c optional aim powerpc/aim/mmu_oea.c optional aim powerpc powerpc/aim/mmu_oea64.c optional aim powerpc/aim/moea64_if.m optional aim powerpc/aim/moea64_native.c optional aim powerpc/aim/mp_cpudep.c optional aim powerpc/aim/slb.c optional aim powerpc64 powerpc/booke/locore.S optional booke no-obj powerpc/booke/booke_machdep.c optional booke powerpc/booke/machdep_e500.c optional booke_e500 powerpc/booke/mp_cpudep.c optional booke smp powerpc/booke/platform_bare.c optional booke powerpc/booke/pmap.c optional booke powerpc/booke/spe.c optional powerpcspe powerpc/cpufreq/dfs.c optional cpufreq powerpc/cpufreq/pcr.c optional cpufreq aim powerpc/cpufreq/pmufreq.c optional cpufreq aim pmu powerpc/fpu/fpu_add.c optional fpu_emu powerpc/fpu/fpu_compare.c optional fpu_emu powerpc/fpu/fpu_div.c optional fpu_emu powerpc/fpu/fpu_emu.c optional fpu_emu powerpc/fpu/fpu_explode.c optional fpu_emu powerpc/fpu/fpu_implode.c optional fpu_emu powerpc/fpu/fpu_mul.c optional fpu_emu powerpc/fpu/fpu_sqrt.c optional fpu_emu powerpc/fpu/fpu_subr.c optional fpu_emu powerpc/mambo/mambocall.S optional mambo powerpc/mambo/mambo.c optional mambo powerpc/mambo/mambo_console.c optional mambo powerpc/mambo/mambo_disk.c optional mambo powerpc/mikrotik/platform_rb.c optional mikrotik powerpc/mpc85xx/atpic.c optional mpc85xx isa powerpc/mpc85xx/ds1553_bus_fdt.c optional ds1553 fdt powerpc/mpc85xx/ds1553_core.c optional ds1553 powerpc/mpc85xx/fsl_diu.c optional mpc85xx diu powerpc/mpc85xx/i2c.c optional iicbus fdt powerpc/mpc85xx/isa.c optional mpc85xx isa powerpc/mpc85xx/lbc.c optional mpc85xx powerpc/mpc85xx/mpc85xx.c optional mpc85xx powerpc/mpc85xx/mpc85xx_gpio.c optional mpc85xx gpio powerpc/mpc85xx/platform_mpc85xx.c optional mpc85xx powerpc/mpc85xx/pci_mpc85xx.c optional pci mpc85xx powerpc/mpc85xx/pci_mpc85xx_pcib.c optional pci mpc85xx powerpc/mpc85xx/qoriq_gpio.c optional mpc85xx gpio powerpc/ofw/ofw_machdep.c standard powerpc/ofw/ofw_pcibus.c optional pci powerpc/ofw/ofw_pcib_pci.c optional pci powerpc/ofw/ofw_real.c optional aim powerpc/ofw/ofw_syscons.c optional sc aim powerpc/ofw/ofwcall32.S optional aim powerpc powerpc/ofw/ofwcall64.S optional aim powerpc64 powerpc/ofw/ofwmagic.S optional aim powerpc/ofw/openpic_ofw.c optional aim | fdt powerpc/ofw/rtas.c optional aim powerpc/powermac/ata_kauai.c optional powermac ata | powermac atamacio powerpc/powermac/ata_macio.c optional powermac ata | powermac atamacio powerpc/powermac/ata_dbdma.c optional powermac ata | powermac atamacio powerpc/powermac/atibl.c optional powermac atibl powerpc/powermac/cuda.c optional powermac cuda powerpc/powermac/cpcht.c optional powermac pci powerpc/powermac/dbdma.c optional powermac pci powerpc/powermac/fcu.c optional powermac fcu powerpc/powermac/grackle.c optional powermac pci powerpc/powermac/hrowpic.c optional powermac pci powerpc/powermac/kiic.c optional powermac kiic powerpc/powermac/macgpio.c optional powermac pci powerpc/powermac/macio.c optional powermac pci powerpc/powermac/nvbl.c optional powermac nvbl powerpc/powermac/platform_powermac.c optional powermac powerpc/powermac/powermac_thermal.c optional powermac powerpc/powermac/pswitch.c optional powermac pswitch powerpc/powermac/pmu.c optional powermac pmu powerpc/powermac/smu.c optional powermac smu powerpc/powermac/smusat.c optional powermac smu powerpc/powermac/uninorth.c optional powermac powerpc/powermac/uninorthpci.c optional powermac pci powerpc/powermac/vcoregpio.c optional powermac powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bcopy.c standard powerpc/powerpc/bus_machdep.c standard powerpc/powerpc/busdma_machdep.c standard powerpc/powerpc/clock.c standard powerpc/powerpc/copyinout.c standard powerpc/powerpc/copystr.c standard powerpc/powerpc/cpu.c standard powerpc/powerpc/db_disasm.c optional ddb powerpc/powerpc/db_hwwatch.c optional ddb powerpc/powerpc/db_interface.c optional ddb powerpc/powerpc/db_trace.c optional ddb powerpc/powerpc/dump_machdep.c standard powerpc/powerpc/elf32_machdep.c optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/elf64_machdep.c optional powerpc64 powerpc/powerpc/exec_machdep.c standard powerpc/powerpc/fpu.c standard powerpc/powerpc/fuswintr.c standard powerpc/powerpc/gdb_machdep.c optional gdb powerpc/powerpc/in_cksum.c optional inet | inet6 powerpc/powerpc/interrupt.c standard powerpc/powerpc/intr_machdep.c standard powerpc/powerpc/iommu_if.m standard powerpc/powerpc/machdep.c standard powerpc/powerpc/mem.c optional mem powerpc/powerpc/mmu_if.m standard powerpc/powerpc/mp_machdep.c optional smp powerpc/powerpc/nexus.c standard powerpc/powerpc/openpic.c standard powerpc/powerpc/pic_if.m standard powerpc/powerpc/pmap_dispatch.c standard powerpc/powerpc/platform.c standard powerpc/powerpc/platform_if.m standard powerpc/powerpc/ptrace_machdep.c standard powerpc/powerpc/sc_machdep.c optional sc powerpc/powerpc/setjmp.S standard powerpc/powerpc/sigcode32.S optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/sigcode64.S optional powerpc64 powerpc/powerpc/swtch32.S optional powerpc | powerpcspe powerpc/powerpc/swtch64.S optional powerpc64 powerpc/powerpc/stack_machdep.c optional ddb | stack powerpc/powerpc/suswintr.c standard powerpc/powerpc/syncicache.c standard powerpc/powerpc/sys_machdep.c standard powerpc/powerpc/trap.c standard powerpc/powerpc/uio_machdep.c standard powerpc/powerpc/uma_machdep.c standard powerpc/powerpc/vm_machdep.c standard powerpc/ps3/ehci_ps3.c optional ps3 ehci powerpc/ps3/ohci_ps3.c optional ps3 ohci powerpc/ps3/if_glc.c optional ps3 glc powerpc/ps3/mmu_ps3.c optional ps3 powerpc/ps3/platform_ps3.c optional ps3 powerpc/ps3/ps3bus.c optional ps3 powerpc/ps3/ps3cdrom.c optional ps3 scbus powerpc/ps3/ps3disk.c optional ps3 powerpc/ps3/ps3pic.c optional ps3 powerpc/ps3/ps3_syscons.c optional ps3 vt powerpc/ps3/ps3-hvcall.S optional ps3 powerpc/pseries/phyp-hvcall.S optional pseries powerpc64 powerpc/pseries/mmu_phyp.c optional pseries powerpc64 powerpc/pseries/phyp_console.c optional pseries powerpc64 uart powerpc/pseries/phyp_llan.c optional llan powerpc/pseries/phyp_vscsi.c optional pseries powerpc64 scbus powerpc/pseries/platform_chrp.c optional pseries powerpc/pseries/plpar_iommu.c optional pseries powerpc64 powerpc/pseries/plpar_pcibus.c optional pseries powerpc64 pci powerpc/pseries/rtas_dev.c optional pseries powerpc/pseries/rtas_pci.c optional pseries pci powerpc/pseries/vdevice.c optional pseries powerpc64 powerpc/pseries/xics.c optional pseries powerpc64 powerpc/psim/iobus.c optional psim powerpc/psim/ata_iobus.c optional ata psim powerpc/psim/openpic_iobus.c optional psim powerpc/psim/uart_iobus.c optional uart psim diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index e6908bed8ae2..2a27bb1f18ad 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -1,59 +1,59 @@ # $FreeBSD$ cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/riscv/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/riscv/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/riscv/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/ofw/ofw_cpu.c optional fdt dev/uart/uart_cpu_fdt.c optional uart fdt dev/xilinx/axi_quad_spi.c optional xilinx_spi kern/kern_clocksource.c standard kern/subr_devmap.c standard kern/subr_dummy_vdso_tc.c standard libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard riscv/riscv/autoconf.c standard riscv/riscv/bcopy.c standard riscv/riscv/bus_machdep.c standard riscv/riscv/bus_space_asm.S standard riscv/riscv/busdma_machdep.c standard riscv/riscv/clock.c standard riscv/riscv/copyinout.S standard riscv/riscv/copystr.c standard riscv/riscv/cpufunc_asm.S standard riscv/riscv/db_disasm.c optional ddb riscv/riscv/db_interface.c optional ddb riscv/riscv/db_trace.c optional ddb riscv/riscv/dump_machdep.c standard riscv/riscv/elf_machdep.c standard riscv/riscv/exception.S standard riscv/riscv/intr_machdep.c standard riscv/riscv/in_cksum.c optional inet | inet6 riscv/riscv/identcpu.c standard riscv/riscv/locore.S standard no-obj riscv/riscv/machdep.c standard riscv/riscv/minidump_machdep.c standard riscv/riscv/mp_machdep.c optional smp riscv/riscv/mem.c standard riscv/riscv/nexus.c standard riscv/riscv/ofw_machdep.c optional fdt riscv/riscv/pmap.c standard riscv/riscv/riscv_console.c optional rcons riscv/riscv/sbi.S standard riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard riscv/riscv/sys_machdep.c standard riscv/riscv/trap.c standard riscv/riscv/timer.c standard riscv/riscv/uio_machdep.c standard riscv/riscv/uma_machdep.c standard riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack riscv/riscv/vm_machdep.c standard diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64 index a9643bd98e44..ab543f1cac62 100644 --- a/sys/conf/files.sparc64 +++ b/sys/conf/files.sparc64 @@ -1,151 +1,151 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # sunkbdmap.h optional sunkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${SUNKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > sunkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "sunkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # cddl/contrib/opensolaris/common/atomic/sparc64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_ebus.c optional atkbdc ebus dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/auxio/auxio.c optional auxio sbus | auxio ebus dev/esp/esp_sbus.c optional esp sbus dev/fb/creator.c optional creator sc dev/fb/creator_vt.c optional creator vt dev/fb/fb.c optional sc dev/fb/gallant12x22.c optional sc dev/fb/machfb.c optional machfb sc dev/hwpmc/hwpmc_sparc64.c optional hwpmc dev/le/if_le_lebuffer.c optional le sbus dev/le/if_le_ledma.c optional le sbus dev/le/lebuffer_sbus.c optional le sbus dev/ofw/ofw_bus_if.m standard dev/ofw/ofw_bus_subr.c standard dev/ofw/ofw_console.c optional ofw_console dev/ofw/ofw_if.m standard dev/ofw/ofw_standard.c standard dev/ofw/openfirm.c standard dev/ofw/openfirmio.c standard dev/ofw/openpromio.c standard dev/pcf/envctrl.c optional pcf ebus dev/pcf/pcf_ebus.c optional pcf ebus dev/sound/sbus/cs4231.c optional snd_audiocs ebus | \ snd_audiocs sbus dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc dev/uart/uart_cpu_sparc64.c optional uart dev/uart/uart_kbd_sun.c optional uart sc | vt dev/vt/hw/ofwfb/ofwfb.c optional vt kern/kern_clocksource.c standard kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr kern/subr_sfbuf.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard sparc64/central/central.c optional central sparc64/ebus/ebus.c optional ebus sparc64/ebus/epic.c optional epic ebus sparc64/fhc/clkbrd.c optional fhc sparc64/fhc/fhc.c optional fhc sparc64/isa/isa.c optional isa sparc64/isa/isa_dma.c optional isa sparc64/isa/ofw_isa.c optional ebus | isa sparc64/pci/apb.c optional pci sparc64/pci/fire.c optional pci sparc64/pci/ofw_pci.c optional pci sparc64/pci/ofw_pcib.c optional pci sparc64/pci/ofw_pcib_subr.c optional pci sparc64/pci/ofw_pcibus.c optional pci sparc64/pci/ofw_pci_if.m optional pci sparc64/pci/psycho.c optional pci sparc64/pci/sbbc.c optional sbbc uart sparc64/pci/schizo.c optional pci sparc64/sbus/dma_sbus.c optional sbus sparc64/sbus/sbus.c optional sbus sparc64/sbus/lsi64854.c optional sbus sparc64/sparc64/ata_machdep.c optional ada | da sparc64/sparc64/autoconf.c standard sparc64/sparc64/bus_machdep.c standard sparc64/sparc64/cache.c standard sparc64/sparc64/cam_machdep.c optional scbus sparc64/sparc64/cheetah.c standard sparc64/sparc64/clock.c standard sparc64/sparc64/counter.c standard sparc64/sparc64/db_disasm.c optional ddb sparc64/sparc64/db_interface.c optional ddb sparc64/sparc64/db_trace.c optional ddb sparc64/sparc64/db_hwwatch.c optional ddb sparc64/sparc64/dump_machdep.c standard sparc64/sparc64/elf_machdep.c standard sparc64/sparc64/exception.S standard no-obj \ compile-with "${NORMAL_S} -mcpu=ultrasparc" sparc64/sparc64/eeprom.c optional eeprom ebus | eeprom fhc | \ eeprom sbus sparc64/sparc64/gdb_machdep.c optional gdb sparc64/sparc64/identcpu.c standard sparc64/sparc64/in_cksum.c optional inet | inet6 sparc64/sparc64/interrupt.S standard no-obj \ compile-with "${NORMAL_S} -mcpu=ultrasparc" sparc64/sparc64/intr_machdep.c standard sparc64/sparc64/iommu.c standard sparc64/sparc64/jbusppm.c standard sparc64/sparc64/locore.S standard no-obj sparc64/sparc64/machdep.c standard sparc64/sparc64/mem.c optional mem sparc64/sparc64/mp_exception.S optional smp \ compile-with "${NORMAL_S} -mcpu=ultrasparc" sparc64/sparc64/mp_locore.S optional smp sparc64/sparc64/mp_machdep.c optional smp sparc64/sparc64/nexus.c standard sparc64/sparc64/ofw_machdep.c standard sparc64/sparc64/pmap.c standard sparc64/sparc64/prof_machdep.c optional profiling-routine sparc64/sparc64/rtc.c optional rtc ebus | rtc isa sparc64/sparc64/rwindow.c standard sparc64/sparc64/sc_machdep.c optional sc sparc64/sparc64/schppm.c standard sparc64/sparc64/spitfire.c standard sparc64/sparc64/ssm.c standard sparc64/sparc64/stack_machdep.c optional ddb | stack sparc64/sparc64/support.S standard \ compile-with "${NORMAL_S} -mcpu=ultrasparc" sparc64/sparc64/sys_machdep.c standard sparc64/sparc64/swtch.S standard sparc64/sparc64/tick.c standard sparc64/sparc64/tlb.c standard sparc64/sparc64/trap.c standard sparc64/sparc64/tsb.c standard sparc64/sparc64/uio_machdep.c standard sparc64/sparc64/upa.c optional creator sparc64/sparc64/vm_machdep.c standard sparc64/sparc64/zeus.c standard diff --git a/sys/conf/kern.opts.mk b/sys/conf/kern.opts.mk index 8b793cf5184d..c2b3ad727d79 100644 --- a/sys/conf/kern.opts.mk +++ b/sys/conf/kern.opts.mk @@ -1,163 +1,164 @@ # $FreeBSD$ # Options set in the build system that affect the kernel somehow. # # Define MK_* variables (which are either "yes" or "no") for users # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the # make(1) environment. # These should be tested with `== "no"' or `!= "no"' in makefiles. # The NO_* variables should only be set by makefiles for variables # that haven't been converted over. # # Note: bsd.own.mk must be included before the rest of kern.opts.mk to make # building on 10.x and earlier work. This should be removed when that's no # longer supported since it confounds the defaults (since it uses the host's # notion of defaults rather than what's default in current when building # within sys/modules). .include # These options are used by the kernel build process (kern.mk and kmod.mk) # They have to be listed here so we can build modules outside of the # src tree. __DEFAULT_YES_OPTIONS = \ AUTOFS \ BHYVE \ BLUETOOTH \ CCD \ CDDL \ CRYPT \ CUSE \ FORMAT_EXTENSIONS \ INET \ INET6 \ IPFILTER \ + IPSEC_SUPPORT \ ISCSI \ KERNEL_SYMBOLS \ NETGRAPH \ PF \ SOURCELESS_HOST \ SOURCELESS_UCODE \ USB_GADGET_EXAMPLES \ ZFS __DEFAULT_NO_OPTIONS = \ EISA \ EXTRA_TCP_STACKS \ NAND \ OFED \ RATELIMIT \ REPRODUCIBLE_BUILD # Some options are totally broken on some architectures. We disable # them. If you need to enable them on an experimental basis, you # must change this code. # Note: These only apply to the list of modules we build by default # and sometimes what is in the opt_*.h files by default. # Kernel config files are unaffected, though some targets can be # affected by KERNEL_SYMBOLS, FORMAT_EXTENSIONS, CTF and SSP. # Things that don't work based on the CPU .if ${MACHINE_CPUARCH} == "arm" . if ${MACHINE_ARCH:Marmv6*} == "" BROKEN_OPTIONS+= CDDL ZFS . endif .endif .if ${MACHINE_CPUARCH} == "mips" BROKEN_OPTIONS+= CDDL ZFS SSP .endif .if ${MACHINE_CPUARCH} == "powerpc" && ${MACHINE_ARCH} == "powerpc" BROKEN_OPTIONS+= ZFS .endif # Things that don't work because the kernel doesn't have the support # for them. .if ${MACHINE} != "i386" BROKEN_OPTIONS+= EISA .endif .if ${MACHINE} != "i386" && ${MACHINE} != "amd64" BROKEN_OPTIONS+= OFED .endif # expanded inline from bsd.mkopt.mk to avoid share/mk dependency # Those that default to yes .for var in ${__DEFAULT_YES_OPTIONS} .if !defined(MK_${var}) .if defined(WITHOUT_${var}) # WITHOUT always wins MK_${var}:= no .else MK_${var}:= yes .endif .else .if ${MK_${var}} != "yes" && ${MK_${var}} != "no" .error "Illegal value for MK_${var}: ${MK_${var}}" .endif .endif # !defined(MK_${var}) .endfor .undef __DEFAULT_YES_OPTIONS # Those that default to no .for var in ${__DEFAULT_NO_OPTIONS} .if !defined(MK_${var}) .if defined(WITH_${var}) && !defined(WITHOUT_${var}) # WITHOUT always wins MK_${var}:= yes .else MK_${var}:= no .endif .else .if ${MK_${var}} != "yes" && ${MK_${var}} != "no" .error "Illegal value for MK_${var}: ${MK_${var}}" .endif .endif # !defined(MK_${var}) .endfor .undef __DEFAULT_NO_OPTIONS # # MK_* options which are always no, usually because they are # unsupported/badly broken on this architecture. # .for var in ${BROKEN_OPTIONS} MK_${var}:= no .endfor .undef BROKEN_OPTIONS #end of bsd.mkopt.mk expanded inline. # # MK_*_SUPPORT options which default to "yes" unless their corresponding # MK_* variable is set to "no". # .for var in \ INET \ INET6 .if defined(WITHOUT_${var}_SUPPORT) || ${MK_${var}} == "no" MK_${var}_SUPPORT:= no .else .if defined(KERNBUILDDIR) # See if there's an opt_foo.h .if !defined(OPT_${var}) OPT_${var}!= cat ${KERNBUILDDIR}/opt_${var:tl}.h; echo .export OPT_${var} .endif .if ${OPT_${var}} == "" # nothing -> no MK_${var}_SUPPORT:= no .else MK_${var}_SUPPORT:= yes .endif .else # otherwise, yes MK_${var}_SUPPORT:= yes .endif .endif .endfor # Some modules only compile successfully if option FDT is set, due to #ifdef FDT # wrapped around declarations. Module makefiles can optionally compile such # things using .if !empty(OPT_FDT) .if !defined(OPT_FDT) && defined(KERNBUILDDIR) OPT_FDT!= sed -n '/FDT/p' ${KERNBUILDDIR}/opt_platform.h .export OPT_FDT .endif diff --git a/sys/conf/options b/sys/conf/options index e59f0c76a822..a857d9a216c8 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -1,1005 +1,1005 @@ # $FreeBSD$ # # On the handling of kernel options # # All kernel options should be listed in NOTES, with suitable # descriptions. Negative options (options that make some code not # compile) should be commented out; LINT (generated from NOTES) should # compile as much code as possible. Try to structure option-using # code so that a single option only switch code on, or only switch # code off, to make it possible to have a full compile-test. If # necessary, you can check for COMPILING_LINT to get maximum code # coverage. # # All new options shall also be listed in either "conf/options" or # "conf/options.". Options that affect a single source-file # .[c|s] should be directed into "opt_.h", while options # that affect multiple files should either go in "opt_global.h" if # this is a kernel-wide option (used just about everywhere), or in # "opt_.h" if it affects only some files. # Note that the effect of listing only an option without a # header-file-name in conf/options (and cousins) is that the last # convention is followed. # # This handling scheme is not yet fully implemented. # # # Format of this file: # Option name filename # # If filename is missing, the default is # opt_.h AAC_DEBUG opt_aac.h AACRAID_DEBUG opt_aacraid.h AHC_ALLOW_MEMIO opt_aic7xxx.h AHC_TMODE_ENABLE opt_aic7xxx.h AHC_DUMP_EEPROM opt_aic7xxx.h AHC_DEBUG opt_aic7xxx.h AHC_DEBUG_OPTS opt_aic7xxx.h AHC_REG_PRETTY_PRINT opt_aic7xxx.h AHD_DEBUG opt_aic79xx.h AHD_DEBUG_OPTS opt_aic79xx.h AHD_TMODE_ENABLE opt_aic79xx.h AHD_REG_PRETTY_PRINT opt_aic79xx.h ADW_ALLOW_MEMIO opt_adw.h TWA_DEBUG opt_twa.h TWA_FLASH_FIRMWARE opt_twa.h # Debugging options. ALT_BREAK_TO_DEBUGGER opt_kdb.h BREAK_TO_DEBUGGER opt_kdb.h BUF_TRACKING opt_global.h DDB DDB_BUFR_SIZE opt_ddb.h DDB_CAPTURE_DEFAULTBUFSIZE opt_ddb.h DDB_CAPTURE_MAXBUFSIZE opt_ddb.h DDB_CTF opt_ddb.h DDB_NUMSYM opt_ddb.h FULL_BUF_TRACKING opt_global.h GDB KDB opt_global.h KDB_TRACE opt_kdb.h KDB_UNATTENDED opt_kdb.h KLD_DEBUG opt_kld.h SYSCTL_DEBUG opt_sysctl.h EARLY_PRINTF opt_global.h TEXTDUMP_PREFERRED opt_ddb.h TEXTDUMP_VERBOSE opt_ddb.h NUM_CORE_FILES opt_global.h # Miscellaneous options. ADAPTIVE_LOCKMGRS ALQ ALTERA_SDCARD_FAST_SIM opt_altera_sdcard.h ATSE_CFI_HACK opt_cfi.h AUDIT opt_global.h BOOTHOWTO opt_global.h BOOTVERBOSE opt_global.h CALLOUT_PROFILING CAPABILITIES opt_capsicum.h CAPABILITY_MODE opt_capsicum.h COMPAT_43 opt_compat.h COMPAT_43TTY opt_compat.h COMPAT_FREEBSD4 opt_compat.h COMPAT_FREEBSD5 opt_compat.h COMPAT_FREEBSD6 opt_compat.h COMPAT_FREEBSD7 opt_compat.h COMPAT_FREEBSD9 opt_compat.h COMPAT_FREEBSD10 opt_compat.h COMPAT_FREEBSD11 opt_compat.h COMPAT_CLOUDABI32 opt_dontuse.h COMPAT_CLOUDABI64 opt_dontuse.h COMPAT_LINUXKPI opt_compat.h COMPILING_LINT opt_global.h CY_PCI_FASTINTR DEADLKRES opt_watchdog.h DEVICE_NUMA EXT_RESOURCES opt_global.h DIRECTIO FILEMON opt_dontuse.h FFCLOCK FULL_PREEMPTION opt_sched.h GZIO opt_gzio.h IMAGACT_BINMISC opt_dontuse.h IPI_PREEMPTION opt_sched.h GEOM_AES opt_geom.h GEOM_BDE opt_geom.h GEOM_BSD opt_geom.h GEOM_CACHE opt_geom.h GEOM_CONCAT opt_geom.h GEOM_ELI opt_geom.h GEOM_FOX opt_geom.h GEOM_GATE opt_geom.h GEOM_JOURNAL opt_geom.h GEOM_LABEL opt_geom.h GEOM_LABEL_GPT opt_geom.h GEOM_LINUX_LVM opt_geom.h GEOM_MAP opt_geom.h GEOM_MBR opt_geom.h GEOM_MIRROR opt_geom.h GEOM_MOUNTVER opt_geom.h GEOM_MULTIPATH opt_geom.h GEOM_NOP opt_geom.h GEOM_PART_APM opt_geom.h GEOM_PART_BSD opt_geom.h GEOM_PART_BSD64 opt_geom.h GEOM_PART_EBR opt_geom.h GEOM_PART_EBR_COMPAT opt_geom.h GEOM_PART_GPT opt_geom.h GEOM_PART_LDM opt_geom.h GEOM_PART_MBR opt_geom.h GEOM_PART_VTOC8 opt_geom.h GEOM_RAID opt_geom.h GEOM_RAID3 opt_geom.h GEOM_SHSEC opt_geom.h GEOM_STRIPE opt_geom.h GEOM_SUNLABEL opt_geom.h GEOM_UZIP opt_geom.h GEOM_UZIP_DEBUG opt_geom.h GEOM_VINUM opt_geom.h GEOM_VIRSTOR opt_geom.h GEOM_VOL opt_geom.h GEOM_ZERO opt_geom.h IFLIB opt_iflib.h KDTRACE_HOOKS opt_global.h KDTRACE_FRAME opt_kdtrace.h KN_HASHSIZE opt_kqueue.h KSTACK_MAX_PAGES KSTACK_PAGES KSTACK_USAGE_PROF KTRACE KTRACE_REQUEST_POOL opt_ktrace.h LIBICONV MAC opt_global.h MAC_BIBA opt_dontuse.h MAC_BSDEXTENDED opt_dontuse.h MAC_IFOFF opt_dontuse.h MAC_LOMAC opt_dontuse.h MAC_MLS opt_dontuse.h MAC_NONE opt_dontuse.h MAC_PARTITION opt_dontuse.h MAC_PORTACL opt_dontuse.h MAC_SEEOTHERUIDS opt_dontuse.h MAC_STATIC opt_mac.h MAC_STUB opt_dontuse.h MAC_TEST opt_dontuse.h MD_ROOT opt_md.h MD_ROOT_FSTYPE opt_md.h MD_ROOT_SIZE opt_md.h MFI_DEBUG opt_mfi.h MFI_DECODE_LOG opt_mfi.h MPROF_BUFFERS opt_mprof.h MPROF_HASH_SIZE opt_mprof.h NEW_PCIB opt_global.h NO_ADAPTIVE_MUTEXES opt_adaptive_mutexes.h NO_ADAPTIVE_RWLOCKS NO_ADAPTIVE_SX NO_EVENTTIMERS opt_timer.h NO_SYSCTL_DESCR opt_global.h NSWBUF_MIN opt_swap.h MBUF_PACKET_ZONE_DISABLE opt_global.h PANIC_REBOOT_WAIT_TIME opt_panic.h PCI_HP opt_pci.h PCI_IOV opt_global.h PPC_DEBUG opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h PPS_SYNC opt_ntp.h PREEMPTION opt_sched.h QUOTA SCHED_4BSD opt_sched.h SCHED_STATS opt_sched.h SCHED_ULE opt_sched.h SLEEPQUEUE_PROFILING SLHCI_DEBUG opt_slhci.h SPX_HACK STACK opt_stack.h SUIDDIR MSGMNB opt_sysvipc.h MSGMNI opt_sysvipc.h MSGSEG opt_sysvipc.h MSGSSZ opt_sysvipc.h MSGTQL opt_sysvipc.h SEMMNI opt_sysvipc.h SEMMNS opt_sysvipc.h SEMMNU opt_sysvipc.h SEMMSL opt_sysvipc.h SEMOPM opt_sysvipc.h SEMUME opt_sysvipc.h SHMALL opt_sysvipc.h SHMMAX opt_sysvipc.h SHMMAXPGS opt_sysvipc.h SHMMIN opt_sysvipc.h SHMMNI opt_sysvipc.h SHMSEG opt_sysvipc.h SYSVMSG opt_sysvipc.h SYSVSEM opt_sysvipc.h SYSVSHM opt_sysvipc.h SW_WATCHDOG opt_watchdog.h TURNSTILE_PROFILING UMTX_PROFILING UMTX_CHAINS opt_global.h VERBOSE_SYSINIT # POSIX kernel options P1003_1B_MQUEUE opt_posix.h P1003_1B_SEMAPHORES opt_posix.h _KPOSIX_PRIORITY_SCHEDULING opt_posix.h # Do we want the config file compiled into the kernel? INCLUDE_CONFIG_FILE opt_config.h # Options for static filesystems. These should only be used at config # time, since the corresponding lkms cannot work if there are any static # dependencies. Unusability is enforced by hiding the defines for the # options in a never-included header. AUTOFS opt_dontuse.h CD9660 opt_dontuse.h EXT2FS opt_dontuse.h FDESCFS opt_dontuse.h FFS opt_dontuse.h FUSE opt_dontuse.h MSDOSFS opt_dontuse.h NANDFS opt_dontuse.h NULLFS opt_dontuse.h PROCFS opt_dontuse.h PSEUDOFS opt_dontuse.h SMBFS opt_dontuse.h TMPFS opt_dontuse.h UDF opt_dontuse.h UNIONFS opt_dontuse.h ZFS opt_dontuse.h # Pseudofs debugging PSEUDOFS_TRACE opt_pseudofs.h # In-kernel GSS-API KGSSAPI opt_kgssapi.h KGSSAPI_DEBUG opt_kgssapi.h # These static filesystems have one slightly bogus static dependency in # sys/i386/i386/autoconf.c. If any of these filesystems are # statically compiled into the kernel, code for mounting them as root # filesystems will be enabled - but look below. # NFSCL - client # NFSD - server NFSCL opt_nfs.h NFSD opt_nfs.h # filesystems and libiconv bridge CD9660_ICONV opt_dontuse.h MSDOSFS_ICONV opt_dontuse.h UDF_ICONV opt_dontuse.h # If you are following the conditions in the copyright, # you can enable soft-updates which will speed up a lot of thigs # and make the system safer from crashes at the same time. # otherwise a STUB module will be compiled in. SOFTUPDATES opt_ffs.h # On small, embedded systems, it can be useful to turn off support for # snapshots. It saves about 30-40k for a feature that would be lightly # used, if it is used at all. NO_FFS_SNAPSHOT opt_ffs.h # Enabling this option turns on support for Access Control Lists in UFS, # which can be used to support high security configurations. Depends on # UFS_EXTATTR. UFS_ACL opt_ufs.h # Enabling this option turns on support for extended attributes in UFS-based # filesystems, which can be used to support high security configurations # as well as new filesystem features. UFS_EXTATTR opt_ufs.h UFS_EXTATTR_AUTOSTART opt_ufs.h # Enable fast hash lookups for large directories on UFS-based filesystems. UFS_DIRHASH opt_ufs.h # Enable gjournal-based UFS journal. UFS_GJOURNAL opt_ufs.h # The below sentence is not in English, and neither is this one. # We plan to remove the static dependences above, with a # _ROOT option to control if it usable as root. This list # allows these options to be present in config files already (though # they won't make any difference yet). NFS_ROOT opt_nfsroot.h # SMB/CIFS requester NETSMB opt_netsmb.h # Options used only in subr_param.c. HZ opt_param.h MAXFILES opt_param.h NBUF opt_param.h NSFBUFS opt_param.h VM_BCACHE_SIZE_MAX opt_param.h VM_SWZONE_SIZE_MAX opt_param.h MAXUSERS DFLDSIZ opt_param.h MAXDSIZ opt_param.h MAXSSIZ opt_param.h # Generic SCSI options. CAM_MAX_HIGHPOWER opt_cam.h CAMDEBUG opt_cam.h CAM_DEBUG_COMPILE opt_cam.h CAM_DEBUG_DELAY opt_cam.h CAM_DEBUG_BUS opt_cam.h CAM_DEBUG_TARGET opt_cam.h CAM_DEBUG_LUN opt_cam.h CAM_DEBUG_FLAGS opt_cam.h CAM_BOOT_DELAY opt_cam.h CAM_IOSCHED_DYNAMIC opt_cam.h SCSI_DELAY opt_scsi.h SCSI_NO_SENSE_STRINGS opt_scsi.h SCSI_NO_OP_STRINGS opt_scsi.h # Options used only in cam/ata/ata_da.c ADA_TEST_FAILURE opt_ada.h ATA_STATIC_ID opt_ada.h # Options used only in cam/scsi/scsi_cd.c CHANGER_MIN_BUSY_SECONDS opt_cd.h CHANGER_MAX_BUSY_SECONDS opt_cd.h # Options used only in cam/scsi/scsi_sa.c. SA_IO_TIMEOUT opt_sa.h SA_SPACE_TIMEOUT opt_sa.h SA_REWIND_TIMEOUT opt_sa.h SA_ERASE_TIMEOUT opt_sa.h SA_1FM_AT_EOD opt_sa.h # Options used only in cam/scsi/scsi_pt.c SCSI_PT_DEFAULT_TIMEOUT opt_pt.h # Options used only in cam/scsi/scsi_ses.c SES_ENABLE_PASSTHROUGH opt_ses.h # Options used in dev/sym/ (Symbios SCSI driver). SYM_SETUP_LP_PROBE_MAP opt_sym.h #-Low Priority Probe Map (bits) # Allows the ncr to take precedence # 1 (1<<0) -> 810a, 860 # 2 (1<<1) -> 825a, 875, 885, 895 # 4 (1<<2) -> 895a, 896, 1510d SYM_SETUP_SCSI_DIFF opt_sym.h #-HVD support for 825a, 875, 885 # disabled:0 (default), enabled:1 SYM_SETUP_PCI_PARITY opt_sym.h #-PCI parity checking # disabled:0, enabled:1 (default) SYM_SETUP_MAX_LUN opt_sym.h #-Number of LUNs supported # default:8, range:[1..64] # Options used only in dev/ncr/* SCSI_NCR_DEBUG opt_ncr.h SCSI_NCR_MAX_SYNC opt_ncr.h SCSI_NCR_MAX_WIDE opt_ncr.h SCSI_NCR_MYADDR opt_ncr.h # Options used only in dev/isp/* ISP_TARGET_MODE opt_isp.h ISP_FW_CRASH_DUMP opt_isp.h ISP_DEFAULT_ROLES opt_isp.h ISP_INTERNAL_TARGET opt_isp.h ISP_FCTAPE_OFF opt_isp.h # Options used only in dev/iscsi ISCSI_INITIATOR_DEBUG opt_iscsi_initiator.h # Net stuff. ACCEPT_FILTER_DATA ACCEPT_FILTER_DNS ACCEPT_FILTER_HTTP ALTQ opt_global.h ALTQ_CBQ opt_altq.h ALTQ_CDNR opt_altq.h ALTQ_CODEL opt_altq.h ALTQ_DEBUG opt_altq.h ALTQ_HFSC opt_altq.h ALTQ_FAIRQ opt_altq.h ALTQ_NOPCC opt_altq.h ALTQ_PRIQ opt_altq.h ALTQ_RED opt_altq.h ALTQ_RIO opt_altq.h BOOTP opt_bootp.h BOOTP_BLOCKSIZE opt_bootp.h BOOTP_COMPAT opt_bootp.h BOOTP_NFSROOT opt_bootp.h BOOTP_NFSV3 opt_bootp.h BOOTP_WIRED_TO opt_bootp.h DEVICE_POLLING DUMMYNET opt_ipdn.h RATELIMIT opt_ratelimit.h INET opt_inet.h INET6 opt_inet6.h IPDIVERT IPFILTER opt_ipfilter.h IPFILTER_DEFAULT_BLOCK opt_ipfilter.h IPFILTER_LOG opt_ipfilter.h IPFILTER_LOOKUP opt_ipfilter.h IPFIREWALL opt_ipfw.h IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h IPFIREWALL_NAT opt_ipfw.h IPFIREWALL_NAT64 opt_ipfw.h IPFIREWALL_NAT64_DIRECT_OUTPUT opt_ipfw.h IPFIREWALL_NPTV6 opt_ipfw.h IPFIREWALL_VERBOSE opt_ipfw.h IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h -IPSEC_NAT_T opt_ipsec.h +IPSEC_SUPPORT opt_ipsec.h IPSTEALTH KRPC LIBALIAS LIBMBPOOL LIBMCHAIN MBUF_PROFILING MBUF_STRESS_TEST MROUTING opt_mrouting.h NFSLOCKD PCBGROUP opt_pcbgroup.h PF_DEFAULT_TO_DROP opt_pf.h RADIX_MPATH opt_mpath.h ROUTETABLES opt_route.h RSS opt_rss.h SLIP_IFF_OPTS opt_slip.h TCPDEBUG TCPPCAP opt_global.h SIFTR TCP_HHOOK opt_inet.h TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading TCP_RFC7413 opt_inet.h TCP_RFC7413_MAX_KEYS opt_inet.h -TCP_SIGNATURE opt_inet.h +TCP_SIGNATURE opt_ipsec.h VLAN_ARRAY opt_vlan.h XBONEHACK FLOWTABLE opt_route.h FLOWTABLE_HASH_ALL opt_route.h # # SCTP # SCTP opt_sctp.h SCTP_DEBUG opt_sctp.h # Enable debug printfs SCTP_WITH_NO_CSUM opt_sctp.h # Use this at your peril SCTP_LOCK_LOGGING opt_sctp.h # Log to KTR lock activity SCTP_MBUF_LOGGING opt_sctp.h # Log to KTR general mbuf aloc/free SCTP_MBCNT_LOGGING opt_sctp.h # Log to KTR mbcnt activity SCTP_PACKET_LOGGING opt_sctp.h # Log to a packet buffer last N packets SCTP_LTRACE_CHUNKS opt_sctp.h # Log to KTR chunks processed SCTP_LTRACE_ERRORS opt_sctp.h # Log to KTR error returns. SCTP_USE_PERCPU_STAT opt_sctp.h # Use per cpu stats. SCTP_MCORE_INPUT opt_sctp.h # Have multiple input threads for input mbufs SCTP_LOCAL_TRACE_BUF opt_sctp.h # Use tracebuffer exported via sysctl SCTP_DETAILED_STR_STATS opt_sctp.h # Use per PR-SCTP policy stream stats # # # # Netgraph(4). Use option NETGRAPH to enable the base netgraph code. # Each netgraph node type can be either be compiled into the kernel # or loaded dynamically. To get the former, include the corresponding # option below. Each type has its own man page, e.g. ng_async(4). NETGRAPH NETGRAPH_DEBUG opt_netgraph.h NETGRAPH_ASYNC opt_netgraph.h NETGRAPH_ATMLLC opt_netgraph.h NETGRAPH_ATM_ATMPIF opt_netgraph.h NETGRAPH_BLUETOOTH opt_netgraph.h NETGRAPH_BLUETOOTH_BT3C opt_netgraph.h NETGRAPH_BLUETOOTH_H4 opt_netgraph.h NETGRAPH_BLUETOOTH_HCI opt_netgraph.h NETGRAPH_BLUETOOTH_L2CAP opt_netgraph.h NETGRAPH_BLUETOOTH_SOCKET opt_netgraph.h NETGRAPH_BLUETOOTH_UBT opt_netgraph.h NETGRAPH_BLUETOOTH_UBTBCMFW opt_netgraph.h NETGRAPH_BPF opt_netgraph.h NETGRAPH_BRIDGE opt_netgraph.h NETGRAPH_CAR opt_netgraph.h NETGRAPH_CISCO opt_netgraph.h NETGRAPH_DEFLATE opt_netgraph.h NETGRAPH_DEVICE opt_netgraph.h NETGRAPH_ECHO opt_netgraph.h NETGRAPH_EIFACE opt_netgraph.h NETGRAPH_ETHER opt_netgraph.h NETGRAPH_ETHER_ECHO opt_netgraph.h NETGRAPH_FEC opt_netgraph.h NETGRAPH_FRAME_RELAY opt_netgraph.h NETGRAPH_GIF opt_netgraph.h NETGRAPH_GIF_DEMUX opt_netgraph.h NETGRAPH_HOLE opt_netgraph.h NETGRAPH_IFACE opt_netgraph.h NETGRAPH_IP_INPUT opt_netgraph.h NETGRAPH_IPFW opt_netgraph.h NETGRAPH_KSOCKET opt_netgraph.h NETGRAPH_L2TP opt_netgraph.h NETGRAPH_LMI opt_netgraph.h NETGRAPH_MPPC_COMPRESSION opt_netgraph.h NETGRAPH_MPPC_ENCRYPTION opt_netgraph.h NETGRAPH_NAT opt_netgraph.h NETGRAPH_NETFLOW opt_netgraph.h NETGRAPH_ONE2MANY opt_netgraph.h NETGRAPH_PATCH opt_netgraph.h NETGRAPH_PIPE opt_netgraph.h NETGRAPH_PPP opt_netgraph.h NETGRAPH_PPPOE opt_netgraph.h NETGRAPH_PPTPGRE opt_netgraph.h NETGRAPH_PRED1 opt_netgraph.h NETGRAPH_RFC1490 opt_netgraph.h NETGRAPH_SOCKET opt_netgraph.h NETGRAPH_SPLIT opt_netgraph.h NETGRAPH_SPPP opt_netgraph.h NETGRAPH_TAG opt_netgraph.h NETGRAPH_TCPMSS opt_netgraph.h NETGRAPH_TEE opt_netgraph.h NETGRAPH_TTY opt_netgraph.h NETGRAPH_UI opt_netgraph.h NETGRAPH_VJC opt_netgraph.h NETGRAPH_VLAN opt_netgraph.h # NgATM options NGATM_ATM opt_netgraph.h NGATM_ATMBASE opt_netgraph.h NGATM_SSCOP opt_netgraph.h NGATM_SSCFU opt_netgraph.h NGATM_UNI opt_netgraph.h NGATM_CCATM opt_netgraph.h # DRM options DRM_DEBUG opt_drm.h TI_SF_BUF_JUMBO opt_ti.h TI_JUMBO_HDRSPLIT opt_ti.h # XXX Conflict: # of devices vs network protocol (Native ATM). # This makes "atm.h" unusable. NATM # DPT driver debug flags DPT_MEASURE_PERFORMANCE opt_dpt.h DPT_RESET_HBA opt_dpt.h # Misc debug flags. Most of these should probably be replaced with # 'DEBUG', and then let people recompile just the interesting modules # with 'make CC="cc -DDEBUG"'. CLUSTERDEBUG opt_debug_cluster.h DEBUG_1284 opt_ppb_1284.h VP0_DEBUG opt_vpo.h LPT_DEBUG opt_lpt.h PLIP_DEBUG opt_plip.h LOCKF_DEBUG opt_debug_lockf.h SI_DEBUG opt_debug_si.h IFMEDIA_DEBUG opt_ifmedia.h # Fb options FB_DEBUG opt_fb.h FB_INSTALL_CDEV opt_fb.h # ppbus related options PERIPH_1284 opt_ppb_1284.h DONTPROBE_1284 opt_ppb_1284.h # smbus related options ENABLE_ALART opt_intpm.h # These cause changes all over the kernel BLKDEV_IOSIZE opt_global.h BURN_BRIDGES opt_global.h DEBUG opt_global.h DEBUG_LOCKS opt_global.h DEBUG_VFS_LOCKS opt_global.h DFLTPHYS opt_global.h DIAGNOSTIC opt_global.h INVARIANT_SUPPORT opt_global.h INVARIANTS opt_global.h MAXCPU opt_global.h MAXMEMDOM opt_global.h MAXPHYS opt_global.h MCLSHIFT opt_global.h MUTEX_NOINLINE opt_global.h LOCK_PROFILING opt_global.h LOCK_PROFILING_FAST opt_global.h MSIZE opt_global.h REGRESSION opt_global.h RWLOCK_NOINLINE opt_global.h SX_NOINLINE opt_global.h VFS_BIO_DEBUG opt_global.h # These are VM related options VM_KMEM_SIZE opt_vm.h VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h VM_NRESERVLEVEL opt_vm.h VM_NUMA_ALLOC opt_vm.h VM_LEVEL_0_ORDER opt_vm.h NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h MALLOC_DEBUG_MAXZONES opt_vm.h # The MemGuard replacement allocator used for tamper-after-free detection DEBUG_MEMGUARD opt_vm.h # The RedZone malloc(9) protection DEBUG_REDZONE opt_vm.h # Standard SMP options EARLY_AP_STARTUP opt_global.h SMP opt_global.h # Size of the kernel message buffer MSGBUF_SIZE opt_msgbuf.h # NFS options NFS_MINATTRTIMO opt_nfs.h NFS_MAXATTRTIMO opt_nfs.h NFS_MINDIRATTRTIMO opt_nfs.h NFS_MAXDIRATTRTIMO opt_nfs.h NFS_DEBUG opt_nfs.h # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver OVERRIDE_CARD opt_bktr.h OVERRIDE_TUNER opt_bktr.h OVERRIDE_DBX opt_bktr.h OVERRIDE_MSP opt_bktr.h BROOKTREE_SYSTEM_DEFAULT opt_bktr.h BROOKTREE_ALLOC_PAGES opt_bktr.h BKTR_OVERRIDE_CARD opt_bktr.h BKTR_OVERRIDE_TUNER opt_bktr.h BKTR_OVERRIDE_DBX opt_bktr.h BKTR_OVERRIDE_MSP opt_bktr.h BKTR_SYSTEM_DEFAULT opt_bktr.h BKTR_ALLOC_PAGES opt_bktr.h BKTR_USE_PLL opt_bktr.h BKTR_GPIO_ACCESS opt_bktr.h BKTR_NO_MSP_RESET opt_bktr.h BKTR_430_FX_MODE opt_bktr.h BKTR_SIS_VIA_MODE opt_bktr.h BKTR_USE_FREEBSD_SMBUS opt_bktr.h BKTR_NEW_MSP34XX_DRIVER opt_bktr.h # Options for uart(4) UART_PPS_ON_CTS opt_uart.h UART_POLL_FREQ opt_uart.h UART_DEV_TOLERANCE_PCT opt_uart.h # options for bus/device framework BUS_DEBUG opt_bus.h # options for USB support USB_DEBUG opt_usb.h USB_HOST_ALIGN opt_usb.h USB_REQ_DEBUG opt_usb.h USB_TEMPLATE opt_usb.h USB_VERBOSE opt_usb.h USB_DMA_SINGLE_ALLOC opt_usb.h USB_EHCI_BIG_ENDIAN_DESC opt_usb.h U3G_DEBUG opt_u3g.h UKBD_DFLT_KEYMAP opt_ukbd.h UPLCOM_INTR_INTERVAL opt_uplcom.h UVSCOM_DEFAULT_OPKTSIZE opt_uvscom.h UVSCOM_INTR_INTERVAL opt_uvscom.h # options for the Realtek rtwn driver RTWN_DEBUG opt_rtwn.h RTWN_WITHOUT_UCODE opt_rtwn.h # Embedded system options INIT_PATH ROOTDEVNAME FDC_DEBUG opt_fdc.h PCFCLOCK_VERBOSE opt_pcfclock.h PCFCLOCK_MAX_RETRIES opt_pcfclock.h KTR opt_global.h KTR_ALQ opt_ktr.h KTR_MASK opt_ktr.h KTR_CPUMASK opt_ktr.h KTR_COMPILE opt_global.h KTR_BOOT_ENTRIES opt_global.h KTR_ENTRIES opt_global.h KTR_VERBOSE opt_ktr.h WITNESS opt_global.h WITNESS_KDB opt_witness.h WITNESS_NO_VNODE opt_witness.h WITNESS_SKIPSPIN opt_witness.h WITNESS_COUNT opt_witness.h OPENSOLARIS_WITNESS opt_global.h # options for ACPI support ACPI_DEBUG opt_acpi.h ACPI_MAX_TASKS opt_acpi.h ACPI_MAX_THREADS opt_acpi.h ACPI_DMAR opt_acpi.h DEV_ACPI opt_acpi.h # ISA support DEV_ISA opt_isa.h ISAPNP opt_isa.h # various 'device presence' options. DEV_BPF opt_bpf.h DEV_CARP opt_carp.h DEV_MCA opt_mca.h DEV_NETMAP opt_global.h DEV_PCI opt_pci.h DEV_PF opt_pf.h DEV_PFLOG opt_pf.h DEV_PFSYNC opt_pf.h DEV_RANDOM opt_global.h DEV_SPLASH opt_splash.h DEV_VLAN opt_vlan.h # EISA support DEV_EISA opt_eisa.h EISA_SLOTS opt_eisa.h # ed driver ED_HPP opt_ed.h ED_3C503 opt_ed.h ED_SIC opt_ed.h # bce driver BCE_DEBUG opt_bce.h BCE_NVRAM_WRITE_SUPPORT opt_bce.h SOCKBUF_DEBUG opt_global.h # options for ubsec driver UBSEC_DEBUG opt_ubsec.h UBSEC_RNDTEST opt_ubsec.h UBSEC_NO_RNG opt_ubsec.h # options for hifn driver HIFN_DEBUG opt_hifn.h HIFN_RNDTEST opt_hifn.h # options for safenet driver SAFE_DEBUG opt_safe.h SAFE_NO_RNG opt_safe.h SAFE_RNDTEST opt_safe.h # syscons/vt options MAXCONS opt_syscons.h SC_ALT_MOUSE_IMAGE opt_syscons.h SC_CUT_SPACES2TABS opt_syscons.h SC_CUT_SEPCHARS opt_syscons.h SC_DEBUG_LEVEL opt_syscons.h SC_DFLT_FONT opt_syscons.h SC_DISABLE_KDBKEY opt_syscons.h SC_DISABLE_REBOOT opt_syscons.h SC_HISTORY_SIZE opt_syscons.h SC_KERNEL_CONS_ATTR opt_syscons.h SC_KERNEL_CONS_REV_ATTR opt_syscons.h SC_MOUSE_CHAR opt_syscons.h SC_NO_CUTPASTE opt_syscons.h SC_NO_FONT_LOADING opt_syscons.h SC_NO_HISTORY opt_syscons.h SC_NO_MODE_CHANGE opt_syscons.h SC_NO_SUSPEND_VTYSWITCH opt_syscons.h SC_NO_SYSMOUSE opt_syscons.h SC_NORM_ATTR opt_syscons.h SC_NORM_REV_ATTR opt_syscons.h SC_PIXEL_MODE opt_syscons.h SC_RENDER_DEBUG opt_syscons.h SC_TWOBUTTON_MOUSE opt_syscons.h VT_ALT_TO_ESC_HACK opt_syscons.h VT_FB_DEFAULT_WIDTH opt_syscons.h VT_FB_DEFAULT_HEIGHT opt_syscons.h VT_MAXWINDOWS opt_syscons.h VT_TWOBUTTON_MOUSE opt_syscons.h DEV_SC opt_syscons.h DEV_VT opt_syscons.h # teken terminal emulator options TEKEN_CONS25 opt_teken.h TEKEN_UTF8 opt_teken.h TERMINAL_KERN_ATTR opt_teken.h TERMINAL_NORM_ATTR opt_teken.h # options for printf PRINTF_BUFR_SIZE opt_printf.h # kbd options KBD_DISABLE_KEYMAP_LOAD opt_kbd.h KBD_INSTALL_CDEV opt_kbd.h KBD_MAXRETRY opt_kbd.h KBD_MAXWAIT opt_kbd.h KBD_RESETDELAY opt_kbd.h KBDIO_DEBUG opt_kbd.h KBDMUX_DFLT_KEYMAP opt_kbdmux.h # options for the Atheros driver ATH_DEBUG opt_ath.h ATH_TXBUF opt_ath.h ATH_RXBUF opt_ath.h ATH_DIAGAPI opt_ath.h ATH_TX99_DIAG opt_ath.h ATH_ENABLE_11N opt_ath.h ATH_ENABLE_DFS opt_ath.h ATH_EEPROM_FIRMWARE opt_ath.h ATH_ENABLE_RADIOTAP_VENDOR_EXT opt_ath.h ATH_DEBUG_ALQ opt_ath.h ATH_KTR_INTR_DEBUG opt_ath.h # options for the Atheros hal AH_SUPPORT_AR5416 opt_ah.h # XXX For now, this breaks non-AR9130 chipsets, so only use it # XXX when actually targeting AR9130. AH_SUPPORT_AR9130 opt_ah.h # This is required for AR933x SoC support AH_SUPPORT_AR9330 opt_ah.h AH_SUPPORT_AR9340 opt_ah.h AH_SUPPORT_QCA9530 opt_ah.h AH_SUPPORT_QCA9550 opt_ah.h AH_DEBUG opt_ah.h AH_ASSERT opt_ah.h AH_DEBUG_ALQ opt_ah.h AH_REGOPS_FUNC opt_ah.h AH_WRITE_REGDOMAIN opt_ah.h AH_DEBUG_COUNTRY opt_ah.h AH_WRITE_EEPROM opt_ah.h AH_PRIVATE_DIAG opt_ah.h AH_NEED_DESC_SWAP opt_ah.h AH_USE_INIPDGAIN opt_ah.h AH_MAXCHAN opt_ah.h AH_RXCFG_SDMAMW_4BYTES opt_ah.h AH_INTERRUPT_DEBUGGING opt_ah.h # AR5416 and later interrupt mitigation # XXX do not use this for AR9130 AH_AR5416_INTERRUPT_MITIGATION opt_ah.h # options for the Broadcom BCM43xx driver (bwi) BWI_DEBUG opt_bwi.h BWI_DEBUG_VERBOSE opt_bwi.h # options for the Brodacom BCM43xx driver (bwn) BWN_DEBUG opt_bwn.h BWN_GPL_PHY opt_bwn.h # Options for the SIBA driver SIBA_DEBUG opt_siba.h # options for the Marvell 8335 wireless driver MALO_DEBUG opt_malo.h MALO_TXBUF opt_malo.h MALO_RXBUF opt_malo.h # options for the Marvell wireless driver MWL_DEBUG opt_mwl.h MWL_TXBUF opt_mwl.h MWL_RXBUF opt_mwl.h MWL_DIAGAPI opt_mwl.h MWL_AGGR_SIZE opt_mwl.h MWL_TX_NODROP opt_mwl.h # Options for the Intel 802.11ac wireless driver IWM_DEBUG opt_iwm.h # Options for the Intel 802.11n wireless driver IWN_DEBUG opt_iwn.h # Options for the Intel 3945ABG wireless driver WPI_DEBUG opt_wpi.h # dcons options DCONS_BUF_SIZE opt_dcons.h DCONS_POLL_HZ opt_dcons.h DCONS_FORCE_CONSOLE opt_dcons.h DCONS_FORCE_GDB opt_dcons.h # HWPMC options HWPMC_DEBUG opt_global.h HWPMC_HOOKS HWPMC_MIPS_BACKTRACE opt_hwpmc_hooks.h # XBOX options for FreeBSD/i386, but some files are MI XBOX opt_xbox.h # Interrupt filtering INTR_FILTER # 802.11 support layer IEEE80211_DEBUG opt_wlan.h IEEE80211_DEBUG_REFCNT opt_wlan.h IEEE80211_AMPDU_AGE opt_wlan.h IEEE80211_SUPPORT_MESH opt_wlan.h IEEE80211_SUPPORT_SUPERG opt_wlan.h IEEE80211_SUPPORT_TDMA opt_wlan.h IEEE80211_ALQ opt_wlan.h IEEE80211_DFS_DEBUG opt_wlan.h # 802.11 TDMA support TDMA_SLOTLEN_DEFAULT opt_tdma.h TDMA_SLOTCNT_DEFAULT opt_tdma.h TDMA_BINTVAL_DEFAULT opt_tdma.h TDMA_TXRATE_11B_DEFAULT opt_tdma.h TDMA_TXRATE_11G_DEFAULT opt_tdma.h TDMA_TXRATE_11A_DEFAULT opt_tdma.h TDMA_TXRATE_TURBO_DEFAULT opt_tdma.h TDMA_TXRATE_HALF_DEFAULT opt_tdma.h TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h TDMA_TXRATE_11NA_DEFAULT opt_tdma.h TDMA_TXRATE_11NG_DEFAULT opt_tdma.h # VideoMode PICKMODE_DEBUG opt_videomode.h # Network stack virtualization options VIMAGE opt_global.h VNET_DEBUG opt_global.h # Common Flash Interface (CFI) options CFI_SUPPORT_STRATAFLASH opt_cfi.h CFI_ARMEDANDDANGEROUS opt_cfi.h CFI_HARDWAREBYTESWAP opt_cfi.h # Sound options SND_DEBUG opt_snd.h SND_DIAGNOSTIC opt_snd.h SND_FEEDER_MULTIFORMAT opt_snd.h SND_FEEDER_FULL_MULTIFORMAT opt_snd.h SND_FEEDER_RATE_HP opt_snd.h SND_PCM_64 opt_snd.h SND_OLDSTEREO opt_snd.h X86BIOS # Flattened device tree options FDT opt_platform.h FDT_DTB_STATIC opt_platform.h # OFED Infiniband stack OFED opt_ofed.h OFED_DEBUG_INIT opt_ofed.h SDP opt_ofed.h SDP_DEBUG opt_ofed.h IPOIB opt_ofed.h IPOIB_DEBUG opt_ofed.h IPOIB_CM opt_ofed.h # Resource Accounting RACCT opt_global.h RACCT_DEFAULT_TO_DISABLED opt_global.h # Resource Limits RCTL opt_global.h # Random number generator(s) # Which CSPRNG hash we get. # If Yarrow is not chosen, Fortuna is selected. RANDOM_YARROW opt_global.h # With this, no entropy processor is loaded, but the entropy # harvesting infrastructure is present. This means an entropy # processor may be loaded as a module. RANDOM_LOADABLE opt_global.h # This turns on high-rate and potentially expensive harvesting in # the uma slab allocator. RANDOM_ENABLE_UMA opt_global.h # BHND(4) driver BHND_LOGLEVEL opt_global.h # GPIO and child devices GPIO_SPI_DEBUG opt_gpio.h # etherswitch(4) driver RTL8366_SOFT_RESET opt_etherswitch.h # evdev protocol support EVDEV_SUPPORT opt_evdev.h EVDEV_DEBUG opt_evdev.h UINPUT_DEBUG opt_evdev.h # Hyper-V network driver HN_DEBUG opt_hn.h # Encrypted kernel crash dumps EKCD opt_ekcd.h diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 99604d1418fb..20f23387648f 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -1,807 +1,813 @@ # $FreeBSD$ SYSDIR?=${.CURDIR}/.. .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR_PARALLEL= # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .else SUBDIR= \ ${_3dfx} \ ${_3dfx_linux} \ ${_aac} \ ${_aacraid} \ accf_data \ accf_dns \ accf_http \ acl_nfs4 \ acl_posix1e \ ${_acpi} \ ae \ ${_aesni} \ age \ ${_agp} \ aha \ ${_ahb} \ ahci \ ${_aic} \ aic7xxx \ alc \ ale \ alq \ ${_amdsbwd} \ ${_amdtemp} \ amr \ ${_an} \ ${_aout} \ ${_apm} \ ${_arcmsr} \ ${_arcnet} \ ${_armv8crypto} \ ${_asmc} \ ata \ ath \ ath_pci \ ${_autofs} \ ${_auxio} \ ${_bce} \ bfe \ bge \ bhnd \ ${_bxe} \ ${_bios} \ ${_bktr} \ ${_bm} \ bnxt \ bridgestp \ bwi \ bwn \ bwn_pci \ ${_bytgpio} \ cam \ ${_cardbus} \ ${_carp} \ cas \ ${_cbb} \ cc \ cd9660 \ cd9660_iconv \ ${_ce} \ ${_cfi} \ ${_chromebook_platform} \ ${_ciss} \ cloudabi \ ${_cloudabi32} \ ${_cloudabi64} \ ${_cm} \ ${_cmx} \ ${_coff} \ ${_coretemp} \ ${_cp} \ ${_cpsw} \ ${_cpuctl} \ ${_cpufreq} \ ${_crypto} \ ${_cryptodev} \ ${_cs} \ ${_ctau} \ ctl \ ${_cxgb} \ ${_cxgbe} \ dc \ dcons \ dcons_crom \ de \ ${_dpms} \ ${_dpt} \ ${_drm} \ ${_drm2} \ dummynet \ ${_ed} \ ${_efirt} \ ${_elink} \ ${_em} \ en \ ${_ep} \ ${_epic} \ esp \ ${_et} \ evdev \ ${_ex} \ ${_exca} \ ext2fs \ ${_fatm} \ fdc \ fdescfs \ ${_fe} \ filemon \ firewire \ firmware \ fuse \ ${_fxp} \ gem \ geom \ ${_glxiic} \ ${_glxsb} \ gpio \ hatm \ hifn \ hme \ ${_hpt27xx} \ ${_hptiop} \ ${_hptmv} \ ${_hptnr} \ ${_hptrr} \ hwpmc \ ${_hwpmc_mips24k} \ ${_hwpmc_mips74k} \ ${_hyperv} \ i2c \ ${_ibcore} \ ${_ibcs2} \ ${_ichwd} \ ${_ida} \ if_bridge \ if_disc \ if_edsc \ ${_if_enc} \ if_epair \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ if_lagg \ ${_if_ndis} \ ${_if_stf} \ if_tap \ if_tun \ if_vlan \ if_vxlan \ ${_iir} \ imgact_binmisc \ ${_intelspi} \ ${_io} \ ${_ioat} \ ${_ipoib} \ ${_ipdivert} \ ${_ipfilter} \ ${_ipfw} \ ipfw_nat \ ${_ipfw_nat64} \ ${_ipfw_nptv6} \ ${_ipmi} \ ip6_mroute_mod \ ip_mroute_mod \ ${_ips} \ + ${_ipsec} \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ ${_iser} \ isp \ ${_ispfw} \ ${_iwi} \ ${_iwifw} \ ${_iwm} \ ${_iwmfw} \ ${_iwn} \ ${_iwnfw} \ ${_ix} \ ${_ixv} \ ${_ixgb} \ ${_ixl} \ ${_ixlv} \ jme \ joy \ kbdmux \ kgssapi \ kgssapi_krb5 \ khelp \ krpc \ ksyms \ le \ lge \ libalias \ libiconv \ libmbpool \ libmchain \ ${_linprocfs} \ ${_linsysfs} \ ${_linux} \ ${_linux_common} \ ${_linux64} \ linuxkpi \ lmc \ lpt \ mac_biba \ mac_bsdextended \ mac_ifoff \ mac_lomac \ mac_mls \ mac_none \ mac_partition \ mac_portacl \ mac_seeotheruids \ mac_stub \ mac_test \ malo \ md \ mdio \ mem \ mfi \ mii \ mlx \ ${_mlx4} \ ${_mlx4ib} \ ${_mlx4en} \ ${_mlx5} \ ${_mlx5en} \ ${_mly} \ mmc \ mmcsd \ mpr \ mps \ mpt \ mqueue \ mrsas \ msdosfs \ msdosfs_iconv \ ${_mse} \ msk \ ${_mthca} \ mvs \ mwl \ ${_mwlfw} \ mxge \ my \ ${_nandfs} \ ${_nandsim} \ ${_ncr} \ ${_nctgpio} \ ${_ncv} \ ${_ndis} \ ${_netgraph} \ ${_nfe} \ nfscl \ nfscommon \ nfsd \ nfslock \ nfslockd \ nfssvc \ nge \ nmdm \ ${_nsp} \ nullfs \ ${_ntb} \ ${_nvd} \ ${_nvme} \ ${_nvram} \ ${_nxge} \ oce \ otus \ ${_otusfw} \ ow \ ${_padlock} \ ${_padlock_rng} \ patm \ ${_pccard} \ ${_pcfclock} \ pcn \ ${_pf} \ ${_pflog} \ ${_pfsync} \ plip \ ${_pms} \ ppbus \ ppc \ ppi \ pps \ procfs \ proto \ pseudofs \ ${_pst} \ pty \ puc \ ${_qlxge} \ ${_qlxgb} \ ${_qlxgbe} \ ral \ ${_ralfw} \ ${_random_fortuna} \ ${_random_yarrow} \ ${_random_other} \ rc4 \ ${_rdma} \ ${_rdrand_rng} \ re \ rl \ rtwn \ rtwn_pci \ rtwn_usb \ ${_rtwnfw} \ ${_s3} \ ${_safe} \ ${_sbni} \ scc \ ${_scsi_low} \ sdhci \ ${_sdhci_acpi} \ sdhci_pci \ sem \ send \ ${_sf} \ ${_sfxge} \ sge \ siba_bwn \ siftr \ siis \ sis \ sk \ smbfs \ sn \ snp \ sound \ ${_speaker} \ ${_splash} \ ${_sppp} \ ste \ ${_stg} \ stge \ ${_streams} \ ${_svr4} \ ${_sym} \ ${_syscons} \ sysvipc \ ${_ti} \ ${_tcp_fastpath} \ + ${_tcpmd5} \ tests/framework \ tests/callout_test \ tl \ tmpfs \ ${_toecore} \ ${_tpm} \ trm \ ${_twa} \ twe \ tws \ tx \ ${_txp} \ uart \ ubsec \ udf \ udf_iconv \ ufs \ uinput \ unionfs \ usb \ utopia \ ${_vesa} \ ${_virtio} \ vge \ ${_viawd} \ videomode \ vkbd \ ${_vmm} \ ${_vmware} \ ${_vpo} \ vr \ vte \ vx \ ${_vxge} \ wb \ ${_wbwd} \ ${_wi} \ wlan \ wlan_acl \ wlan_amrr \ wlan_ccmp \ wlan_rssadapt \ wlan_tkip \ wlan_wep \ wlan_xauth \ ${_wpi} \ ${_wpifw} \ ${_x86bios} \ ${_xe} \ xl \ zlib .if ${MK_AUTOFS} != "no" || defined(ALL_MODULES) _autofs= autofs .endif .if ${MK_CDDL} != "no" || defined(ALL_MODULES) .if (${MACHINE_CPUARCH} != "arm" || ${MACHINE_ARCH:Marmv6*} != "") && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_CPUARCH} != "sparc64" SUBDIR+= dtrace .endif SUBDIR+= opensolaris .endif .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if exists(${.CURDIR}/../opencrypto) _crypto= crypto _cryptodev= cryptodev _random_fortuna=random_fortuna _random_yarrow= random_yarrow _random_other= random_other .endif .endif .if ${MK_CUSE} != "no" || defined(ALL_MODULES) SUBDIR+= cuse .endif .if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES) _tcp_fastpath= tcp/fastpath .endif .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _carp= carp _toecore= toecore _if_enc= if_enc _if_gif= if_gif _if_gre= if_gre +.if ${MK_IPSEC_SUPPORT} != "no" +_ipsec= ipsec +_tcpmd5= tcp/tcpmd5 +.endif .endif .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _if_stf= if_stf .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_me= if_me _ipdivert= ipdivert _ipfw= ipfw .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nat64= ipfw_nat64 .endif .endif .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nptv6= ipfw_nptv6 .endif .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES) _ipfilter= ipfilter .endif .if ${MK_ISCSI} != "no" || defined(ALL_MODULES) SUBDIR+= iscsi SUBDIR+= iscsi_initiator .endif .if ${MK_NAND} != "no" || defined(ALL_MODULES) _nandfs= nandfs _nandsim= nandsim .endif .if ${MK_NETGRAPH} != "no" || defined(ALL_MODULES) _netgraph= netgraph .endif .if (${MK_PF} != "no" && (${MK_INET_SUPPORT} != "no" || \ ${MK_INET6_SUPPORT} != "no")) || defined(ALL_MODULES) _pf= pf _pflog= pflog .if ${MK_INET_SUPPORT} != "no" _pfsync= pfsync .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" _bce= bce _fatm= fatm _fxp= fxp _ispfw= ispfw _sf= sf _ti= ti _txp= txp .if ${MACHINE_CPUARCH} != "mips" _mwlfw= mwlfw _otusfw= otusfw _ralfw= ralfw _rtwnfw= rtwnfw .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_ARCH} != "powerpc" && ${MACHINE_ARCH} != "powerpcspe" && \ ${MACHINE_CPUARCH} != "riscv" _cxgbe= cxgbe .endif .if ${MK_ZFS} != "no" || defined(ALL_MODULES) SUBDIR+= zfs .endif .if (${MACHINE_CPUARCH} == "mips" && ${MACHINE_ARCH:Mmips64} == "") _hwpmc_mips24k= hwpmc_mips24k _hwpmc_mips74k= hwpmc_mips74k .endif .if ${MACHINE_CPUARCH} != "aarch64" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && ${MACHINE_CPUARCH} != "powerpc" && \ ${MACHINE_CPUARCH} != "riscv" _syscons= syscons _vpo= vpo .endif .if ${MACHINE_CPUARCH} != "mips" # no BUS_SPACE_UNSPECIFIED # No barrier instruction support (specific to this driver) _sym= sym # intr_disable() is a macro, causes problems .if ${MK_SOURCELESS_UCODE} != "no" _cxgb= cxgb .endif .endif .if ${MACHINE_CPUARCH} == "aarch64" _armv8crypto= armv8crypto _em= em .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _agp= agp _an= an _aout= aout _bios= bios _bktr= bktr _bxe= bxe _cardbus= cardbus _cbb= cbb _cpuctl= cpuctl _cpufreq= cpufreq _cs= cs _dpms= dpms _drm= drm _drm2= drm2 _ed= ed _em= em _ep= ep _et= et _exca= exca _fe= fe .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ibcore= ibcore .endif _if_ndis= if_ndis _io= io .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ipoib= ipoib _iser= iser .endif _ix= ix _ixv= ixv _linprocfs= linprocfs _linsysfs= linsysfs _linux= linux _nctgpio= nctgpio _ndis= ndis _pccard= pccard .if ${MK_OFED} != "no" || defined(ALL_MODULES) _rdma= rdma .endif _safe= safe _scsi_low= scsi_low _speaker= speaker _splash= splash _sppp= sppp _vmware= vmware _vxge= vxge _wbwd= wbwd _wi= wi _xe= xe _aac= aac _aacraid= aacraid _acpi= acpi .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _aesni= aesni .endif _amdsbwd= amdsbwd _amdtemp= amdtemp _arcmsr= arcmsr _asmc= asmc _bytgpio= bytgpio _ciss= ciss _chromebook_platform= chromebook_platform _cmx= cmx _coretemp= coretemp .if ${MK_SOURCELESS_HOST} != "no" _hpt27xx= hpt27xx .endif _hptiop= hptiop .if ${MK_SOURCELESS_HOST} != "no" _hptmv= hptmv _hptnr= hptnr _hptrr= hptrr .endif _hyperv= hyperv _ichwd= ichwd _ida= ida _iir= iir _intelspi= intelspi _ipmi= ipmi _ips= ips _isci= isci _ipw= ipw _iwi= iwi _iwm= iwm _iwn= iwn _ixgb= ixgb .if ${MK_SOURCELESS_UCODE} != "no" _ipwfw= ipwfw _iwifw= iwifw _iwmfw= iwmfw _iwnfw= iwnfw .endif _mlx4= mlx4 _mlx5= mlx5 .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _mlx4en= mlx4en _mlx5en= mlx5en .endif .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mlx4ib= mlx4ib .endif _mly= mly .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mthca= mthca .endif _nfe= nfe _nvd= nvd _nvme= nvme _nvram= nvram _nxge= nxge .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _padlock= padlock _padlock_rng= padlock_rng _rdrand_rng= rdrand_rng .endif _s3= s3 _sdhci_acpi= sdhci_acpi _tpm= tpm _twa= twa _vesa= vesa _viawd= viawd _virtio= virtio _wpi= wpi .if ${MK_SOURCELESS_UCODE} != "no" _wpifw= wpifw .endif _x86bios= x86bios .endif .if ${MACHINE_CPUARCH} == "amd64" _efirt= efirt _ioat= ioat _ixl= ixl _ixlv= ixlv _linux64= linux64 _linux_common= linux_common _ntb= ntb _pms= pms _qlxge= qlxge _qlxgb= qlxgb _qlxgbe= qlxgbe _sfxge= sfxge .if ${MK_BHYVE} != "no" || defined(ALL_MODULES) _vmm= vmm .endif .endif .if ${MACHINE_CPUARCH} == "i386" # XXX some of these can move to the general case when de-i386'ed # XXX some of these can move now, but are untested on other architectures. _3dfx= 3dfx _3dfx_linux= 3dfx_linux _aic= aic _apm= apm _arcnet= arcnet .if ${MK_SOURCELESS_UCODE} != "no" _ce= ce .endif _coff= coff .if ${MK_SOURCELESS_UCODE} != "no" _cp= cp .endif _elink= elink _glxiic= glxiic _glxsb= glxsb #_ibcs2= ibcs2 _mse= mse _ncr= ncr _ncv= ncv _nsp= nsp _pcfclock= pcfclock _pst= pst _sbni= sbni _streams= streams _stg= stg _svr4= svr4 .if ${MK_EISA} != "no" _ahb= ahb .endif _cm= cm .if ${MK_SOURCELESS_UCODE} != "no" _ctau= ctau .endif _dpt= dpt _ex= ex .endif .if ${MACHINE_CPUARCH} == "arm" _cfi= cfi _cpsw= cpsw .endif .if ${MACHINE_CPUARCH} == "powerpc" _agp= agp _an= an _bm= bm _cardbus= cardbus _cbb= cbb _cfi= cfi _cpufreq= cpufreq _drm= drm _exca= exca _pccard= pccard _wi= wi .endif .if ${MACHINE_ARCH} == "powerpc64" _drm2= drm2 .endif .if ${MACHINE_ARCH} == "powerpc64" || ${MACHINE_ARCH} == "powerpc" # Don't build powermac_nvram for powerpcspe, it's never supported. _nvram= powermac_nvram .endif .if ${MACHINE_CPUARCH} == "sparc64" _auxio= auxio _em= em _epic= epic .endif .if (${MACHINE_CPUARCH} == "amd64" || ${MACHINE_ARCH} == "armv6" || \ ${MACHINE_CPUARCH} == "i386") _cloudabi32= cloudabi32 .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" _cloudabi64= cloudabi64 .endif .endif SUBDIR+=${MODULES_EXTRA} .for reject in ${WITHOUT_MODULES} SUBDIR:= ${SUBDIR:N${reject}} .endfor # Calling kldxref(8) for each module is expensive. .if !defined(NO_XREF) .MAKEFLAGS+= -DNO_XREF afterinstall: .PHONY @if type kldxref >/dev/null 2>&1; then \ ${ECHO} kldxref ${DESTDIR}${KMODDIR}; \ kldxref ${DESTDIR}${KMODDIR}; \ fi .endif .include "${SYSDIR}/conf/config.mk" SUBDIR:= ${SUBDIR:u:O} .include diff --git a/sys/modules/ipsec/Makefile b/sys/modules/ipsec/Makefile new file mode 100644 index 000000000000..ed061f317768 --- /dev/null +++ b/sys/modules/ipsec/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../net ${.CURDIR}/../../netipsec + +KMOD= ipsec +SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ + ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ + opt_inet.h opt_inet6.h opt_ipsec.h opt_sctp.h +SRCS.INET= udpencap.c + +opt_ipsec.h: + @echo "#define IPSEC_SUPPORT 1" > ${.TARGET} + +.include diff --git a/sys/modules/tcp/tcpmd5/Makefile b/sys/modules/tcp/tcpmd5/Makefile new file mode 100644 index 000000000000..1337f1d5fd2d --- /dev/null +++ b/sys/modules/tcp/tcpmd5/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../netipsec + +KMOD= tcpmd5 +SRCS= xform_tcp.c opt_inet.h opt_inet6.h opt_ipsec.h + +opt_ipsec.h: + @echo "#define IPSEC_SUPPORT 1" > ${.TARGET} + +.include diff --git a/sys/net/if_ipsec.c b/sys/net/if_ipsec.c new file mode 100644 index 000000000000..aa5b75302f91 --- /dev/null +++ b/sys/net/if_ipsec.c @@ -0,0 +1,1000 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#ifdef INET6 +#include +#endif + +#include +#include + +#include + +static MALLOC_DEFINE(M_IPSEC, "ipsec", "IPsec Virtual Tunnel Interface"); +static const char ipsecname[] = "ipsec"; + +#if defined(INET) && defined(INET6) +#define IPSEC_SPCOUNT 4 +#else +#define IPSEC_SPCOUNT 2 +#endif + +struct ipsec_softc { + struct ifnet *ifp; + + struct rmlock lock; + struct secpolicy *sp[IPSEC_SPCOUNT]; + + uint32_t reqid; + u_int family; + u_int fibnum; + LIST_ENTRY(ipsec_softc) chain; + LIST_ENTRY(ipsec_softc) hash; +}; + +#define IPSEC_LOCK_INIT(sc) rm_init(&(sc)->lock, "if_ipsec softc") +#define IPSEC_LOCK_DESTROY(sc) rm_destroy(&(sc)->lock) +#define IPSEC_RLOCK_TRACKER struct rm_priotracker ipsec_tracker +#define IPSEC_RLOCK(sc) rm_rlock(&(sc)->lock, &ipsec_tracker) +#define IPSEC_RUNLOCK(sc) rm_runlock(&(sc)->lock, &ipsec_tracker) +#define IPSEC_RLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_RLOCKED) +#define IPSEC_WLOCK(sc) rm_wlock(&(sc)->lock) +#define IPSEC_WUNLOCK(sc) rm_wunlock(&(sc)->lock) +#define IPSEC_WLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_WLOCKED) + +static struct rmlock ipsec_sc_lock; +RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list"); + +#define IPSEC_SC_RLOCK_TRACKER struct rm_priotracker ipsec_sc_tracker +#define IPSEC_SC_RLOCK() rm_rlock(&ipsec_sc_lock, &ipsec_sc_tracker) +#define IPSEC_SC_RUNLOCK() rm_runlock(&ipsec_sc_lock, &ipsec_sc_tracker) +#define IPSEC_SC_RLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_RLOCKED) +#define IPSEC_SC_WLOCK() rm_wlock(&ipsec_sc_lock) +#define IPSEC_SC_WUNLOCK() rm_wunlock(&ipsec_sc_lock) +#define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED) + +LIST_HEAD(ipsec_iflist, ipsec_softc); +static VNET_DEFINE(struct ipsec_iflist, ipsec_sc_list); +static VNET_DEFINE(struct ipsec_iflist *, ipsec_sc_htbl); +static VNET_DEFINE(u_long, ipsec_sc_hmask); +#define V_ipsec_sc_list VNET(ipsec_sc_list) +#define V_ipsec_sc_htbl VNET(ipsec_sc_htbl) +#define V_ipsec_sc_hmask VNET(ipsec_sc_hmask) + +static uint32_t +ipsec_hash(uint32_t id) +{ + + return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); +} + +#define SCHASH_NHASH_LOG2 5 +#define SCHASH_NHASH (1 << SCHASH_NHASH_LOG2) +#define SCHASH_HASHVAL(id) (ipsec_hash((id)) & V_ipsec_sc_hmask) +#define SCHASH_HASH(id) &V_ipsec_sc_htbl[SCHASH_HASHVAL(id)] + +/* + * ipsec_ioctl_sx protects from concurrent ioctls. + */ +static struct sx ipsec_ioctl_sx; +SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl"); + +static int ipsec_init_reqid(struct ipsec_softc *); +static int ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *, + struct sockaddr *, uint32_t); +static void ipsec_delete_tunnel(struct ifnet *, int); + +static int ipsec_set_addresses(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static int ipsec_set_reqid(struct ifnet *, uint32_t); + +static int ipsec_ioctl(struct ifnet *, u_long, caddr_t); +static int ipsec_transmit(struct ifnet *, struct mbuf *); +static int ipsec_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static void ipsec_qflush(struct ifnet *); +static int ipsec_clone_create(struct if_clone *, int, caddr_t); +static void ipsec_clone_destroy(struct ifnet *); + +static VNET_DEFINE(struct if_clone *, ipsec_cloner); +#define V_ipsec_cloner VNET(ipsec_cloner) + +static int +ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct ipsec_softc *sc; + struct ifnet *ifp; + + sc = malloc(sizeof(*sc), M_IPSEC, M_WAITOK | M_ZERO); + sc->fibnum = curthread->td_proc->p_fibnum; + sc->ifp = ifp = if_alloc(IFT_TUNNEL); + IPSEC_LOCK_INIT(sc); + ifp->if_softc = sc; + if_initname(ifp, ipsecname, unit); + + ifp->if_addrlen = 0; + ifp->if_mtu = IPSEC_MTU; + ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + ifp->if_ioctl = ipsec_ioctl; + ifp->if_transmit = ipsec_transmit; + ifp->if_qflush = ipsec_qflush; + ifp->if_output = ipsec_output; + if_attach(ifp); + bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); + + IPSEC_SC_WLOCK(); + LIST_INSERT_HEAD(&V_ipsec_sc_list, sc, chain); + IPSEC_SC_WUNLOCK(); + return (0); +} + +static void +ipsec_clone_destroy(struct ifnet *ifp) +{ + struct ipsec_softc *sc; + + sx_xlock(&ipsec_ioctl_sx); + sc = ifp->if_softc; + + IPSEC_SC_WLOCK(); + ipsec_delete_tunnel(ifp, 1); + LIST_REMOVE(sc, chain); + IPSEC_SC_WUNLOCK(); + + bpfdetach(ifp); + if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&ipsec_ioctl_sx); + + if_free(ifp); + IPSEC_LOCK_DESTROY(sc); + free(sc, M_IPSEC); +} + +static void +vnet_ipsec_init(const void *unused __unused) +{ + + LIST_INIT(&V_ipsec_sc_list); + V_ipsec_sc_htbl = hashinit(SCHASH_NHASH, M_IPSEC, &V_ipsec_sc_hmask); + V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create, + ipsec_clone_destroy, 0); +} +VNET_SYSINIT(vnet_ipsec_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_ipsec_init, NULL); + +static void +vnet_ipsec_uninit(const void *unused __unused) +{ + + if_clone_detach(V_ipsec_cloner); + hashdestroy(V_ipsec_sc_htbl, M_IPSEC, V_ipsec_sc_hmask); +} +VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_ipsec_uninit, NULL); + +static struct secpolicy * +ipsec_getpolicy(struct ipsec_softc *sc, int dir, sa_family_t af) +{ + + switch (af) { +#ifdef INET + case AF_INET: + return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)]); +#endif +#ifdef INET6 + case AF_INET6: + return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1) +#ifdef INET + + 2 +#endif + ]); +#endif + } + return (NULL); +} + +static struct secasindex * +ipsec_getsaidx(struct ipsec_softc *sc, int dir, sa_family_t af) +{ + struct secpolicy *sp; + + sp = ipsec_getpolicy(sc, dir, af); + if (sp == NULL) + return (NULL); + return (&sp->req[0]->saidx); +} + +static int +ipsec_transmit(struct ifnet *ifp, struct mbuf *m) +{ + IPSEC_RLOCK_TRACKER; + struct ipsec_softc *sc; + struct secpolicy *sp; + struct ip *ip; + uint32_t af; + int error; + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error) { + m_freem(m); + goto err; + } +#endif + error = ENETDOWN; + sc = ifp->if_softc; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + (ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + m_freem(m); + goto err; + } + + /* Determine address family to correctly handle packet in BPF */ + ip = mtod(m, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + af = AF_INET; + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + af = AF_INET6; + break; +#endif + default: + error = EAFNOSUPPORT; + m_freem(m); + goto err; + } + + /* + * Loop prevention. + * XXX: for now just check presence of IPSEC_OUT_DONE mbuf tag. + * We can read full chain and compare destination address, + * proto and mode from xform_history with values from softc. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { + m_freem(m); + goto err; + } + + IPSEC_RLOCK(sc); + if (sc->family == 0) { + IPSEC_RUNLOCK(sc); + m_freem(m); + goto err; + } + sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af); + key_addref(sp); + M_SETFIB(m, sc->fibnum); + IPSEC_RUNLOCK(sc); + + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); + + switch (af) { +#ifdef INET + case AF_INET: + error = ipsec4_process_packet(m, sp, NULL); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = ipsec6_process_packet(m, sp, NULL); + break; +#endif + default: + panic("%s: unknown address family\n", __func__); + } +err: + if (error != 0) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); +} + +static void +ipsec_qflush(struct ifnet *ifp __unused) +{ + +} + +static int +ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + + return (ifp->if_transmit(ifp, m)); +} + +int +ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) +{ + IPSEC_SC_RLOCK_TRACKER; + struct secasindex *saidx; + struct ipsec_softc *sc; + struct ifnet *ifp; + + if (sav->state != SADB_SASTATE_MATURE && + sav->state != SADB_SASTATE_DYING) { + m_freem(m); + return (ENETDOWN); + } + + if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL || + sav->sah->saidx.proto != IPPROTO_ESP) + return (0); + + IPSEC_SC_RLOCK(); + /* + * We only acquire SC_RLOCK() while we are doing search in + * ipsec_sc_htbl. It is safe, because removing softc or changing + * of reqid/addresses requires removing from hash table. + */ + LIST_FOREACH(sc, SCHASH_HASH(sav->sah->saidx.reqid), hash) { + saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND, + sav->sah->saidx.src.sa.sa_family); + /* SA's reqid should match reqid in SP */ + if (saidx == NULL || + sav->sah->saidx.reqid != saidx->reqid) + continue; + /* SAH's addresses should match tunnel endpoints. */ + if (key_sockaddrcmp(&sav->sah->saidx.dst.sa, + &saidx->dst.sa, 0) != 0) + continue; + if (key_sockaddrcmp(&sav->sah->saidx.src.sa, + &saidx->src.sa, 0) == 0) + break; + } + if (sc == NULL) { + IPSEC_SC_RUNLOCK(); + /* Tunnel was not found. Nothing to do. */ + return (0); + } + ifp = sc->ifp; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + (ifp->if_flags & IFF_UP) == 0) { + IPSEC_SC_RUNLOCK(); + m_freem(m); + return (ENETDOWN); + } + /* + * We found matching and working tunnel. + * Set its ifnet as receiving interface. + */ + m->m_pkthdr.rcvif = ifp; + IPSEC_SC_RUNLOCK(); + + /* m_clrprotoflags(m); */ + M_SETFIB(m, ifp->if_fib); + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) { + m_freem(m); + return (ENETDOWN); + } + return (0); +} + +/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ +int +ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + IPSEC_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq*)data; + struct sockaddr *dst, *src; + struct ipsec_softc *sc; + struct secasindex *saidx; +#ifdef INET + struct sockaddr_in *sin = NULL; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + uint32_t reqid; + int error; + + switch (cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCGIFMTU: + case SIOCSIFFLAGS: + return (0); + case SIOCSIFMTU: + if (ifr->ifr_mtu < IPSEC_MTU_MIN || + ifr->ifr_mtu > IPSEC_MTU_MAX) + return (EINVAL); + else + ifp->if_mtu = ifr->ifr_mtu; + return (0); + } + sx_xlock(&ipsec_ioctl_sx); + sc = ifp->if_softc; + /* Check that softc is still here */ + if (sc == NULL) { + error = ENXIO; + goto bad; + } + error = 0; + switch (cmd) { + case SIOCSIFPHYADDR: +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: +#endif + error = EINVAL; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); + break; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); + break; +#endif + default: + goto bad; + } + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto bad; + + /* validate sa_len */ + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + goto bad; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto bad; + break; +#endif + default: + error = EAFNOSUPPORT; + goto bad; + } + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + goto bad; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + goto bad; +#endif + } + error = EADDRNOTAVAIL; + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) + goto bad; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto bad; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto bad; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto bad; +#endif + }; + error = ipsec_set_addresses(ifp, src, dst); + break; + case SIOCDIFPHYADDR: + ipsec_delete_tunnel(ifp, 0); + break; + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: +#endif + IPSEC_RLOCK(sc); + if (sc->family == 0) { + IPSEC_RUNLOCK(sc); + error = EADDRNOTAVAIL; + break; + } + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (saidx->src.sa.sa_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (saidx->src.sa.sa_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + break; +#endif + default: + error = EAFNOSUPPORT; + } + if (error == 0) { + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr = saidx->src.sin.sin_addr; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr = saidx->dst.sin.sin_addr; + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = saidx->src.sin6.sin6_addr; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = saidx->dst.sin6.sin6_addr; + break; +#endif + } + } + IPSEC_RUNLOCK(sc); + if (error != 0) + break; + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = sa6_recoverscope(sin6); + if (error != 0) + memset(sin6, 0, sizeof(*sin6)); +#endif + } + break; + case SIOCGTUNFIB: + ifr->ifr_fib = sc->fibnum; + break; + case SIOCSTUNFIB: + if ((error = priv_check(curthread, PRIV_NET_SETIFFIB)) != 0) + break; + if (ifr->ifr_fib >= rt_numfibs) + error = EINVAL; + else + sc->fibnum = ifr->ifr_fib; + break; + case IPSECGREQID: + reqid = sc->reqid; + error = copyout(&reqid, ifr->ifr_data, sizeof(reqid)); + break; + case IPSECSREQID: + if ((error = priv_check(curthread, PRIV_NET_SETIFCAP)) != 0) + break; + error = copyin(ifr->ifr_data, &reqid, sizeof(reqid)); + if (error != 0) + break; + error = ipsec_set_reqid(ifp, reqid); + break; + default: + error = EINVAL; + break; + } +bad: + sx_xunlock(&ipsec_ioctl_sx); + return (error); +} + +/* + * Allocate new private security policies for tunneling interface. + * Each tunneling interface has following security policies for + * both AF: + * 0.0.0.0/0[any] 0.0.0.0/0[any] -P in \ + * ipsec esp/tunnel/RemoteIP-LocalIP/unique:reqid + * 0.0.0.0/0[any] 0.0.0.0/0[any] -P out \ + * ipsec esp/tunnel/LocalIP-RemoteIP/unique:reqid + */ +static int +ipsec_newpolicies(struct secpolicy *sp[IPSEC_SPCOUNT], + const struct sockaddr *src, const struct sockaddr *dst, uint32_t reqid) +{ + struct ipsecrequest *isr; + int i; + + memset(sp, 0, sizeof(struct secpolicy *) * IPSEC_SPCOUNT); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + if ((sp[i] = key_newsp()) == NULL) + goto fail; + if ((isr = ipsec_newisr()) == NULL) + goto fail; + + sp[i]->policy = IPSEC_POLICY_IPSEC; + sp[i]->state = IPSEC_SPSTATE_DEAD; + sp[i]->req[sp[i]->tcount++] = isr; + sp[i]->created = time_second; + isr->level = IPSEC_LEVEL_UNIQUE; + isr->saidx.proto = IPPROTO_ESP; + isr->saidx.mode = IPSEC_MODE_TUNNEL; + isr->saidx.reqid = reqid; + if (i % 2 == 0) { + sp[i]->spidx.dir = IPSEC_DIR_INBOUND; + bcopy(src, &isr->saidx.dst, src->sa_len); + bcopy(dst, &isr->saidx.src, dst->sa_len); + } else { + sp[i]->spidx.dir = IPSEC_DIR_OUTBOUND; + bcopy(src, &isr->saidx.src, src->sa_len); + bcopy(dst, &isr->saidx.dst, dst->sa_len); + } + sp[i]->spidx.ul_proto = IPSEC_ULPROTO_ANY; +#ifdef INET + if (i < 2) { + sp[i]->spidx.src.sa.sa_family = + sp[i]->spidx.dst.sa.sa_family = AF_INET; + sp[i]->spidx.src.sa.sa_len = + sp[i]->spidx.dst.sa.sa_len = + sizeof(struct sockaddr_in); + continue; + } +#endif +#ifdef INET6 + sp[i]->spidx.src.sa.sa_family = + sp[i]->spidx.dst.sa.sa_family = AF_INET6; + sp[i]->spidx.src.sa.sa_len = + sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in6); +#endif + } + return (0); +fail: + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&sp[i]); + return (ENOMEM); +} + +static int +ipsec_check_reqid(uint32_t reqid) +{ + struct ipsec_softc *sc; + + IPSEC_SC_RLOCK_ASSERT(); + LIST_FOREACH(sc, &V_ipsec_sc_list, chain) { + if (sc->reqid == reqid) + return (EEXIST); + } + return (0); +} + +/* + * We use key_newreqid() to automatically obtain unique reqid. + * Then we check that given id is unique, i.e. it is not used by + * another if_ipsec(4) interface. This macro limits the number of + * tries to get unique id. + */ +#define IPSEC_REQID_TRYCNT 64 +static int +ipsec_init_reqid(struct ipsec_softc *sc) +{ + uint32_t reqid; + int trycount; + + IPSEC_SC_RLOCK_ASSERT(); + + if (sc->reqid != 0) /* already initialized */ + return (0); + + trycount = IPSEC_REQID_TRYCNT; + while (--trycount > 0) { + reqid = key_newreqid(); + if (ipsec_check_reqid(reqid) == 0) + break; + } + if (trycount == 0) + return (EEXIST); + sc->reqid = reqid; + return (0); +} + +/* + * Set or update reqid for given tunneling interface. + * When specified reqid is zero, generate new one. + * We are protected by ioctl_sx lock from concurrent id generation. + * Also softc would not disappear while we hold ioctl_sx lock. + */ +static int +ipsec_set_reqid(struct ifnet *ifp, uint32_t reqid) +{ + IPSEC_SC_RLOCK_TRACKER; + struct ipsec_softc *sc; + struct secasindex *saidx; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + sc = ifp->if_softc; + if (sc->reqid == reqid && reqid != 0) + return (0); + + IPSEC_SC_RLOCK(); + if (reqid != 0) { + /* Check that specified reqid doesn't exist */ + if (ipsec_check_reqid(reqid) != 0) { + IPSEC_SC_RUNLOCK(); + return (EEXIST); + } + sc->reqid = reqid; + } else { + /* Generate new reqid */ + if (ipsec_init_reqid(sc) != 0) { + IPSEC_SC_RUNLOCK(); + return (EEXIST); + } + } + IPSEC_SC_RUNLOCK(); + + /* Tunnel isn't fully configured, just return. */ + if (sc->family == 0) + return (0); + + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); + KASSERT(saidx != NULL, + ("saidx is NULL, but family is %d", sc->family)); + return (ipsec_set_tunnel(sc, &saidx->src.sa, &saidx->dst.sa, + sc->reqid)); +} + +/* + * Set tunnel endpoints addresses. + */ +static int +ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src, + struct sockaddr *dst) +{ + IPSEC_SC_RLOCK_TRACKER; + struct ipsec_softc *sc, *tsc; + struct secasindex *saidx; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + sc = ifp->if_softc; + if (sc->family != 0) { + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, + src->sa_family); + if (saidx != NULL && saidx->reqid == sc->reqid && + key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && + key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) + return (0); /* Nothing has been changed. */ + + } + /* + * We cannot service IPsec tunnel when source address is + * not our own. + */ +#ifdef INET + if (src->sa_family == AF_INET && + in_localip(satosin(src)->sin_addr) == 0) + return (EADDRNOTAVAIL); +#endif +#ifdef INET6 + /* + * NOTE: IPv6 addresses are in kernel internal form with + * embedded scope zone id. + */ + if (src->sa_family == AF_INET6 && + in6_localip(&satosin6(src)->sin6_addr) == 0) + return (EADDRNOTAVAIL); +#endif + /* Check that given addresses aren't already configured */ + IPSEC_SC_RLOCK(); + LIST_FOREACH(tsc, &V_ipsec_sc_list, chain) { + if (tsc == sc || tsc->family != src->sa_family) + continue; + saidx = ipsec_getsaidx(tsc, IPSEC_DIR_OUTBOUND, tsc->family); + if (key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && + key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) { + /* We already have tunnel with such addresses */ + IPSEC_SC_RUNLOCK(); + return (EADDRNOTAVAIL); + } + } + /* If reqid is not set, generate new one. */ + if (ipsec_init_reqid(sc) != 0) { + IPSEC_SC_RUNLOCK(); + return (EEXIST); + } + IPSEC_SC_RUNLOCK(); + return (ipsec_set_tunnel(sc, src, dst, sc->reqid)); +} + +static int +ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src, + struct sockaddr *dst, uint32_t reqid) +{ + struct secpolicy *sp[IPSEC_SPCOUNT]; + struct secpolicy *oldsp[IPSEC_SPCOUNT]; + int i, f; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + /* Allocate SP with new addresses. */ + if (ipsec_newpolicies(sp, src, dst, reqid) == 0) { + /* Add new policies to SPDB */ + if (key_register_ifnet(sp, IPSEC_SPCOUNT) != 0) { + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&sp[i]); + return (EAGAIN); + } + IPSEC_SC_WLOCK(); + if ((f = sc->family) != 0) + LIST_REMOVE(sc, hash); + IPSEC_WLOCK(sc); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + oldsp[i] = sc->sp[i]; + sc->sp[i] = sp[i]; + } + sc->family = src->sa_family; + IPSEC_WUNLOCK(sc); + LIST_INSERT_HEAD(SCHASH_HASH(sc->reqid), sc, hash); + IPSEC_SC_WUNLOCK(); + } else { + sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + return (ENOMEM); + } + + sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; + if (f != 0) { + key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&oldsp[i]); + } + return (0); +} + +static void +ipsec_delete_tunnel(struct ifnet *ifp, int locked) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct secpolicy *oldsp[IPSEC_SPCOUNT]; + int i; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if (sc->family != 0) { + if (!locked) + IPSEC_SC_WLOCK(); + /* Remove from hash table */ + LIST_REMOVE(sc, hash); + IPSEC_WLOCK(sc); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + oldsp[i] = sc->sp[i]; + sc->sp[i] = NULL; + } + sc->family = 0; + IPSEC_WUNLOCK(sc); + if (!locked) + IPSEC_SC_WUNLOCK(); + key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&oldsp[i]); + } +} diff --git a/sys/net/if_ipsec.h b/sys/net/if_ipsec.h new file mode 100644 index 000000000000..e7c8e7b8c564 --- /dev/null +++ b/sys/net/if_ipsec.h @@ -0,0 +1,39 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NET_IF_IPSEC_H_ +#define _NET_IF_IPSEC_H_ + +#define IPSEC_MTU 1400 +#define IPSEC_MTU_MIN 1280 +#define IPSEC_MTU_MAX 8192 +#define IPSECGREQID _IOWR('i', 160, struct ifreq) +#define IPSECSREQID _IOW('i', 161, struct ifreq) + +#endif /* _NET_IF_IPSEC_H_ */ diff --git a/sys/net/pfkeyv2.h b/sys/net/pfkeyv2.h index 353488197c9d..cc2b746aec3b 100644 --- a/sys/net/pfkeyv2.h +++ b/sys/net/pfkeyv2.h @@ -1,450 +1,452 @@ /* $FreeBSD$ */ /* $KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file has been derived rfc 2367, * And added some flags of SADB_KEY_FLAGS_ as SADB_X_EXT_. * sakane@ydc.co.jp */ #ifndef _NET_PFKEYV2_H_ #define _NET_PFKEYV2_H_ /* This file defines structures and symbols for the PF_KEY Version 2 key management interface. It was written at the U.S. Naval Research Laboratory. This file is in the public domain. The authors ask that you leave this credit intact on any copies of this file. */ #ifndef __PFKEY_V2_H #define __PFKEY_V2_H 1 #define PF_KEY_V2 2 #define PFKEYV2_REVISION 199806L #define SADB_RESERVED 0 #define SADB_GETSPI 1 #define SADB_UPDATE 2 #define SADB_ADD 3 #define SADB_DELETE 4 #define SADB_GET 5 #define SADB_ACQUIRE 6 #define SADB_REGISTER 7 #define SADB_EXPIRE 8 #define SADB_FLUSH 9 #define SADB_DUMP 10 #define SADB_X_PROMISC 11 #define SADB_X_PCHANGE 12 #define SADB_X_SPDUPDATE 13 #define SADB_X_SPDADD 14 #define SADB_X_SPDDELETE 15 /* by policy index */ #define SADB_X_SPDGET 16 #define SADB_X_SPDACQUIRE 17 #define SADB_X_SPDDUMP 18 #define SADB_X_SPDFLUSH 19 #define SADB_X_SPDSETIDX 20 #define SADB_X_SPDEXPIRE 21 #define SADB_X_SPDDELETE2 22 /* by policy id */ #define SADB_MAX 22 struct sadb_msg { u_int8_t sadb_msg_version; u_int8_t sadb_msg_type; u_int8_t sadb_msg_errno; u_int8_t sadb_msg_satype; u_int16_t sadb_msg_len; u_int16_t sadb_msg_reserved; u_int32_t sadb_msg_seq; u_int32_t sadb_msg_pid; }; struct sadb_ext { u_int16_t sadb_ext_len; u_int16_t sadb_ext_type; }; struct sadb_sa { u_int16_t sadb_sa_len; u_int16_t sadb_sa_exttype; u_int32_t sadb_sa_spi; u_int8_t sadb_sa_replay; u_int8_t sadb_sa_state; u_int8_t sadb_sa_auth; u_int8_t sadb_sa_encrypt; u_int32_t sadb_sa_flags; }; struct sadb_lifetime { u_int16_t sadb_lifetime_len; u_int16_t sadb_lifetime_exttype; u_int32_t sadb_lifetime_allocations; u_int64_t sadb_lifetime_bytes; u_int64_t sadb_lifetime_addtime; u_int64_t sadb_lifetime_usetime; }; struct sadb_address { u_int16_t sadb_address_len; u_int16_t sadb_address_exttype; u_int8_t sadb_address_proto; u_int8_t sadb_address_prefixlen; u_int16_t sadb_address_reserved; }; struct sadb_key { u_int16_t sadb_key_len; u_int16_t sadb_key_exttype; u_int16_t sadb_key_bits; u_int16_t sadb_key_reserved; }; struct sadb_ident { u_int16_t sadb_ident_len; u_int16_t sadb_ident_exttype; u_int16_t sadb_ident_type; u_int16_t sadb_ident_reserved; u_int64_t sadb_ident_id; }; struct sadb_sens { u_int16_t sadb_sens_len; u_int16_t sadb_sens_exttype; u_int32_t sadb_sens_dpd; u_int8_t sadb_sens_sens_level; u_int8_t sadb_sens_sens_len; u_int8_t sadb_sens_integ_level; u_int8_t sadb_sens_integ_len; u_int32_t sadb_sens_reserved; }; struct sadb_prop { u_int16_t sadb_prop_len; u_int16_t sadb_prop_exttype; u_int8_t sadb_prop_replay; u_int8_t sadb_prop_reserved[3]; }; struct sadb_comb { u_int8_t sadb_comb_auth; u_int8_t sadb_comb_encrypt; u_int16_t sadb_comb_flags; u_int16_t sadb_comb_auth_minbits; u_int16_t sadb_comb_auth_maxbits; u_int16_t sadb_comb_encrypt_minbits; u_int16_t sadb_comb_encrypt_maxbits; u_int32_t sadb_comb_reserved; u_int32_t sadb_comb_soft_allocations; u_int32_t sadb_comb_hard_allocations; u_int64_t sadb_comb_soft_bytes; u_int64_t sadb_comb_hard_bytes; u_int64_t sadb_comb_soft_addtime; u_int64_t sadb_comb_hard_addtime; u_int64_t sadb_comb_soft_usetime; u_int64_t sadb_comb_hard_usetime; }; struct sadb_supported { u_int16_t sadb_supported_len; u_int16_t sadb_supported_exttype; u_int32_t sadb_supported_reserved; }; struct sadb_alg { u_int8_t sadb_alg_id; u_int8_t sadb_alg_ivlen; u_int16_t sadb_alg_minbits; u_int16_t sadb_alg_maxbits; u_int16_t sadb_alg_reserved; }; struct sadb_spirange { u_int16_t sadb_spirange_len; u_int16_t sadb_spirange_exttype; u_int32_t sadb_spirange_min; u_int32_t sadb_spirange_max; u_int32_t sadb_spirange_reserved; }; struct sadb_x_kmprivate { u_int16_t sadb_x_kmprivate_len; u_int16_t sadb_x_kmprivate_exttype; u_int32_t sadb_x_kmprivate_reserved; }; /* * XXX Additional SA Extension. * mode: tunnel or transport * reqid: to make SA unique nevertheless the address pair of SA are same. * Mainly it's for VPN. */ struct sadb_x_sa2 { u_int16_t sadb_x_sa2_len; u_int16_t sadb_x_sa2_exttype; u_int8_t sadb_x_sa2_mode; u_int8_t sadb_x_sa2_reserved1; u_int16_t sadb_x_sa2_reserved2; u_int32_t sadb_x_sa2_sequence; /* lowermost 32bit of sequence number */ u_int32_t sadb_x_sa2_reqid; }; /* XXX Policy Extension */ struct sadb_x_policy { u_int16_t sadb_x_policy_len; u_int16_t sadb_x_policy_exttype; u_int16_t sadb_x_policy_type; /* See policy type of ipsec.h */ u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */ u_int8_t sadb_x_policy_reserved; u_int32_t sadb_x_policy_id; u_int32_t sadb_x_policy_priority; }; _Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch"); /* * When policy_type == IPSEC, it is followed by some of * the ipsec policy request. * [total length of ipsec policy requests] * = (sadb_x_policy_len * sizeof(uint64_t) - sizeof(struct sadb_x_policy)) */ /* XXX IPsec Policy Request Extension */ /* * This structure is aligned 8 bytes. */ struct sadb_x_ipsecrequest { u_int16_t sadb_x_ipsecrequest_len; /* structure length in 64 bits. */ u_int16_t sadb_x_ipsecrequest_proto; /* See ipsec.h */ u_int8_t sadb_x_ipsecrequest_mode; /* See IPSEC_MODE_XX in ipsec.h. */ u_int8_t sadb_x_ipsecrequest_level; /* See IPSEC_LEVEL_XX in ipsec.h */ u_int16_t sadb_x_ipsecrequest_reqid; /* See ipsec.h */ /* * followed by source IP address of SA, and immediately followed by * destination IP address of SA. These encoded into two of sockaddr * structure without any padding. Must set each sa_len exactly. * Each of length of the sockaddr structure are not aligned to 64bits, * but sum of x_request and addresses is aligned to 64bits. */ }; /* NAT-Traversal type, see RFC 3948 (and drafts). */ struct sadb_x_nat_t_type { u_int16_t sadb_x_nat_t_type_len; u_int16_t sadb_x_nat_t_type_exttype; u_int8_t sadb_x_nat_t_type_type; u_int8_t sadb_x_nat_t_type_reserved[3]; }; _Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch"); /* NAT-Traversal source or destination port. */ struct sadb_x_nat_t_port { u_int16_t sadb_x_nat_t_port_len; u_int16_t sadb_x_nat_t_port_exttype; u_int16_t sadb_x_nat_t_port_port; u_int16_t sadb_x_nat_t_port_reserved; }; _Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch"); /* ESP fragmentation size. */ struct sadb_x_nat_t_frag { u_int16_t sadb_x_nat_t_frag_len; u_int16_t sadb_x_nat_t_frag_exttype; u_int16_t sadb_x_nat_t_frag_fraglen; u_int16_t sadb_x_nat_t_frag_reserved; }; _Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch"); /* Additional large replay window support */ struct sadb_x_sa_replay { u_int16_t sadb_x_sa_replay_len; u_int16_t sadb_x_sa_replay_exttype; u_int32_t sadb_x_sa_replay_replay; /* in packets */ }; _Static_assert(sizeof(struct sadb_x_sa_replay) == 8, "struct size mismatch"); #define SADB_EXT_RESERVED 0 #define SADB_EXT_SA 1 #define SADB_EXT_LIFETIME_CURRENT 2 #define SADB_EXT_LIFETIME_HARD 3 #define SADB_EXT_LIFETIME_SOFT 4 #define SADB_EXT_ADDRESS_SRC 5 #define SADB_EXT_ADDRESS_DST 6 #define SADB_EXT_ADDRESS_PROXY 7 #define SADB_EXT_KEY_AUTH 8 #define SADB_EXT_KEY_ENCRYPT 9 #define SADB_EXT_IDENTITY_SRC 10 #define SADB_EXT_IDENTITY_DST 11 #define SADB_EXT_SENSITIVITY 12 #define SADB_EXT_PROPOSAL 13 #define SADB_EXT_SUPPORTED_AUTH 14 #define SADB_EXT_SUPPORTED_ENCRYPT 15 #define SADB_EXT_SPIRANGE 16 #define SADB_X_EXT_KMPRIVATE 17 #define SADB_X_EXT_POLICY 18 #define SADB_X_EXT_SA2 19 #define SADB_X_EXT_NAT_T_TYPE 20 #define SADB_X_EXT_NAT_T_SPORT 21 #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 /* Deprecated. */ #define SADB_X_EXT_NAT_T_OAI 23 /* Peer's NAT_OA for src of SA. */ #define SADB_X_EXT_NAT_T_OAR 24 /* Peer's NAT_OA for dst of SA. */ #define SADB_X_EXT_NAT_T_FRAG 25 /* Manual MTU override. */ #define SADB_X_EXT_SA_REPLAY 26 /* Replay window override. */ -#define SADB_EXT_MAX 26 +#define SADB_X_EXT_NEW_ADDRESS_SRC 27 +#define SADB_X_EXT_NEW_ADDRESS_DST 28 +#define SADB_EXT_MAX 28 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 #define SADB_SATYPE_ESP 3 #define SADB_SATYPE_RSVP 5 #define SADB_SATYPE_OSPFV2 6 #define SADB_SATYPE_RIPV2 7 #define SADB_SATYPE_MIP 8 #define SADB_X_SATYPE_IPCOMP 9 /*#define SADB_X_SATYPE_POLICY 10 obsolete, do not reuse */ #define SADB_X_SATYPE_TCPSIGNATURE 11 #define SADB_SATYPE_MAX 12 #define SADB_SASTATE_LARVAL 0 #define SADB_SASTATE_MATURE 1 #define SADB_SASTATE_DYING 2 #define SADB_SASTATE_DEAD 3 #define SADB_SASTATE_MAX 3 #define SADB_SAFLAGS_PFS 1 /* * Though some of these numbers (both _AALG and _EALG) appear to be * IKEv2 numbers and others original IKE numbers, they have no meaning. * These are constants that the various IKE daemons use to tell the kernel * what cipher to use. * * Do not use these constants directly to decide which Transformation ID * to send. You are responsible for mapping them yourself. */ #define SADB_AALG_NONE 0 #define SADB_AALG_MD5HMAC 2 #define SADB_AALG_SHA1HMAC 3 #define SADB_AALG_MAX 252 #define SADB_X_AALG_SHA2_256 5 #define SADB_X_AALG_SHA2_384 6 #define SADB_X_AALG_SHA2_512 7 #define SADB_X_AALG_RIPEMD160HMAC 8 #define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */ #define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */ #define SADB_X_AALG_AES192GMAC 12 #define SADB_X_AALG_AES256GMAC 13 #define SADB_X_AALG_MD5 249 /* Keyed MD5 */ #define SADB_X_AALG_SHA 250 /* Keyed SHA */ #define SADB_X_AALG_NULL 251 /* null authentication */ #define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */ #define SADB_EALG_NONE 0 #define SADB_EALG_DESCBC 2 #define SADB_EALG_3DESCBC 3 #define SADB_X_EALG_CAST128CBC 6 #define SADB_X_EALG_BLOWFISHCBC 7 #define SADB_EALG_NULL 11 #define SADB_X_EALG_RIJNDAELCBC 12 #define SADB_X_EALG_AES 12 #define SADB_X_EALG_AESCTR 13 #define SADB_X_EALG_AESGCM8 18 /* RFC4106 */ #define SADB_X_EALG_AESGCM12 19 #define SADB_X_EALG_AESGCM16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */ #define SADB_EALG_MAX 23 /* !!! keep updated !!! */ /* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_CALG_NONE 0 #define SADB_X_CALG_OUI 1 #define SADB_X_CALG_DEFLATE 2 #define SADB_X_CALG_LZS 3 #define SADB_X_CALG_MAX 4 #define SADB_IDENTTYPE_RESERVED 0 #define SADB_IDENTTYPE_PREFIX 1 #define SADB_IDENTTYPE_FQDN 2 #define SADB_IDENTTYPE_USERFQDN 3 #define SADB_X_IDENTTYPE_ADDR 4 #define SADB_IDENTTYPE_MAX 4 /* `flags' in sadb_sa structure holds followings */ #define SADB_X_EXT_NONE 0x0000 /* i.e. new format. */ #define SADB_X_EXT_OLD 0x0001 /* old format. */ #define SADB_X_EXT_IV4B 0x0010 /* IV length of 4 bytes in use */ #define SADB_X_EXT_DERIV 0x0020 /* DES derived */ #define SADB_X_EXT_CYCSEQ 0x0040 /* allowing to cyclic sequence. */ /* three of followings are exclusive flags each them */ #define SADB_X_EXT_PSEQ 0x0000 /* sequencial padding for ESP */ #define SADB_X_EXT_PRAND 0x0100 /* random padding for ESP */ #define SADB_X_EXT_PZERO 0x0200 /* zero padding for ESP */ #define SADB_X_EXT_PMASK 0x0300 /* mask for padding flag */ #if 1 #define SADB_X_EXT_RAWCPI 0x0080 /* use well known CPI (IPComp) */ #endif #define SADB_KEY_FLAGS_MAX 0x0fff /* SPI size for PF_KEYv2 */ #define PFKEY_SPI_SIZE sizeof(u_int32_t) /* Identifier for menber of lifetime structure */ #define SADB_X_LIFETIME_ALLOCATIONS 0 #define SADB_X_LIFETIME_BYTES 1 #define SADB_X_LIFETIME_ADDTIME 2 #define SADB_X_LIFETIME_USETIME 3 /* The rate for SOFT lifetime against HARD one. */ #define PFKEY_SOFT_LIFETIME_RATE 80 /* Utilities */ #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) #define PFKEY_EXTLEN(msg) \ PFKEY_UNUNIT64(((struct sadb_ext *)(msg))->sadb_ext_len) #define PFKEY_ADDR_PREFIX(ext) \ (((struct sadb_address *)(ext))->sadb_address_prefixlen) #define PFKEY_ADDR_PROTO(ext) \ (((struct sadb_address *)(ext))->sadb_address_proto) #define PFKEY_ADDR_SADDR(ext) \ ((struct sockaddr *)((caddr_t)(ext) + sizeof(struct sadb_address))) /* in 64bits */ #define PFKEY_UNUNIT64(a) ((a) << 3) #define PFKEY_UNIT64(a) ((a) >> 3) #endif /* __PFKEY_V2_H */ #endif /* _NET_PFKEYV2_H_ */ diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index a14f78595e85..c9717dde0ab2 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,2935 +1,2931 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2007-2009 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_pcbgroup.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #include #endif #ifdef INET #include #endif #ifdef INET6 #include #include #include #include #endif /* INET6 */ - -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include static struct callout ipport_tick_callout; /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. */ VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */ VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */ VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */ VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */ VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */ VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */ /* * Reserved ports accessible only to root. There are significant * security considerations that must be accounted for when changing these, * but the security benefits can be great. Please be careful. */ VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */ VNET_DEFINE(int, ipport_reservedlow); /* Variables dealing with random ephemeral port allocation. */ VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */ VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */ VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */ VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */ VNET_DEFINE(int, ipport_tcpallocs); static VNET_DEFINE(int, ipport_tcplastcount); #define V_ipport_tcplastcount VNET(ipport_tcplastcount) static void in_pcbremlists(struct inpcb *inp); #ifdef INET static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp); #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, arg1, arg2, req); if (error == 0) { RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX); RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX); RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX); RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX); } return (error); } #undef RANGECHK static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipport_randomized), 0, "Enable random port allocation"); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port " "allocations before switching to a sequental one"); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipport_randomtime), 0, "Minimum time to keep sequental port " "allocation before switching to a random one"); #endif /* INET */ /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called with * the pcbinfo lock held, and often, the inpcb lock held, as these utility * functions often modify hash chains or addresses in pcbs. */ /* * Initialize an inpcbinfo -- we should be able to reduce the number of * arguments in time. */ void in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, uint32_t inpcbzone_flags, u_int hashfields) { INP_INFO_LOCK_INIT(pcbinfo, name); INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */ INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist"); #ifdef VIMAGE pcbinfo->ipi_vnet = curvnet; #endif pcbinfo->ipi_listhead = listhead; LIST_INIT(pcbinfo->ipi_listhead); pcbinfo->ipi_count = 0; pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, &pcbinfo->ipi_hashmask); pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, &pcbinfo->ipi_porthashmask); #ifdef PCBGROUP in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); #endif pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, inpcbzone_flags); uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); uma_zone_set_warning(pcbinfo->ipi_zone, "kern.ipc.maxsockets limit reached"); } /* * Destroy an inpcbinfo. */ void in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) { KASSERT(pcbinfo->ipi_count == 0, ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count)); hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, pcbinfo->ipi_porthashmask); #ifdef PCBGROUP in_pcbgroup_destroy(pcbinfo); #endif uma_zdestroy(pcbinfo->ipi_zone); INP_LIST_LOCK_DESTROY(pcbinfo); INP_HASH_LOCK_DESTROY(pcbinfo); INP_INFO_LOCK_DESTROY(pcbinfo); } /* * Allocate a PCB and associate it with the socket. * On success return with the PCB locked. */ int in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) { struct inpcb *inp; int error; #ifdef INVARIANTS if (pcbinfo == &V_tcbinfo) { INP_INFO_RLOCK_ASSERT(pcbinfo); } else { INP_INFO_WLOCK_ASSERT(pcbinfo); } #endif error = 0; inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); if (inp == NULL) return (ENOBUFS); bzero(inp, inp_zero_size); inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; inp->inp_cred = crhold(so->so_cred); inp->inp_inc.inc_fibnum = so->so_fibnum; #ifdef MAC error = mac_inpcb_init(inp, M_NOWAIT); if (error != 0) goto out; mac_inpcb_create(so, inp); #endif -#ifdef IPSEC - error = ipsec_init_policy(so, &inp->inp_sp); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + error = ipsec_init_pcbpolicy(inp); if (error != 0) { #ifdef MAC mac_inpcb_destroy(inp); #endif goto out; } #endif /*IPSEC*/ #ifdef INET6 if (INP_SOCKAF(so) == AF_INET6) { inp->inp_vflag |= INP_IPV6PROTO; if (V_ip6_v6only) inp->inp_flags |= IN6P_IPV6_V6ONLY; } #endif INP_WLOCK(inp); INP_LIST_WLOCK(pcbinfo); LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; #ifdef INET6 if (V_ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif inp->inp_gencnt = ++pcbinfo->ipi_gencnt; refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */ INP_LIST_WUNLOCK(pcbinfo); #if defined(IPSEC) || defined(MAC) out: if (error != 0) { crfree(inp->inp_cred); uma_zfree(pcbinfo->ipi_zone, inp); } #endif return (error); } #ifdef INET int in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { int anonport, error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); anonport = nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0; error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, &inp->inp_lport, cred); if (error) return (error); if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } #endif /* * Select a local port (number) to use. */ #if defined(INET) || defined(INET6) int in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, struct ucred *cred, int lookupflags) { struct inpcbinfo *pcbinfo; struct inpcb *tmpinp; unsigned short *lastport; int count, dorandom, error; u_short aux, first, last, lport; #ifdef INET struct in_addr laddr; #endif pcbinfo = inp->inp_pcbinfo; /* * Because no actual state changes occur here, a global write lock on * the pcbinfo isn't required. */ INP_LOCK_ASSERT(inp); INP_HASH_LOCK_ASSERT(pcbinfo); if (inp->inp_flags & INP_HIGHPORT) { first = V_ipport_hifirstauto; /* sysctl */ last = V_ipport_hilastauto; lastport = &pcbinfo->ipi_lasthi; } else if (inp->inp_flags & INP_LOWPORT) { error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); if (error) return (error); first = V_ipport_lowfirstauto; /* 1023 */ last = V_ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->ipi_lastlow; } else { first = V_ipport_firstauto; /* sysctl */ last = V_ipport_lastauto; lastport = &pcbinfo->ipi_lastport; } /* * For UDP(-Lite), use random port allocation as long as the user * allows it. For TCP (and as of yet unknown) connections, * use random port allocation only if the user allows it AND * ipport_tick() allows it. */ if (V_ipport_randomized && (!V_ipport_stoprandom || pcbinfo == &V_udbinfo || pcbinfo == &V_ulitecbinfo)) dorandom = 1; else dorandom = 0; /* * It makes no sense to do random port allocation if * we have the only port available. */ if (first == last) dorandom = 0; /* Make sure to not include UDP(-Lite) packets in the count. */ if (pcbinfo != &V_udbinfo || pcbinfo != &V_ulitecbinfo) V_ipport_tcpallocs++; /* * Instead of having two loops further down counting up or down * make sure that first is always <= last and go with only one * code path implementing all logic. */ if (first > last) { aux = first; first = last; last = aux; } #ifdef INET /* Make the compiler happy. */ laddr.s_addr = 0; if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) { KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p", __func__, inp)); laddr = *laddrp; } #endif tmpinp = NULL; /* Make compiler happy. */ lport = *lportp; if (dorandom) *lastport = first + (arc4random() % (last - first)); count = last - first; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) tmpinp = in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, lookupflags, cred); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET tmpinp = in_pcblookup_local(pcbinfo, laddr, lport, lookupflags, cred); #endif } while (tmpinp != NULL); #ifdef INET if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) laddrp->s_addr = laddr.s_addr; #endif *lportp = lport; return (0); } /* * Return cached socket options. */ short inp_so_options(const struct inpcb *inp) { short so_options; so_options = 0; if ((inp->inp_flags2 & INP_REUSEPORT) != 0) so_options |= SO_REUSEPORT; if ((inp->inp_flags2 & INP_REUSEADDR) != 0) so_options |= SO_REUSEADDR; return (so_options); } #endif /* INET || INET6 */ /* * Check if a new BINDMULTI socket is allowed to be created. * * ni points to the new inp. * oi points to the exisitng inp. * * This checks whether the existing inp also has BINDMULTI and * whether the credentials match. */ int in_pcbbind_check_bindmulti(const struct inpcb *ni, const struct inpcb *oi) { /* Check permissions match */ if ((ni->inp_flags2 & INP_BINDMULTI) && (ni->inp_cred->cr_uid != oi->inp_cred->cr_uid)) return (0); /* Check the existing inp has BINDMULTI set */ if ((ni->inp_flags2 & INP_BINDMULTI) && ((oi->inp_flags2 & INP_BINDMULTI) == 0)) return (0); /* * We're okay - either INP_BINDMULTI isn't set on ni, or * it is and it matches the checks. */ return (1); } #ifdef INET /* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can * either complete the bind by setting inp_laddr/inp_lport and * calling in_pcbinshash(), or they can just use the resulting * port and address to authorise the sending of a once-off packet. * * On error, the values of *laddrp and *lportp are not changed. */ int in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, u_short *lportp, struct ucred *cred) { struct socket *so = inp->inp_socket; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; /* * No state changes, so read locks are sufficient here. */ INP_LOCK_ASSERT(inp); INP_HASH_LOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; if (nam != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip4(cred, &laddr)) != 0) return (error); } else { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif error = prison_local_ip4(cred, &sin->sin_addr); if (error) return (error); if (sin->sin_port != *lportp) { /* Don't allow the port to change. */ if (*lportp != 0) return (EINVAL); lport = sin->sin_port; } /* NB: lport is left as 0 if the port isn't being changed. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ bzero(&sin->sin_zero, sizeof(sin->sin_zero)); /* * Is the address a local IP address? * If INP_BINDANY is set, then the socket may be bound * to any endpoint address, local or not. */ if ((inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } laddr = sin->sin_addr; if (lport) { struct inpcb *t; struct tcptw *tw; /* GROSS */ if (ntohs(lport) <= V_ipport_reservedhigh && ntohs(lport) >= V_ipport_reservedlow && priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) return (EACCES); if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT, 0) != 0) { t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, INPLOOKUP_WILDCARD, cred); /* * XXX * This entire block sorely needs a rewrite. */ if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); /* * If the socket is a BINDMULTI socket, then * the credentials need to match and the * original socket also has to have been bound * with BINDMULTI. */ if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, lookupflags, cred); if (t && (t->inp_flags & INP_TIMEWAIT)) { /* * XXXRW: If an incpb has had its timewait * state recycled, we treat the address as * being in use (for now). This is better * than a panic, but not desirable. */ tw = intotw(t); if (tw == NULL || (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); } else if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && (reuseport & inp_so_options(t)) == 0) { #ifdef INET6 if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (inp->inp_vflag & INP_IPV6PROTO) == 0 || (t->inp_vflag & INP_IPV6PROTO) == 0) #endif return (EADDRINUSE); if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } } } if (*lportp != 0) lport = *lportp; if (lport == 0) { error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags); if (error != 0) return (error); } *laddrp = laddr.s_addr; *lportp = lport; return (0); } /* * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred, struct mbuf *m) { u_short lport, fport; in_addr_t laddr, faddr; int anonport, error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; anonport = (lport == 0); error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, NULL, cred); if (error) return (error); /* Do the initial binding of the local address if required. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } } /* Commit the remaining changes. */ inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; inp->inp_faddr.s_addr = faddr; inp->inp_fport = fport; in_pcbrehash_mbuf(inp, m); if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } int in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { return (in_pcbconnect_mbuf(inp, nam, cred, NULL)); } /* * Do proper source address selection on an unbound socket in case * of connect. Take jails into account as well. */ int in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, struct ucred *cred) { struct ifaddr *ifa; struct sockaddr *sa; struct sockaddr_in *sin; struct route sro; int error; KASSERT(laddr != NULL, ("%s: laddr NULL", __func__)); /* * Bypass source address selection and use the primary jail IP * if requested. */ if (cred != NULL && !prison_saddrsel_ip4(cred, laddr)) return (0); error = 0; bzero(&sro, sizeof(sro)); sin = (struct sockaddr_in *)&sro.ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); sin->sin_addr.s_addr = faddr->s_addr; /* * If route is known our src addr is taken from the i/f, * else punt. * * Find out route to destination. */ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum); /* * If we found a route, use the address corresponding to * the outgoing interface. * * Otherwise assume faddr is reachable on a directly connected * network and try to find a corresponding interface to take * the source address from. */ if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) { struct in_ifaddr *ia; struct ifnet *ifp; ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin, inp->inp_socket->so_fibnum)); if (ia == NULL) ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0, inp->inp_socket->so_fibnum)); if (ia == NULL) { error = ENETUNREACH; goto done; } if (cred == NULL || !prison_flag(cred, PR_IP4)) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; ifa_free(&ia->ia_ifa); goto done; } ifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; sin = (struct sockaddr_in *)sa; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)ifa; break; } } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; IF_ADDR_RUNLOCK(ifp); goto done; } IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); goto done; } /* * If the outgoing interface on the route found is not * a loopback interface, use the address from that interface. * In case of jails do those three steps: * 1. check if the interface address belongs to the jail. If so use it. * 2. check if we have any address on the outgoing interface * belonging to this jail. If so use it. * 3. as a last resort return the 'default' jail address. */ if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) { struct in_ifaddr *ia; struct ifnet *ifp; /* If not jailed, use the default returned. */ if (cred == NULL || !prison_flag(cred, PR_IP4)) { ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; laddr->s_addr = ia->ia_addr.sin_addr.s_addr; goto done; } /* Jailed. */ /* 1. Check if the iface address belongs to the jail. */ sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; laddr->s_addr = ia->ia_addr.sin_addr.s_addr; goto done; } /* * 2. Check if we have any address on the outgoing interface * belonging to this jail. */ ia = NULL; ifp = sro.ro_rt->rt_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; sin = (struct sockaddr_in *)sa; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)ifa; break; } } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; IF_ADDR_RUNLOCK(ifp); goto done; } IF_ADDR_RUNLOCK(ifp); /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); goto done; } /* * The outgoing interface is marked with 'loopback net', so a route * to ourselves is here. * Try to find the interface of the destination address and then * take the address from there. That interface is not necessarily * a loopback interface. * In case of jails, check that it is an address of the jail * and if we cannot find, fall back to the 'default' jail address. */ if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { struct sockaddr_in sain; struct in_ifaddr *ia; bzero(&sain, sizeof(struct sockaddr_in)); sain.sin_family = AF_INET; sain.sin_len = sizeof(struct sockaddr_in); sain.sin_addr.s_addr = faddr->s_addr; ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain), inp->inp_socket->so_fibnum)); if (ia == NULL) ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0, inp->inp_socket->so_fibnum)); if (ia == NULL) ia = ifatoia(ifa_ifwithaddr(sintosa(&sain))); if (cred == NULL || !prison_flag(cred, PR_IP4)) { if (ia == NULL) { error = ENETUNREACH; goto done; } laddr->s_addr = ia->ia_addr.sin_addr.s_addr; ifa_free(&ia->ia_ifa); goto done; } /* Jailed. */ if (ia != NULL) { struct ifnet *ifp; ifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; sin = (struct sockaddr_in *)sa; if (prison_check_ip4(cred, &sin->sin_addr) == 0) { ia = (struct in_ifaddr *)ifa; break; } } if (ia != NULL) { laddr->s_addr = ia->ia_addr.sin_addr.s_addr; IF_ADDR_RUNLOCK(ifp); goto done; } IF_ADDR_RUNLOCK(ifp); } /* 3. As a last resort return the 'default' jail address. */ error = prison_get_ip4(cred, laddr); goto done; } done: if (sro.ro_rt != NULL) RTFREE(sro.ro_rt); return (error); } /* * Set up for a connect from a socket to the specified address. * On entry, *laddrp and *lportp should contain the current local * address and port for the PCB; these are updated to the values * that should be placed in inp_laddr and inp_lport to complete * the connect. * * On success, *faddrp and *fportp will be set to the remote address * and port. These are not updated in the error case. * * If the operation fails because the connection already exists, * *oinpp will be set to the PCB of that connection so that the * caller can decide to override it. In all other cases, *oinpp * is set to NULL. */ int in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp, struct inpcb **oinpp, struct ucred *cred) { struct rm_priotracker in_ifa_tracker; struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct in_ifaddr *ia; struct inpcb *oinp; struct in_addr laddr, faddr; u_short lport, fport; int error; /* * Because a global state change doesn't actually occur here, a read * lock is sufficient. */ INP_LOCK_ASSERT(inp); INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo); if (oinpp != NULL) *oinpp = NULL; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; if (!TAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ if (faddr.s_addr == INADDR_ANY) { IN_IFADDR_RLOCK(&in_ifa_tracker); faddr = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr; IN_IFADDR_RUNLOCK(&in_ifa_tracker); if (cred != NULL && (error = prison_get_ip4(cred, &faddr)) != 0) return (error); } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) { IN_IFADDR_RLOCK(&in_ifa_tracker); if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST) faddr = satosin(&TAILQ_FIRST( &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; IN_IFADDR_RUNLOCK(&in_ifa_tracker); } } if (laddr.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &faddr, &laddr, cred); /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, prefer the * address of that interface as our source address. */ if (IN_MULTICAST(ntohl(faddr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((ia->ia_ifp == ifp) && (cred == NULL || prison_check_ip4(cred, &ia->ia_addr.sin_addr) == 0)) break; } if (ia == NULL) error = EADDRNOTAVAIL; else { laddr = ia->ia_addr.sin_addr; error = 0; } IN_IFADDR_RUNLOCK(&in_ifa_tracker); } } if (error) return (error); } oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport, laddr, lport, 0, NULL); if (oinp != NULL) { if (oinpp != NULL) *oinpp = oinp; return (EADDRINUSE); } if (lport == 0) { error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, cred); if (error) return (error); } *laddrp = laddr.s_addr; *lportp = lport; *faddrp = faddr.s_addr; *fportp = fport; return (0); } void in_pcbdisconnect(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); } #endif /* INET */ /* * in_pcbdetach() is responsibe for disassociating a socket from an inpcb. * For most protocols, this will be invoked immediately prior to calling * in_pcbfree(). However, with TCP the inpcb may significantly outlive the * socket, in which case in_pcbfree() is deferred. */ void in_pcbdetach(struct inpcb *inp) { KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); #ifdef RATELIMIT if (inp->inp_snd_tag != NULL) in_pcbdetach_txrtlmt(inp); #endif inp->inp_socket->so_pcb = NULL; inp->inp_socket = NULL; } /* * in_pcbref() bumps the reference count on an inpcb in order to maintain * stability of an inpcb pointer despite the inpcb lock being released. This * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, * but where the inpcb lock may already held, or when acquiring a reference * via a pcbgroup. * * in_pcbref() should be used only to provide brief memory stability, and * must always be followed by a call to INP_WLOCK() and in_pcbrele() to * garbage collect the inpcb if it has been in_pcbfree()'d from another * context. Until in_pcbrele() has returned that the inpcb is still valid, * lock and rele are the *only* safe operations that may be performed on the * inpcb. * * While the inpcb will not be freed, releasing the inpcb lock means that the * connection's state may change, so the caller should be careful to * revalidate any cached state on reacquiring the lock. Drop the reference * using in_pcbrele(). */ void in_pcbref(struct inpcb *inp) { KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); refcount_acquire(&inp->inp_refcount); } /* * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we * return a flag indicating whether or not the inpcb remains valid. If it is * valid, we return with the inpcb lock held. * * Notice that, unlike in_pcbref(), the inpcb lock must be held to drop a * reference on an inpcb. Historically more work was done here (actually, in * in_pcbfree_internal()) but has been moved to in_pcbfree() to avoid the * need for the pcbinfo lock in in_pcbrele(). Deferring the free is entirely * about memory stability (and continued use of the write lock). */ int in_pcbrele_rlocked(struct inpcb *inp) { struct inpcbinfo *pcbinfo; KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); INP_RLOCK_ASSERT(inp); if (refcount_release(&inp->inp_refcount) == 0) { /* * If the inpcb has been freed, let the caller know, even if * this isn't the last reference. */ if (inp->inp_flags2 & INP_FREED) { INP_RUNLOCK(inp); return (1); } return (0); } KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); INP_RUNLOCK(inp); pcbinfo = inp->inp_pcbinfo; uma_zfree(pcbinfo->ipi_zone, inp); return (1); } int in_pcbrele_wlocked(struct inpcb *inp) { struct inpcbinfo *pcbinfo; KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); INP_WLOCK_ASSERT(inp); if (refcount_release(&inp->inp_refcount) == 0) { /* * If the inpcb has been freed, let the caller know, even if * this isn't the last reference. */ if (inp->inp_flags2 & INP_FREED) { INP_WUNLOCK(inp); return (1); } return (0); } KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); INP_WUNLOCK(inp); pcbinfo = inp->inp_pcbinfo; uma_zfree(pcbinfo->ipi_zone, inp); return (1); } /* * Temporary wrapper. */ int in_pcbrele(struct inpcb *inp) { return (in_pcbrele_wlocked(inp)); } /* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached * from its socket. If another thread holds a temporary reference (acquired * using in_pcbref()) then the free is deferred until that reference is * released using in_pcbrele(), but the inpcb is still unlocked. Almost all * work, including removal from global lists, is done in this context, where * the pcbinfo lock is held. */ void in_pcbfree(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); #ifdef INVARIANTS if (pcbinfo == &V_tcbinfo) { INP_INFO_LOCK_ASSERT(pcbinfo); } else { INP_INFO_WLOCK_ASSERT(pcbinfo); } #endif INP_WLOCK_ASSERT(inp); /* XXXRW: Do as much as possible here. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) if (inp->inp_sp != NULL) ipsec_delete_pcbpolicy(inp); #endif INP_LIST_WLOCK(pcbinfo); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; in_pcbremlists(inp); INP_LIST_WUNLOCK(pcbinfo); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) { ip6_freepcbopts(inp->in6p_outputopts); if (inp->in6p_moptions != NULL) ip6_freemoptions(inp->in6p_moptions); } #endif if (inp->inp_options) (void)m_free(inp->inp_options); #ifdef INET if (inp->inp_moptions != NULL) inp_freemoptions(inp->inp_moptions); #endif RO_RTFREE(&inp->inp_route); if (inp->inp_route.ro_lle) LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */ inp->inp_vflag = 0; inp->inp_flags2 |= INP_FREED; crfree(inp->inp_cred); #ifdef MAC mac_inpcb_destroy(inp); #endif if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); } /* * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and * port reservation, and preventing it from being returned by inpcb lookups. * * It is used by TCP to mark an inpcb as unused and avoid future packet * delivery or event notification when a socket remains open but TCP has * closed. This might occur as a result of a shutdown()-initiated TCP close * or a RST on the wire, and allows the port binding to be reused while still * maintaining the invariant that so_pcb always points to a valid inpcb until * in_pcbdetach(). * * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by * in_pcbnotifyall() and in_pcbpurgeif0()? */ void in_pcbdrop(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); /* * XXXRW: Possibly we should protect the setting of INP_DROPPED with * the hash lock...? */ inp->inp_flags |= INP_DROPPED; if (inp->inp_flags & INP_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(inp->inp_pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } INP_HASH_WUNLOCK(inp->inp_pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; #ifdef PCBGROUP in_pcbgroup_remove(inp); #endif } } #ifdef INET /* * Common routines to return the socket addresses associated with inpcbs. */ struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr_p) { struct sockaddr_in *sin; sin = malloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = *addr_p; sin->sin_port = port; return (struct sockaddr *)sin; } int in_getsockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_lport; addr = inp->inp_laddr; INP_RUNLOCK(inp); *nam = in_sockaddr(port, &addr); return 0; } int in_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_fport; addr = inp->inp_faddr; INP_RUNLOCK(inp); *nam = in_sockaddr(port, &addr); return 0; } void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno, struct inpcb *(*notify)(struct inpcb *, int)) { struct inpcb *inp, *inp_temp; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { INP_WUNLOCK(inp); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) { INP_WUNLOCK(inp); continue; } if ((*notify)(inp, errno)) INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } void in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *inp; struct ip_moptions *imo; int i, gap; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(inp); imo = inp->inp_moptions; if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { /* * Unselect the outgoing interface if it is being * detached. */ if (imo->imo_multicast_ifp == ifp) imo->imo_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { if (imo->imo_membership[i]->inm_ifp == ifp) { in_delmulti(imo->imo_membership[i]); gap++; } else if (gap != 0) imo->imo_membership[i - gap] = imo->imo_membership[i]; } imo->imo_num_memberships -= gap; } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. Caller must hold the * hash lock. No inpcb locks or references are acquired. */ #define INP_LOOKUP_MAPPED_PCB_COST 3 struct inpcb * in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, int lookupflags, struct ucred *cred) { struct inpcb *inp; #ifdef INET6 int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST; #else int matchwild = 3; #endif int wildcard; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found? */ if (cred == NULL || prison_equal_ip4(cred->cr_prison, inp->inp_cred->cr_prison)) return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip4(inp->inp_cred->cr_prison, cred->cr_prison)) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; /* * We never select the PCB that has * INP_IPV6 flag and is bound to :: if * we have another PCB which is bound * to 0.0.0.0. If a PCB has the * INP_IPV6 flag, then we set its cost * higher than IPv4 only PCBs. * * Note that the case only happens * when a socket is bound to ::, under * the condition that the use of the * mapped address is allowed. */ if ((inp->inp_vflag & INP_IPV6) != 0) wildcard += INP_LOOKUP_MAPPED_PCB_COST; #endif if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) break; } } } return (match); } } #undef INP_LOOKUP_MAPPED_PCB_COST #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. */ static struct inpcb * in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ tmpinp = NULL; INP_GROUP_LOCK(pcbgroup); head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP4)) goto found; if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) { inp = tmpinp; goto found; } #ifdef RSS /* * For incoming connections, we may wish to do a wildcard * match for an RSS-local socket. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; #ifdef INET6 struct inpcb *local_wild_mapped = NULL; #endif struct inpcb *jail_wild = NULL; struct inpcbhead *head; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) continue; injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, &laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (inp->inp_laddr.s_addr == laddr.s_addr) { if (injail) goto found; else local_exact = inp; } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 /* XXX inp locking, NULL check */ if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; #ifdef INET6 if (inp == NULL) inp = local_wild_mapped; #endif if (inp != NULL) goto found; } #endif /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; #ifdef INET6 struct inpcb *local_wild_mapped = NULL; #endif struct inpcb *jail_wild = NULL; struct inpcbhead *head; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_wildmask)]; LIST_FOREACH(inp, head, inp_pcbgroup_wild) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) continue; injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, &laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (inp->inp_laddr.s_addr == laddr.s_addr) { if (injail) goto found; else local_exact = inp; } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 /* XXX inp locking, NULL check */ if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; #ifdef INET6 if (inp == NULL) inp = local_wild_mapped; #endif if (inp != NULL) goto found; } /* if (lookupflags & INPLOOKUP_WILDCARD) */ INP_GROUP_UNLOCK(pcbgroup); return (NULL); found: in_pcbref(inp); INP_GROUP_UNLOCK(pcbgroup); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); return (inp); } #endif /* PCBGROUP */ /* * Lookup PCB in hash list, using pcbinfo tables. This variation assumes * that the caller has locked the hash list, and will not perform any further * locking or reference operations on either the hash list or the connection. */ static struct inpcb * in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP4)) return (inp); if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) return (tmpinp); /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; #ifdef INET6 struct inpcb *local_wild_mapped = NULL; #endif struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) continue; injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, &laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (inp->inp_laddr.s_addr == laddr.s_addr) { if (injail) return (inp); else local_exact = inp; } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 /* XXX inp locking, NULL check */ if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ if (jail_wild != NULL) return (jail_wild); if (local_exact != NULL) return (local_exact); if (local_wild != NULL) return (local_wild); #ifdef INET6 if (local_wild_mapped != NULL) return (local_wild_mapped); #endif } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ return (NULL); } /* * Lookup PCB in hash list, using pcbinfo tables. This variation locks the * hash list lock, and will return the inpcb locked (i.e., requires * INPLOOKUP_LOCKPCB). */ static struct inpcb * in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp) { struct inpcb *inp; INP_HASH_RLOCK(pcbinfo); inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); if (inp != NULL) { in_pcbref(inp); INP_HASH_RUNLOCK(pcbinfo); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); } else INP_HASH_RUNLOCK(pcbinfo); return (inp); } /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. * * Possibly more of this logic should be in in_pcbgroup.c. */ struct inpcb * in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp) { #if defined(PCBGROUP) && !defined(RSS) struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the * reservation table when looking up 4-tuples. When using RSS, just * use the reservation table, due to the cost of the Toeplitz hash * in software. * * XXXRW: This policy belongs in the pcbgroup code, as in principle * we could be doing RSS with a non-Toeplitz hash that is affordable * in software. */ #if defined(PCBGROUP) && !defined(RSS) if (in_pcbgroup_enabled(pcbinfo)) { pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } struct inpcb * in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { #ifdef PCBGROUP struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP /* * If we can use a hardware-generated hash to look up the connection * group, use that connection group to find the inpcb. Otherwise * fall back on a software hash -- or the reservation table if we're * using RSS. * * XXXRW: As above, that policy belongs in the pcbgroup code. */ if (in_pcbgroup_enabled(pcbinfo) && !(M_HASHTYPE_TEST(m, M_HASHTYPE_NONE))) { pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), m->m_pkthdr.flowid); if (pcbgroup != NULL) return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #ifndef RSS pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #endif } #endif return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif /* INET */ /* * Insert PCB onto various hash lists. */ static int in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, ("in_pcbinshash: INP_INHASHLIST")); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr); else #endif hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; pcbporthash = &pcbinfo->ipi_porthashbase[ INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; /* * Go through port list and look for a head for this lport. */ LIST_FOREACH(phd, pcbporthash, phd_hash) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); inp->inp_flags |= INP_INHASHLIST; #ifdef PCBGROUP if (do_pcbgroup_update) in_pcbgroup_update(inp); #endif return (0); } /* * For now, there are two public interfaces to insert an inpcb into the hash * lists -- one that does update pcbgroups, and one that doesn't. The latter * is used only in the TCP syncache, where in_pcbinshash is called before the * full 4-tuple is set for the inpcb, and we don't want to install in the * pcbgroup until later. * * XXXRW: This seems like a misfeature. in_pcbinshash should always update * connection groups, and partially initialised inpcbs should not be exposed * to either reservation hash tables or pcbgroups. */ int in_pcbinshash(struct inpcb *inp) { return (in_pcbinshash_internal(inp, 1)); } int in_pcbinshash_nopcbgroup(struct inpcb *inp) { return (in_pcbinshash_internal(inp, 0)); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *head; u_int32_t hashkey_faddr; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); KASSERT(inp->inp_flags & INP_INHASHLIST, ("in_pcbrehash: !INP_INHASHLIST")); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr); else #endif hashkey_faddr = inp->inp_faddr.s_addr; head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); #ifdef PCBGROUP if (m != NULL) in_pcbgroup_update_mbuf(inp, m); else in_pcbgroup_update(inp); #endif } void in_pcbrehash(struct inpcb *inp) { in_pcbrehash_mbuf(inp, NULL); } /* * Remove PCB from various lists. */ static void in_pcbremlists(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; #ifdef INVARIANTS if (pcbinfo == &V_tcbinfo) { INP_INFO_RLOCK_ASSERT(pcbinfo); } else { INP_INFO_WLOCK_ASSERT(pcbinfo); } #endif INP_WLOCK_ASSERT(inp); INP_LIST_WLOCK_ASSERT(pcbinfo); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; if (inp->inp_flags & INP_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } INP_HASH_WUNLOCK(pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; } LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; #ifdef PCBGROUP in_pcbgroup_remove(inp); #endif } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in_losing(struct inpcb *inp) { RO_RTFREE(&inp->inp_route); if (inp->inp_route.ro_lle) LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */ return; } /* * A set label operation has occurred at the socket layer, propagate the * label change into the in_pcb for the socket. */ void in_pcbsosetlabel(struct socket *so) { #ifdef MAC struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL")); INP_WLOCK(inp); SOCK_LOCK(so); mac_inpcb_sosetlabel(so, inp); SOCK_UNLOCK(so); INP_WUNLOCK(inp); #endif } /* * ipport_tick runs once per second, determining if random port allocation * should be continued. If more than ipport_randomcps ports have been * allocated in the last second, then we return to sequential port * allocation. We return to random allocation only once we drop below * ipport_randomcps for at least ipport_randomtime seconds. */ static void ipport_tick(void *xtp) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */ if (V_ipport_tcpallocs <= V_ipport_tcplastcount + V_ipport_randomcps) { if (V_ipport_stoprandom > 0) V_ipport_stoprandom--; } else V_ipport_stoprandom = V_ipport_randomtime; V_ipport_tcplastcount = V_ipport_tcpallocs; CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); } static void ip_fini(void *xtp) { callout_stop(&ipport_tick_callout); } /* * The ipport_callout should start running at about the time we attach the * inet or inet6 domains. */ static void ipport_tick_init(const void *unused __unused) { /* Start ipport_tick. */ callout_init(&ipport_tick_callout, 1); callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL); EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, SHUTDOWN_PRI_DEFAULT); } SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipport_tick_init, NULL); void inp_wlock(struct inpcb *inp) { INP_WLOCK(inp); } void inp_wunlock(struct inpcb *inp) { INP_WUNLOCK(inp); } void inp_rlock(struct inpcb *inp) { INP_RLOCK(inp); } void inp_runlock(struct inpcb *inp) { INP_RUNLOCK(inp); } #ifdef INVARIANTS void inp_lock_assert(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); } void inp_unlock_assert(struct inpcb *inp) { INP_UNLOCK_ASSERT(inp); } #endif void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg) { struct inpcb *inp; INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { INP_WLOCK(inp); func(inp, arg); INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); } struct socket * inp_inpcbtosocket(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); return (inp->inp_socket); } struct tcpcb * inp_inpcbtotcpcb(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); return ((struct tcpcb *)inp->inp_ppcb); } int inp_ip_tos_get(const struct inpcb *inp) { return (inp->inp_ip_tos); } void inp_ip_tos_set(struct inpcb *inp, int val) { inp->inp_ip_tos = val; } void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, uint32_t *faddr, uint16_t *fp) { INP_LOCK_ASSERT(inp); *laddr = inp->inp_laddr.s_addr; *faddr = inp->inp_faddr.s_addr; *lp = inp->inp_lport; *fp = inp->inp_fport; } struct inpcb * so_sotoinpcb(struct socket *so) { return (sotoinpcb(so)); } struct tcpcb * so_sototcpcb(struct socket *so) { return (sototcpcb(so)); } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent) { char faddr_str[48], laddr_str[48]; db_print_indent(indent); db_printf("%s at %p\n", name, inc); indent += 2; #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { /* IPv6. */ ip6_sprintf(laddr_str, &inc->inc6_laddr); ip6_sprintf(faddr_str, &inc->inc6_faddr); } else #endif { /* IPv4. */ inet_ntoa_r(inc->inc_laddr, laddr_str); inet_ntoa_r(inc->inc_faddr, faddr_str); } db_print_indent(indent); db_printf("inc_laddr %s inc_lport %u\n", laddr_str, ntohs(inc->inc_lport)); db_print_indent(indent); db_printf("inc_faddr %s inc_fport %u\n", faddr_str, ntohs(inc->inc_fport)); } static void db_print_inpflags(int inp_flags) { int comma; comma = 0; if (inp_flags & INP_RECVOPTS) { db_printf("%sINP_RECVOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVRETOPTS) { db_printf("%sINP_RECVRETOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVDSTADDR) { db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_HDRINCL) { db_printf("%sINP_HDRINCL", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_HIGHPORT) { db_printf("%sINP_HIGHPORT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_LOWPORT) { db_printf("%sINP_LOWPORT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_ANONPORT) { db_printf("%sINP_ANONPORT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVIF) { db_printf("%sINP_RECVIF", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_MTUDISC) { db_printf("%sINP_MTUDISC", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVTTL) { db_printf("%sINP_RECVTTL", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_DONTFRAG) { db_printf("%sINP_DONTFRAG", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_RECVTOS) { db_printf("%sINP_RECVTOS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_IPV6_V6ONLY) { db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_PKTINFO) { db_printf("%sIN6P_PKTINFO", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_HOPLIMIT) { db_printf("%sIN6P_HOPLIMIT", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_HOPOPTS) { db_printf("%sIN6P_HOPOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_DSTOPTS) { db_printf("%sIN6P_DSTOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_RTHDR) { db_printf("%sIN6P_RTHDR", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_RTHDRDSTOPTS) { db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_TCLASS) { db_printf("%sIN6P_TCLASS", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_AUTOFLOWLABEL) { db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_TIMEWAIT) { db_printf("%sINP_TIMEWAIT", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_ONESBCAST) { db_printf("%sINP_ONESBCAST", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_DROPPED) { db_printf("%sINP_DROPPED", comma ? ", " : ""); comma = 1; } if (inp_flags & INP_SOCKREF) { db_printf("%sINP_SOCKREF", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_RFC2292) { db_printf("%sIN6P_RFC2292", comma ? ", " : ""); comma = 1; } if (inp_flags & IN6P_MTU) { db_printf("IN6P_MTU%s", comma ? ", " : ""); comma = 1; } } static void db_print_inpvflag(u_char inp_vflag) { int comma; comma = 0; if (inp_vflag & INP_IPV4) { db_printf("%sINP_IPV4", comma ? ", " : ""); comma = 1; } if (inp_vflag & INP_IPV6) { db_printf("%sINP_IPV6", comma ? ", " : ""); comma = 1; } if (inp_vflag & INP_IPV6PROTO) { db_printf("%sINP_IPV6PROTO", comma ? ", " : ""); comma = 1; } } static void db_print_inpcb(struct inpcb *inp, const char *name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", name, inp); indent += 2; db_print_indent(indent); db_printf("inp_flow: 0x%x\n", inp->inp_flow); db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent); db_print_indent(indent); db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n", inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket); db_print_indent(indent); db_printf("inp_label: %p inp_flags: 0x%x (", inp->inp_label, inp->inp_flags); db_print_inpflags(inp->inp_flags); db_printf(")\n"); db_print_indent(indent); db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp, inp->inp_vflag); db_print_inpvflag(inp->inp_vflag); db_printf(")\n"); db_print_indent(indent); db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n", inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl); db_print_indent(indent); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { db_printf("in6p_options: %p in6p_outputopts: %p " "in6p_moptions: %p\n", inp->in6p_options, inp->in6p_outputopts, inp->in6p_moptions); db_printf("in6p_icmp6filt: %p in6p_cksum %d " "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum, inp->in6p_hops); } else #endif { db_printf("inp_ip_tos: %d inp_ip_options: %p " "inp_ip_moptions: %p\n", inp->inp_ip_tos, inp->inp_options, inp->inp_moptions); } db_print_indent(indent); db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd, (uintmax_t)inp->inp_gencnt); } DB_SHOW_COMMAND(inpcb, db_show_inpcb) { struct inpcb *inp; if (!have_addr) { db_printf("usage: show inpcb \n"); return; } inp = (struct inpcb *)addr; db_print_inpcb(inp, "inpcb", 0); } #endif /* DDB */ #ifdef RATELIMIT /* * Modify TX rate limit based on the existing "inp->inp_snd_tag", * if any. */ int in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate) { union if_snd_tag_modify_params params = { .rate_limit.max_rate = max_pacing_rate, }; struct m_snd_tag *mst; struct ifnet *ifp; int error; mst = inp->inp_snd_tag; if (mst == NULL) return (EINVAL); ifp = mst->ifp; if (ifp == NULL) return (EINVAL); if (ifp->if_snd_tag_modify == NULL) { error = EOPNOTSUPP; } else { error = ifp->if_snd_tag_modify(mst, ¶ms); } return (error); } /* * Query existing TX rate limit based on the existing * "inp->inp_snd_tag", if any. */ int in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate) { union if_snd_tag_query_params params = { }; struct m_snd_tag *mst; struct ifnet *ifp; int error; mst = inp->inp_snd_tag; if (mst == NULL) return (EINVAL); ifp = mst->ifp; if (ifp == NULL) return (EINVAL); if (ifp->if_snd_tag_query == NULL) { error = EOPNOTSUPP; } else { error = ifp->if_snd_tag_query(mst, ¶ms); if (error == 0 && p_max_pacing_rate != NULL) *p_max_pacing_rate = params.rate_limit.max_rate; } return (error); } /* * Allocate a new TX rate limit send tag from the network interface * given by the "ifp" argument and save it in "inp->inp_snd_tag": */ int in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate) { union if_snd_tag_alloc_params params = { .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, .rate_limit.hdr.flowid = flowid, .rate_limit.hdr.flowtype = flowtype, .rate_limit.max_rate = max_pacing_rate, }; int error; INP_WLOCK_ASSERT(inp); if (inp->inp_snd_tag != NULL) return (EINVAL); if (ifp->if_snd_tag_alloc == NULL) { error = EOPNOTSUPP; } else { error = ifp->if_snd_tag_alloc(ifp, ¶ms, &inp->inp_snd_tag); /* * At success increment the refcount on * the send tag's network interface: */ if (error == 0) if_ref(inp->inp_snd_tag->ifp); } return (error); } /* * Free an existing TX rate limit tag based on the "inp->inp_snd_tag", * if any: */ void in_pcbdetach_txrtlmt(struct inpcb *inp) { struct m_snd_tag *mst; struct ifnet *ifp; INP_WLOCK_ASSERT(inp); mst = inp->inp_snd_tag; inp->inp_snd_tag = NULL; if (mst == NULL) return; ifp = mst->ifp; if (ifp == NULL) return; /* * If the device was detached while we still had reference(s) * on the ifp, we assume if_snd_tag_free() was replaced with * stubs. */ ifp->if_snd_tag_free(mst); /* release reference count on network interface */ if_rele(ifp); } /* * This function should be called when the INP_RATE_LIMIT_CHANGED flag * is set in the fast path and will attach/detach/modify the TX rate * limit send tag based on the socket's so_max_pacing_rate value. */ void in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) { struct socket *socket; uint32_t max_pacing_rate; bool did_upgrade; int error; if (inp == NULL) return; socket = inp->inp_socket; if (socket == NULL) return; if (!INP_WLOCKED(inp)) { /* * NOTE: If the write locking fails, we need to bail * out and use the non-ratelimited ring for the * transmit until there is a new chance to get the * write lock. */ if (!INP_TRY_UPGRADE(inp)) return; did_upgrade = 1; } else { did_upgrade = 0; } /* * NOTE: The so_max_pacing_rate value is read unlocked, * because atomic updates are not required since the variable * is checked at every mbuf we send. It is assumed that the * variable read itself will be atomic. */ max_pacing_rate = socket->so_max_pacing_rate; /* * NOTE: When attaching to a network interface a reference is * made to ensure the network interface doesn't go away until * all ratelimit connections are gone. The network interface * pointers compared below represent valid network interfaces, * except when comparing towards NULL. */ if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) { error = 0; } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) { if (inp->inp_snd_tag != NULL) in_pcbdetach_txrtlmt(inp); error = 0; } else if (inp->inp_snd_tag == NULL) { /* * In order to utilize packet pacing with RSS, we need * to wait until there is a valid RSS hash before we * can proceed: */ if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { error = EAGAIN; } else { error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb), mb->m_pkthdr.flowid, max_pacing_rate); } } else { error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); } if (error == 0 || error == EOPNOTSUPP) inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; if (did_upgrade) INP_DOWNGRADE(inp); } /* * Track route changes for TX rate limiting. */ void in_pcboutput_eagain(struct inpcb *inp) { struct socket *socket; bool did_upgrade; if (inp == NULL) return; socket = inp->inp_socket; if (socket == NULL) return; if (inp->inp_snd_tag == NULL) return; if (!INP_WLOCKED(inp)) { /* * NOTE: If the write locking fails, we need to bail * out and use the non-ratelimited ring for the * transmit until there is a new chance to get the * write lock. */ if (!INP_TRY_UPGRADE(inp)) return; did_upgrade = 1; } else { did_upgrade = 0; } /* detach rate limiting */ in_pcbdetach_txrtlmt(inp); /* make sure new mbuf send tag allocation is made */ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; if (did_upgrade) INP_DOWNGRADE(inp); } #endif /* RATELIMIT */ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 7403ec08ba5b..932dc567236d 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -1,379 +1,347 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_proto.c 8.2 (Berkeley) 2/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mrouting.h" #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_sctp.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include /* * While this file provides the domain and protocol switch tables for IPv4, it * also provides the sysctl node declarations for net.inet.* often shared with * IPv6 for common features or by upper layer protocols. In case of no IPv4 * support compile out everything but these sysctl nodes. */ #ifdef INET #include #include #include #ifdef RADIX_MPATH #include #endif #include #endif /* INET */ #if defined(INET) || defined(INET6) #include #endif #ifdef INET #include #include #include #include #include #include #include #include #include #include #include #include /* * TCP/IP protocol family: IP, ICMP, UDP, TCP. */ static struct pr_usrreqs nousrreqs; -#ifdef IPSEC -#include -#endif /* IPSEC */ - #ifdef SCTP #include #include #include #include #endif /* SCTP */ FEATURE(inet, "Internet Protocol version 4"); extern struct domain inetdomain; /* Spacer for loadable protocols. */ #define IPPROTOSPACER \ { \ .pr_domain = &inetdomain, \ .pr_protocol = PROTO_SPACER, \ .pr_usrreqs = &nousrreqs \ } struct protosw inetsw[] = { { .pr_type = 0, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_IP, .pr_init = ip_init, .pr_slowtimo = ip_slowtimo, .pr_drain = ip_drain, .pr_usrreqs = &nousrreqs }, { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_UDP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = udp_input, .pr_ctlinput = udp_ctlinput, .pr_ctloutput = udp_ctloutput, .pr_init = udp_init, .pr_usrreqs = &udp_usrreqs }, { .pr_type = SOCK_STREAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, .pr_input = tcp_input, .pr_ctlinput = tcp_ctlinput, .pr_ctloutput = tcp_ctloutput, .pr_init = tcp_init, .pr_slowtimo = tcp_slowtimo, .pr_drain = tcp_drain, .pr_usrreqs = &tcp_usrreqs }, #ifdef SCTP { .pr_type = SOCK_SEQPACKET, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_input = sctp_input, .pr_ctlinput = sctp_ctlinput, .pr_ctloutput = sctp_ctloutput, .pr_init = sctp_init, .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs }, { .pr_type = SOCK_STREAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_input = sctp_input, .pr_ctlinput = sctp_ctlinput, .pr_ctloutput = sctp_ctloutput, .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs }, #endif /* SCTP */ { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_UDPLITE, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = udp_input, .pr_ctlinput = udplite_ctlinput, .pr_ctloutput = udp_ctloutput, .pr_init = udplite_init, .pr_usrreqs = &udp_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_RAW, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = rip_input, .pr_ctlinput = rip_ctlinput, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_ICMP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = icmp_input, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_IGMP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = igmp_input, .pr_ctloutput = rip_ctloutput, .pr_fasttimo = igmp_fasttimo, .pr_slowtimo = igmp_slowtimo, .pr_usrreqs = &rip_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_RSVP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = rsvp_input, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }, -#ifdef IPSEC -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_AH, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ah4_input, - .pr_ctlinput = ah4_ctlinput, - .pr_usrreqs = &nousrreqs -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = esp4_input, - .pr_ctlinput = esp4_ctlinput, - .pr_usrreqs = &nousrreqs -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPCOMP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipcomp4_input, - .pr_usrreqs = &nousrreqs -}, -#endif /* IPSEC */ { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_IPV4, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, .pr_ctloutput = rip_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_MOBILE, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, .pr_ctloutput = rip_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_ETHERIP, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, .pr_ctloutput = rip_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_GRE, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, .pr_ctloutput = rip_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip_usrreqs }, # ifdef INET6 { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_IPV6, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, .pr_ctloutput = rip_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip_usrreqs }, #endif { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }, /* Spacer n-times for loadable protocols. */ IPPROTOSPACER, IPPROTOSPACER, IPPROTOSPACER, IPPROTOSPACER, IPPROTOSPACER, IPPROTOSPACER, IPPROTOSPACER, IPPROTOSPACER, /* raw wildcard */ { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = rip_input, .pr_ctloutput = rip_ctloutput, .pr_init = rip_init, .pr_usrreqs = &rip_usrreqs }, }; extern int in_inithead(void **, int); extern int in_detachhead(void **, int); struct domain inetdomain = { .dom_family = AF_INET, .dom_name = "internet", .dom_protosw = inetsw, .dom_protoswNPROTOSW = &inetsw[nitems(inetsw)], #ifdef RADIX_MPATH .dom_rtattach = rn4_mpath_inithead, #else .dom_rtattach = in_inithead, #endif #ifdef VIMAGE .dom_rtdetach = in_detachhead, #endif .dom_ifattach = in_domifattach, .dom_ifdetach = in_domifdetach }; VNET_DOMAIN_SET(inet); #endif /* INET */ SYSCTL_NODE(_net, PF_INET, inet, CTLFLAG_RW, 0, "Internet Family"); SYSCTL_NODE(_net_inet, IPPROTO_IP, ip, CTLFLAG_RW, 0, "IP"); SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW, 0, "ICMP"); SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW, 0, "UDP"); SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW, 0, "TCP"); #ifdef SCTP SYSCTL_NODE(_net_inet, IPPROTO_SCTP, sctp, CTLFLAG_RW, 0, "SCTP"); #endif SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP"); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* XXX no protocol # to use, pick something "reserved" */ SYSCTL_NODE(_net_inet, 253, ipsec, CTLFLAG_RW, 0, "IPSEC"); SYSCTL_NODE(_net_inet, IPPROTO_AH, ah, CTLFLAG_RW, 0, "AH"); SYSCTL_NODE(_net_inet, IPPROTO_ESP, esp, CTLFLAG_RW, 0, "ESP"); SYSCTL_NODE(_net_inet, IPPROTO_IPCOMP, ipcomp, CTLFLAG_RW, 0, "IPCOMP"); SYSCTL_NODE(_net_inet, IPPROTO_IPIP, ipip, CTLFLAG_RW, 0, "IPIP"); #endif /* IPSEC */ SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW"); SYSCTL_NODE(_net_inet, OID_AUTO, accf, CTLFLAG_RW, 0, "Accept filters"); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index b5ae9c91ee00..4a3fa70bef56 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1,1396 +1,1375 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#include -#endif /* IPSEC */ #include +#include + #include #include #ifdef CTASSERT CTASSERT(sizeof(struct ip) == 20); #endif /* IP reassembly functions are defined in ip_reass.c. */ extern void ipreass_init(void); extern void ipreass_drain(void); extern void ipreass_slowtimo(void); #ifdef VIMAGE extern void ipreass_destroy(void); #endif struct rmlock in_ifaddr_lock; RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); VNET_DEFINE(int, rsvp_on); VNET_DEFINE(int, ipforwarding); SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipforwarding), 0, "Enable IP forwarding between interfaces"); static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ #define V_ipsendredirects VNET(ipsendredirects) SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table * and transmit implementation do not implement the Strong ES model, * setting this to 1 results in an odd hybrid. * * XXX - ip_checkinterface currently must be disabled if you use ipnat * to translate the destination address to another local interface. * * XXX - ip_checkinterface must be disabled if you add IP aliases * to the loopback interface instead of the interface where the * packets for those addresses are received. */ static VNET_DEFINE(int, ip_checkinterface); #define V_ip_checkinterface VNET(ip_checkinterface) SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_checkinterface), 0, "Verify packet arrives on correct interface"); VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ static struct netisr_handler ip_nh = { .nh_name = "ip", .nh_handler = ip_input, .nh_proto = NETISR_IP, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; #ifdef RSS /* * Directly dispatched frames are currently assumed * to have a flowid already calculated. * * It should likely have something that assert it * actually has valid flow details. */ static struct netisr_handler ip_direct_nh = { .nh_name = "ip_direct", .nh_handler = ip_direct_input, .nh_proto = NETISR_IP_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; #endif extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ #ifdef IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0, "Default MTU"); #endif #ifdef IPSTEALTH VNET_DEFINE(int, ipstealth); SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipstealth), 0, "IP stealth mode, no TTL decrementation on forwarding"); #endif /* * IP statistics are stored in the "array" of counter(9)s. */ VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); VNET_PCPUSTAT_SYSINIT(ipstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipstat); #endif /* VIMAGE */ /* * Kernel module interface for updating ipstat. The argument is an index * into ipstat treated as an array. */ void kmod_ipstat_inc(int statnum) { counter_u64_add(VNET(ipstat)[statnum], 1); } void kmod_ipstat_dec(int statnum) { counter_u64_add(VNET(ipstat)[statnum], -1); } static int sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", "Maximum size of the IP input queue"); static int sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", "Number of packets dropped from the IP input queue"); #ifdef RSS static int sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", "Maximum size of the IP direct input queue"); static int sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_direct_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_direct_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", "Number of packets dropped from the IP direct input queue"); #endif /* RSS */ /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ void ip_init(void) { struct protosw *pr; int i; TAILQ_INIT(&V_in_ifaddrhead); V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); /* Initialize IP reassembly queue. */ ipreass_init(); /* Initialize packet filter hooks. */ V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; V_inet_pfil_hook.ph_af = AF_INET; if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, &V_ipsec_hhh_in[HHOOK_IPSEC_INET], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register input helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, &V_ipsec_hhh_out[HHOOK_IPSEC_INET], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register output helper hook\n", __func__); /* Skip initialization of globals for non-default instances. */ #ifdef VIMAGE if (!IS_DEFAULT_VNET(curvnet)) { netisr_register_vnet(&ip_nh); #ifdef RSS netisr_register_vnet(&ip_direct_nh); #endif return; } #endif pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip_init: PF_INET not found"); /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; /* * Cycle through IP protocols and put them into the appropriate place * in ip_protox[]. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip_protox[pr->pr_protocol] = pr - inetsw; } netisr_register(&ip_nh); #ifdef RSS netisr_register(&ip_direct_nh); #endif } #ifdef VIMAGE static void ip_destroy(void *unused __unused) { struct ifnet *ifp; int error; #ifdef RSS netisr_unregister_vnet(&ip_direct_nh); #endif netisr_unregister_vnet(&ip_nh); if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to unregister pfil hook, " "error %d\n", __func__, error); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister output helper hook " "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } /* Remove the IPv4 addresses from all interfaces. */ in_ifscrub_all(); /* Make sure the IPv4 routes are gone as well. */ IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) rt_flushifroutes_af(ifp, AF_INET); IFNET_RUNLOCK(); /* Destroy IP reassembly queue. */ ipreass_destroy(); /* Cleanup in_ifaddr hash table; should be empty. */ hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); } VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); #endif #ifdef RSS /* * IP direct input routine. * * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. */ void ip_direct_input(struct mbuf *m) { struct ip *ip; int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) + return; + } +#endif /* IPSEC */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; } #endif /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { struct ip *ip = NULL; struct in_ifaddr *ia = NULL; struct ifaddr *ifa; struct ifnet *ifp; int checkif, hlen = 0; uint16_t sum, ip_len; int dchg = 0; /* dest changed after fw */ struct in_addr odst; /* original dst address */ M_ASSERTPKTHDR(m); if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; /* Set up some basics that will be used later. */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; ip_len = ntohs(ip->ip_len); goto ours; } IPSTAT_INC(ips_total); if (m->m_pkthdr.len < sizeof(struct ip)) goto tooshort; if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { IPSTAT_INC(ips_toosmall); return; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == NULL) { IPSTAT_INC(ips_badhlen); return; } ip = mtod(m, struct ip *); } IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); /* 127/8 must not appear on wire - RFC1122 */ ifp = m->m_pkthdr.rcvif; if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { IPSTAT_INC(ips_badaddr); goto bad; } } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { IPSTAT_INC(ips_badsum); goto bad; } #ifdef ALTQ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) /* packet is dropped by traffic conditioner */ return; #endif ip_len = ntohs(ip->ip_len); if (ip_len < hlen) { IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < ip_len) { tooshort: IPSTAT_INC(ips_tooshort); goto bad; } if (m->m_pkthdr.len > ip_len) { if (m->m_len == m->m_pkthdr.len) { m->m_len = ip_len; m->m_pkthdr.len = ip_len; } else m_adj(m, ip_len - m->m_pkthdr.len); } /* * Try to forward the packet, but if we fail continue. * ip_tryforward() does inbound and outbound packet firewall * processing. If firewall has decided that destination becomes * our local address, it sets M_FASTFWD_OURS flag. In this * case skip another inbound firewall processing and update * ip pointer. */ if (V_ipforwarding != 0 -#ifdef IPSEC - && !key_havesp(IPSEC_DIR_INBOUND) - && !key_havesp(IPSEC_DIR_OUTBOUND) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + && (!IPSEC_ENABLED(ipv4) || + IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) #endif - ) { + ) { if ((m = ip_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ip = mtod(m, struct ip *); goto ours; } } -#ifdef IPSEC + +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ - if (ip_ipsec_filtertunnel(m)) - goto passin; + if (IPSEC_ENABLED(ipv4) && + IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) + goto passin; #endif + /* * Run through list of hooks for input packets. * * NB: Beware of the destination address changing (e.g. * by NAT rewriting). When this happens, tell * ip_forward to do the right thing. */ /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passin; odst = ip->ip_dst; if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) return; if (m == NULL) /* consumed by filter */ return; ip = mtod(m, struct ip *); dchg = (odst.s_addr != ip->ip_dst.s_addr); ifp = m->m_pkthdr.rcvif; if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; goto ours; } if (m->m_flags & M_IP_NEXTHOP) { if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows * forwarding packets originally destined to us * to some other directly connected host. */ ip_forward(m, 1); return; } } passin: /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) return; /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no * matter if it is destined to another node, or whether it is * a multicast one, RSVP wants it! and prevents it from being forwarded * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. */ if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) goto ours; /* * Check our list of addresses, to see if the packet is for us. * If we don't have any addresses, assume any unicast packet * we receive might be for us (and let the upper layers deal * with it). */ if (TAILQ_EMPTY(&V_in_ifaddrhead) && (m->m_flags & (M_MCAST|M_BCAST)) == 0) goto ours; /* * Enable a consistency check between the destination address * and the arrival interface for a unicast packet (the RFC 1122 * strong ES model) if IP forwarding is disabled and the packet * is not locally generated and the packet is not subject to * 'ipfw fwd'. * * XXX - Checking also should be disabled if the destination * address is ipnat'ed to a different interface. * * XXX - Checking is incompatible with IP aliases added * to the loopback interface instead of the interface where * the packets are received. * * XXX - This is the case for carp vhost IPs as well so we * insert a workaround. If the packet got here, we already * checked with carp_iamatch() and carp_forus(). */ checkif = V_ip_checkinterface && (V_ipforwarding == 0) && ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && ifp->if_carp == NULL && (dchg == 0); /* * Check for exact addresses in the hash bucket. */ /* IN_IFADDR_RLOCK(); */ LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { /* * If the address matches, verify that the packet * arrived via the correct interface if checking is * enabled. */ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && (!checkif || ia->ia_ifp == ifp)) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); /* IN_IFADDR_RUNLOCK(); */ goto ours; } } /* IN_IFADDR_RUNLOCK(); */ /* * Check for broadcast addresses. * * Only accept broadcast packets that arrive via the matching * interface. Reception of forwarded directed broadcasts would * be handled via ip_forward() and ether_output() with the loopback * into the stack for SIMPLEX interfaces handled by ether_output(). */ if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #ifdef BOOTP_COMPAT if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #endif } IF_ADDR_RUNLOCK(ifp); ia = NULL; } /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { if (V_ip_mrouter) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. */ if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } /* * The process-level routing daemon needs to receive * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ if (ip->ip_p == IPPROTO_IGMP) goto ours; IPSTAT_INC(ips_forward); } /* * Assume the packet is for us, to avoid prematurely taking * a lock on the in_multi hash. Protocols must perform * their own filtering and update statistics accordingly. */ goto ours; } if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) goto ours; if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; /* * Not for us; forward if possible and desirable. */ if (V_ipforwarding == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); } else { ip_forward(m, dchg); } return; ours: #ifdef IPSTEALTH /* * IPSTEALTH: Process non-routing options only * if the packet is destined for us. */ if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) return; #endif /* IPSTEALTH */ /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf. */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { /* XXXGL: shouldn't we save & set m_flags? */ m = ip_reass(m); if (m == NULL) return; ip = mtod(m, struct ip *); /* Get the header length of the reassembled packet */ hlen = ip->ip_hl << 2; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip_ipsec_input(m, ip->ip_p) != 0) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) + return; + } #endif /* IPSEC */ /* * Switch out to protocol's input routine. */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; bad: m_freem(m); } /* * IP timer processing; * if a timer expires on a reassembly * queue, discard it. */ void ip_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_slowtimo(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } void ip_drain(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_drain(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * The protocol to be inserted into ip_protox[] must be already registered * in inetsw[], either statically or through pf_proto_register(). */ int ipproto_register(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ return (EEXIST); /* Find the protocol position in inetsw[] and set the index. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol == ipproto) { ip_protox[pr->pr_protocol] = pr - inetsw; return (0); } } return (EPROTONOSUPPORT); } int ipproto_unregister(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. */ ip_protox[ipproto] = pr - inetsw; return (0); } u_char inetctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, EMSGSIZE, EHOSTUNREACH, 0, 0, 0, 0, EHOSTUNREACH, 0, ENOPROTOOPT, ECONNREFUSED }; /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. * * The srcrt parameter indicates whether the packet is being forwarded * via a source route. */ void ip_forward(struct mbuf *m, int srcrt) { struct ip *ip = mtod(m, struct ip *); struct in_ifaddr *ia; struct mbuf *mcopy; struct sockaddr_in *sin; struct in_addr dest; struct route ro; int error, type = 0, code = 0, mtu = 0; if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } -#ifdef IPSEC - if (ip_ipsec_fwd(m) != 0) { - IPSTAT_INC(ips_cantforward); - m_freem(m); - return; - } -#endif /* IPSEC */ + if ( #ifdef IPSTEALTH - if (!V_ipstealth) { + V_ipstealth == 0 && #endif - if (ip->ip_ttl <= IPTTLDEC) { - icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, - 0, 0); - return; - } -#ifdef IPSTEALTH + ip->ip_ttl <= IPTTLDEC) { + icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); + return; } -#endif bzero(&ro, sizeof(ro)); sin = (struct sockaddr_in *)&ro.ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; #ifdef RADIX_MPATH rtalloc_mpath_fib(&ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), M_GETFIB(m)); #else in_rtalloc_ign(&ro, 0, M_GETFIB(m)); #endif if (ro.ro_rt != NULL) { ia = ifatoia(ro.ro_rt->rt_ifa); ifa_ref(&ia->ia_ifa); } else ia = NULL; -#ifndef IPSEC - /* - * 'ia' may be NULL if there is no route for this destination. - * In case of IPsec, Don't discard it just yet, but pass it to - * ip_output in case of outgoing IPsec policy. - */ - if (!srcrt && ia == NULL) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - RO_RTFREE(&ro); - return; - } -#endif - /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. * * XXX this can be optimized a lot by saving the data in a local * buffer on the stack (72 bytes at most), and only allocating the * mbuf if really necessary. The vast majority of the packets * are forwarded without having to send an ICMP back (either * because unnecessary, or because rate limited), so we are * really we are wasting a lot of work here. * * We don't use m_copym() because it might return a reference * to a shared cluster. Both this function and ip_output() * assume exclusive access to the IP header in `m', so any * data in a cluster may change before we reach icmp_error(). */ mcopy = m_gethdr(M_NOWAIT, m->m_type); if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { /* * It's probably ok if the pkthdr dup fails (because * the deep copy of the tag chain failed), but for now * be conservative and just discard the copy since * code below may some day want the tags. */ m_free(mcopy); mcopy = NULL; } if (mcopy != NULL) { mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } - #ifdef IPSTEALTH - if (!V_ipstealth) { + if (V_ipstealth == 0) #endif ip->ip_ttl -= IPTTLDEC; -#ifdef IPSTEALTH +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { + /* mbuf consumed by IPsec */ + m_freem(mcopy); + if (error != EINPROGRESS) + IPSTAT_INC(ips_cantforward); + return; + } + /* No IPsec processing required */ } -#endif - +#endif /* IPSEC */ /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a default route * or a route modified by a redirect. */ dest.s_addr = 0; if (!srcrt && V_ipsendredirects && ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { struct rtentry *rt; rt = ro.ro_rt; if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && satosin(rt_key(rt))->sin_addr.s_addr != 0) { #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) u_long src = ntohl(ip->ip_src.s_addr); if (RTA(rt) && (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { if (rt->rt_flags & RTF_GATEWAY) dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; else dest.s_addr = ip->ip_dst.s_addr; /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; } } } error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); if (error == EMSGSIZE && ro.ro_rt) mtu = ro.ro_rt->rt_mtu; RO_RTFREE(&ro); if (error) IPSTAT_INC(ips_cantforward); else { IPSTAT_INC(ips_forward); if (type) IPSTAT_INC(ips_redirectsent); else { if (mcopy) m_freem(mcopy); if (ia != NULL) ifa_free(&ia->ia_ifa); return; } } if (mcopy == NULL) { if (ia != NULL) ifa_free(&ia->ia_ifa); return; } switch (error) { case 0: /* forwarded, but need redirect */ /* type, code set above */ break; case ENETUNREACH: case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; break; case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - -#ifdef IPSEC - /* - * If IPsec is configured for this path, - * override any possibly mtu value set by ip_output. - */ - mtu = ip_ipsec_mtu(mcopy, mtu); -#endif /* IPSEC */ /* * If the MTU was set before make sure we are below the * interface MTU. * If the MTU wasn't set before use the interface mtu or * fall back to the next smaller mtu step compared to the * current packet size. */ if (mtu != 0) { if (ia != NULL) mtu = min(mtu, ia->ia_ifp->if_mtu); } else { if (ia != NULL) mtu = ia->ia_ifp->if_mtu; else mtu = ip_next_mtu(ntohs(ip->ip_len), 0); } IPSTAT_INC(ips_cantfrag); break; case ENOBUFS: case EACCES: /* ipfw denied packet */ m_freem(mcopy); if (ia != NULL) ifa_free(&ia->ia_ifa); return; } if (ia != NULL) ifa_free(&ia->ia_ifa); icmp_error(mcopy, type, code, dest.s_addr, mtu); } #define CHECK_SO_CT(sp, ct) \ (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) void ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, struct mbuf *m) { if ((inp->inp_socket->so_options & SO_BINTIME) || CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { struct bintime bt; bintime(&bt); *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), SCM_BINTIME, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { struct timeval tv; microtime(&tv); *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { struct timespec ts; nanotime(&ts); *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), SCM_REALTIME, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { struct timespec ts; nanouptime(&ts); *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), SCM_MONOTONIC, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVDSTADDR) { *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTTL) { *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, sizeof(u_char), IP_RECVTTL, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef notyet /* XXX * Moving these out of udp_input() made them even more broken * than they already were. */ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { *mp = sbcreatecontrol((caddr_t)opts_deleted_above, sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #endif if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; struct sdlbuf { struct sockaddr_dl sdl; u_char pad[32]; } sdlbuf; struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; if ((ifp = m->m_pkthdr.rcvif) && ifp->if_index && ifp->if_index <= V_if_index) { sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; /* * Change our mind and don't try copy. */ if (sdp->sdl_family != AF_LINK || sdp->sdl_len > sizeof(sdlbuf)) { goto makedummy; } bcopy(sdp, sdl2, sdp->sdl_len); } else { makedummy: sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTOS) { *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, sizeof(u_char), IP_RECVTOS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? */ *mp = sbcreatecontrol((caddr_t) &flowid, sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; *mp = sbcreatecontrol((caddr_t) &flow_type, sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { *mp = sbcreatecontrol((caddr_t) &rss_bucketid, sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } } #endif } /* * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on * locking. This code remains in ip_input.c as ip_mroute.c is optionally * compiled. */ static VNET_DEFINE(int, ip_rsvp_on); VNET_DEFINE(struct socket *, ip_rsvpd); #define V_ip_rsvp_on VNET(ip_rsvp_on) int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (V_ip_rsvpd != NULL) return EADDRINUSE; V_ip_rsvpd = so; /* * This may seem silly, but we need to be sure we don't over-increment * the RSVP counter, in case something slips up. */ if (!V_ip_rsvp_on) { V_ip_rsvp_on = 1; V_rsvp_on++; } return 0; } int ip_rsvp_done(void) { V_ip_rsvpd = NULL; /* * This may seem silly, but we need to be sure we don't over-decrement * the RSVP counter, in case something slips up. */ if (V_ip_rsvp_on) { V_ip_rsvp_on = 0; V_rsvp_on--; } return 0; } int rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (rsvp_input_p) { /* call the real one if loaded */ *mp = m; rsvp_input_p(mp, offp, proto); return (IPPROTO_DONE); } /* Can still get packets with rsvp_on = 0 if there is a local member * of the group to which the RSVP packet is addressed. But in this * case we want to throw the packet away. */ if (!V_rsvp_on) { m_freem(m); return (IPPROTO_DONE); } if (V_ip_rsvpd != NULL) { *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } /* Drop the packet */ m_freem(m); return (IPPROTO_DONE); } diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c deleted file mode 100644 index 1b72553a42e5..000000000000 --- a/sys/netinet/ip_ipsec.c +++ /dev/null @@ -1,245 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "opt_ipsec.h" -#include "opt_sctp.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SCTP -#include -#endif - -#include - -#include -#include -#include - -extern struct protosw inetsw[]; - -static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 0; -#define V_ip4_ipsec_filtertunnel VNET(ip4_ipsec_filtertunnel) - -SYSCTL_DECL(_net_inet_ipsec); -SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_filtertunnel), 0, - "If set filter packets from an IPsec tunnel."); - -/* - * Check if we have to jump over firewall processing for this packet. - * Called from ip_input(). - * 1 = jump over firewall, 0 = packet goes through firewall. - */ -int -ip_ipsec_filtertunnel(struct mbuf *m) -{ - - /* - * Bypass packet filtering for packets previously handled by IPsec. - */ - if (!V_ip4_ipsec_filtertunnel && - m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) - return 1; - return 0; -} - -/* - * Check if this packet has an active SA and needs to be dropped instead - * of forwarded. - * Called from ip_forward(). - * 1 = drop packet, 0 = forward packet. - */ -int -ip_ipsec_fwd(struct mbuf *m) -{ - - return (ipsec4_in_reject(m, NULL)); -} - -/* - * Check if protocol type doesn't have a further header and do IPSEC - * decryption or reject right now. Protocols with further headers get - * their IPSEC treatment within the protocol specific processing. - * Called from ip_input(). - * 1 = drop packet, 0 = continue processing packet. - */ -int -ip_ipsec_input(struct mbuf *m, int nxt) -{ - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0) - return (ipsec4_in_reject(m, NULL)); - return (0); -} - -/* - * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. - * Called from ip_forward(). - * Returns MTU suggestion for ICMP needfrag reply. - */ -int -ip_ipsec_mtu(struct mbuf *m, int mtu) -{ - /* - * If the packet is routed over IPsec tunnel, tell the - * originator the tunnel MTU. - * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz - * XXX quickhack!!! - */ - return (mtu - ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL)); -} - -/* - * - * Called from ip_output(). - * 1 = drop packet, 0 = continue processing packet, - * -1 = packet was reinjected and stop processing packet - */ -int -ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) -{ - struct secpolicy *sp; - - if (!key_havesp(IPSEC_DIR_OUTBOUND)) - return 0; - - /* - * Check the security policy (SP) for the packet and, if - * required, do IPsec-related processing. There are two - * cases here; the first time a packet is sent through - * it will be untagged and handled by ipsec4_checkpolicy. - * If the packet is resubmitted to ip_output (e.g. after - * AH, ESP, etc. processing), there will be a tag to bypass - * the lookup and related policy checking. - */ - if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { - *error = 0; - return (0); - } - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); - /* - * There are four return cases: - * sp != NULL apply IPsec policy - * sp == NULL, error == 0 no IPsec handling needed - * sp == NULL, error == -EINVAL discard packet w/o error - * sp == NULL, error != 0 discard packet, report error - */ - if (sp != NULL) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(*m); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#ifdef SCTP - if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) { - struct ip *ip = mtod(*m, struct ip *); - - sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP; - } -#endif - - /* NB: callee frees mbuf */ - *error = ipsec4_process_packet(*m, sp->req); - KEY_FREESP(&sp); - if (*error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing and return without error. - */ - *error = 0; - goto done; - } - /* - * Preserve KAME behaviour: ENOENT can be returned - * when an SA acquire is in progress. Don't propagate - * this to user-level; it confuses applications. - * - * XXX this will go away when the SADB is redone. - */ - if (*error == ENOENT) - *error = 0; - goto reinjected; - } else { /* sp == NULL */ - - if (*error != 0) { - /* - * Hack: -EINVAL is used to signal that a packet - * should be silently discarded. This is typically - * because we asked key management for an SA and - * it was delayed (e.g. kicked up to IKE). - */ - if (*error == -EINVAL) - *error = 0; - goto bad; - } - /* No IPsec processing for this packet. */ - } -done: - return (0); -reinjected: - return (-1); -bad: - if (sp != NULL) - KEY_FREESP(&sp); - return 1; -} diff --git a/sys/netinet/ip_ipsec.h b/sys/netinet/ip_ipsec.h deleted file mode 100644 index f499b74000ba..000000000000 --- a/sys/netinet/ip_ipsec.h +++ /dev/null @@ -1,40 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP_IPSEC_H_ -#define _NETINET_IP_IPSEC_H_ - -int ip_ipsec_filtertunnel(struct mbuf *); -int ip_ipsec_fwd(struct mbuf *); -int ip_ipsec_input(struct mbuf *, int); -int ip_ipsec_mtu(struct mbuf *, int); -int ip_ipsec_output(struct mbuf **, struct inpcb *, int *); -#endif diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 81f8264ce5cf..fc229d1f078b 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,1459 +1,1433 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_ratelimit.h" #include "opt_ipsec.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" #include "opt_route.h" #include "opt_sctp.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef SCTP #include #include #endif -#ifdef IPSEC -#include -#include -#endif /* IPSEC*/ +#include #include #include #ifdef MBUF_STRESS_TEST static int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); #endif static void ip_mloopback(struct ifnet *, const struct mbuf *, int); extern int in_mcast_loop; extern struct protosw inetsw[]; static inline int ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error) { struct m_tag *fwd_tag = NULL; struct mbuf *m; struct in_addr odst; struct ip *ip; m = *mp; ip = mtod(m, struct ip *); /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, inp); m = *mp; if ((*error) != 0 || m == NULL) return 1; /* Finished */ ip = mtod(m, struct ip *); /* See if destination IP address was changed by packet filter. */ if (odst.s_addr != ip->ip_dst.s_addr) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip_input(). */ if (in_localip(ip->ip_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif *error = netisr_queue(NETISR_IP, m); return 1; /* Finished */ } bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; return -1; /* Reloop */ } /* See if fib was changed by packet filter. */ if ((*fibnum) != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; *fibnum = M_GETFIB(m); return -1; /* Reloop for FIB change */ } /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; *error = netisr_queue(NETISR_IP, m); return 1; /* Finished */ } /* Or forward to some other address? */ if ((m->m_flags & M_IP_NEXTHOP) && ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP_NEXTHOP; m_tag_delete(m, fwd_tag); return -1; /* Reloop for CHANGE of dst */ } return 0; } /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * In the IP forwarding case, the packet will arrive with options already * inserted, so must have a NULL opt pointer. */ int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct rm_priotracker in_ifa_tracker; struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); int mtu; int error = 0; struct sockaddr_in *dst; const struct sockaddr_in *gw; struct in_ifaddr *ia; int isbroadcast; uint16_t ip_len, ip_off; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; int have_ia_ref; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); if (inp != NULL) { INP_LOCK_ASSERT(inp); M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); } else ro->ro_flags |= RT_LLE_CACHE; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET, m, ro); #endif if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); if (len != 0) hlen = len; /* ip->ip_hl is updated above */ } ip = mtod(m, struct ip *); ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; ip_fillid(ip); IPSTAT_INC(ips_localout); } else { /* Header already set, fetch hlen from there */ hlen = ip->ip_hl << 2; } /* * dst/gw handling: * * dst can be rewritten but always points to &ro->ro_dst. * gw is readonly but can point either to dst OR rt_gateway, * therefore we need restore gw if we're redoing lookup. */ gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); rte = ro->ro_rt; if (rte == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } again: /* * Validate route against routing table additions; * a better/more specific route might have been added. */ if (inp) RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. * The address family should also be checked in case of sharing the * cache with IPv6. * Also check whether routing cache needs invalidation. */ rte = ro->ro_rt; if (rte && ((rte->rt_flags & RTF_UP) == 0 || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp) || dst->sin_family != AF_INET || dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { RTFREE(rte); rte = ro->ro_rt = (struct rtentry *)NULL; if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = (struct llentry *)NULL; } ia = NULL; have_ia_ref = 0; /* * If routing to interface only, short circuit routing lookup. * The use of an all-ones broadcast address implies this; an * interface is specified by the broadcast address of an interface, * or the destination address of a ptp interface. */ if (flags & IP_SENDONES) { if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), M_GETFIB(m)))) == NULL && (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } have_ia_ref = 1; ip->ip_dst.s_addr = INADDR_BROADCAST; dst->sin_addr = ip->ip_dst; ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = 1; } else if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL && (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, M_GETFIB(m)))) == NULL) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } have_ia_ref = 1; ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = ifp->if_flags & IFF_BROADCAST ? in_ifaddr_broadcast(dst->sin_addr, ia) : 0; } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* * Bypass the normal routing lookup for multicast * packets if the interface is specified. */ ifp = imo->imo_multicast_ifp; IFP_TO_IA(ifp, ia, &in_ifa_tracker); if (ia) have_ia_ref = 1; isbroadcast = 0; /* fool gcc */ } else { /* * We want to do any cloning requested by the link layer, * as this is probably required in all cases for correct * operation (as it is for ARP). */ if (rte == NULL) { #ifdef RADIX_MPATH rtalloc_mpath_fib(ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), fibnum); #else in_rtalloc_ign(ro, 0, fibnum); #endif rte = ro->ro_rt; } if (rte == NULL || (rte->rt_flags & RTF_UP) == 0 || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ no_route_but_check_spd = 1; mtu = 0; /* Silence GCC warning. */ goto sendit; #endif IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } ia = ifatoia(rte->rt_ifa); ifp = rte->rt_ifp; counter_u64_add(rte->rt_pksent, 1); rt_update_ro_flags(ro); if (rte->rt_flags & RTF_GATEWAY) gw = (struct sockaddr_in *)rte->rt_gateway; if (rte->rt_flags & RTF_HOST) isbroadcast = (rte->rt_flags & RTF_BROADCAST); else if (ifp->if_flags & IFF_BROADCAST) isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); else isbroadcast = 0; } /* * Calculate MTU. If we have a route that is up, use that, * otherwise use the interface's MTU. */ if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) mtu = rte->rt_mtu; else mtu = ifp->if_mtu; /* Catch a possible divide by zero later. */ KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; /* * IP destination address is multicast. Make sure "gw" * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ gw = dst; /* * See if the caller provided any multicast options */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; if (imo->imo_multicast_vif != -1) ip->ip_src.s_addr = ip_mcast_src ? ip_mcast_src(imo->imo_multicast_vif) : INADDR_ANY; } else ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. */ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { if ((ifp->if_flags & IFF_MULTICAST) == 0) { IPSTAT_INC(ips_noroute); error = ENETUNREACH; goto bad; } } /* * If source address not specified yet, use address * of outgoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) { /* Interface may have no addresses. */ if (ia != NULL) ip->ip_src = IA_SIN(ia)->sin_addr; } if ((imo == NULL && in_mcast_loop) || (imo && imo->imo_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we are not a member * of the group; ip_input() will filter it later, * thus deferring a hash lookup and mutex acquisition * at the expense of a cheap copy using m_copym(). */ ip_mloopback(ifp, m, hlen); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IP_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip_mloopback(), * above, will be forwarded by the ip_input() routine, * if necessary. */ if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { /* * If rsvp daemon is not running, do not * set ip_moptions. This ensures that the packet * is multicast and not just sent down one link * as prescribed by rsvpd. */ if (!V_rsvp_on) imo = NULL; if (ip_mforward && ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a time-to-live of zero may be looped- * back, above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip_mloopback() will * loop back a copy. ip_input() will drop the copy if * this host does not belong to the destination group on * the loopback interface. */ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { m_freem(m); goto done; } goto sendit; } /* * If the source address is not specified yet, use the address * of the outoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) { /* Interface may have no addresses. */ if (ia != NULL) { ip->ip_src = IA_SIN(ia)->sin_addr; } } /* * Look for broadcast address and * verify user is allowed to send * such a packet. */ if (isbroadcast) { if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EADDRNOTAVAIL; goto bad; } if ((flags & IP_ALLOWBROADCAST) == 0) { error = EACCES; goto bad; } /* don't allow broadcast messages to be fragmented */ if (ip_len > mtu) { error = EMSGSIZE; goto bad; } m->m_flags |= M_BCAST; } else { m->m_flags &= ~M_BCAST; } sendit: -#ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &error)) { - case 1: - goto bad; - case -1: - goto done; - case 0: - default: - break; /* Continue with packet processing. */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } } /* * Check if there was a route for this packet; return error if not. */ if (no_route_but_check_spd) { IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } /* Update variables that are affected by ipsec4_output(). */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ if (PFIL_HOOKED(&V_inet_pfil_hook)) { switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { case 1: /* Finished */ goto done; case 0: /* Continue normally */ ip = mtod(m, struct ip *); break; case -1: /* Need to try again */ /* Reset everything for a new round */ RO_RTFREE(ro); if (have_ia_ref) ifa_free(&ia->ia_ifa); ro->ro_prepend = NULL; rte = NULL; gw = dst; ip = mtod(m, struct ip *); goto again; } } /* 127/8 must not appear on wire - RFC1122. */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { IPSTAT_INC(ips_badaddr); error = EADDRNOTAVAIL; goto bad; } } m->m_pkthdr.csum_flags |= CSUM_IP; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif /* * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. */ if (ip_len <= mtu || (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { ip->ip_sum = 0; if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { ip->ip_sum = in_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_IP; } /* * Record statistics for this interface address. * With CSUM_TSO the byte/packet count will be slightly * incorrect because we count the IP+TCP headers only * once instead of for every generated packet. */ if (!(flags & IP_FORWARDING) && ia) { if (m->m_pkthdr.csum_flags & CSUM_TSO) counter_u64_add(ia->ia_ifa.ifa_opackets, m->m_pkthdr.len / m->m_pkthdr.tso_segsz); else counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } #ifdef MBUF_STRESS_TEST if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) m = m_fragment(m, M_NOWAIT, mbuf_frag_size); #endif /* * Reset layer specific mbuf flags * to avoid confusing lower layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); #ifdef RATELIMIT if (inp != NULL) { if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) in_pcboutput_txrtlmt(inp, ifp, m); /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = inp->inp_snd_tag; } else { m->m_pkthdr.snd_tag = NULL; } #endif error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); #ifdef RATELIMIT /* check for route change */ if (error == EAGAIN) in_pcboutput_eagain(inp); #endif goto done; } /* Balk when DF bit is set or the interface didn't support TSO. */ if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; IPSTAT_INC(ips_cantfrag); goto bad; } /* * Too large for interface; fragment if possible. If successful, * on return, m will point to a list of packets to be sent. */ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); if (error) goto bad; for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia != NULL) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } /* * Reset layer specific mbuf flags * to avoid confusing upper layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp, mtod(m, struct ip *), NULL); #ifdef RATELIMIT if (inp != NULL) { if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) in_pcboutput_txrtlmt(inp, ifp, m); /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = inp->inp_snd_tag; } else { m->m_pkthdr.snd_tag = NULL; } #endif error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); #ifdef RATELIMIT /* check for route change */ if (error == EAGAIN) in_pcboutput_eagain(inp); #endif } else m_freem(m); } if (error == 0) IPSTAT_INC(ips_fragmented); done: if (ro == &iproute) RO_RTFREE(ro); else if (rte == NULL) /* * If the caller supplied a route but somehow the reference * to it has been released need to prevent the caller * calling RTFREE on it again. */ ro->ro_rt = NULL; if (have_ia_ref) ifa_free(&ia->ia_ifa); return (error); bad: m_freem(m); goto done; } /* * Create a chain of fragments which fit the given mtu. m_frag points to the * mbuf to be fragmented; on return it points to the chain with the fragments. * Return 0 if no error. If error, m_frag may contain a partially built * chain of fragments that should be freed by the caller. * * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) */ int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags) { int error = 0; int hlen = ip->ip_hl << 2; int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ int off; struct mbuf *m0 = *m_frag; /* the original packet */ int firstlen; struct mbuf **mnext; int nfrags; uint16_t ip_len, ip_off; ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { /* Fragmentation not allowed */ IPSTAT_INC(ips_cantfrag); return EMSGSIZE; } /* * Must be able to put at least 8 bytes per fragment. */ if (len < 8) return EMSGSIZE; /* * If the interface will not calculate checksums on * fragmented packets, then do it here. */ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { sctp_delayed_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif if (len > PAGE_SIZE) { /* * Fragment large datagrams such that each segment * contains a multiple of PAGE_SIZE amount of data, * plus headers. This enables a receiver to perform * page-flipping zero-copy optimizations. * * XXX When does this help given that sender and receiver * could have different page sizes, and also mtu could * be less than the receiver's page size ? */ int newlen; off = MIN(mtu, m0->m_pkthdr.len); /* * firstlen (off - hlen) must be aligned on an * 8-byte boundary */ if (off < hlen) goto smart_frag_failure; off = ((off - hlen) & ~7) + hlen; newlen = (~PAGE_MASK) & mtu; if ((newlen + sizeof (struct ip)) > mtu) { /* we failed, go back the default */ smart_frag_failure: newlen = len; off = hlen + len; } len = newlen; } else { off = hlen + len; } firstlen = off - hlen; mnext = &m0->m_nextpkt; /* pointer to next packet */ /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. * Here, m0 is the original packet, m is the fragment being created. * The fragments are linked off the m_nextpkt of the original * packet, which after processing serves as the first fragment. */ for (nfrags = 1; off < ip_len; off += len, nfrags++) { struct ip *mhip; /* ip header on the fragment */ struct mbuf *m; int mhlen = sizeof (struct ip); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; IPSTAT_INC(ips_odropped); goto done; } /* * Make sure the complete packet header gets copied * from the originating mbuf to the newly created * mbuf. This also ensures that existing firewall * classification(s), VLAN tags and so on get copied * to the resulting fragmented packet(s): */ if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { m_free(m); error = ENOBUFS; IPSTAT_INC(ips_odropped); goto done; } /* * In the first mbuf, leave room for the link header, then * copy the original IP header including options. The payload * goes into an additional mbuf chain returned by m_copym(). */ m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); mhip->ip_v = IPVERSION; mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; /* XXX do we need to add ip_off below ? */ mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ip_len) len = ip_len - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copym(m0, off, len, M_NOWAIT); if (m->m_next == NULL) { /* copy failed */ m_free(m); error = ENOBUFS; /* ??? */ IPSTAT_INC(ips_odropped); goto done; } m->m_pkthdr.len = mhlen + len; #ifdef MAC mac_netinet_fragment(m0, m); #endif mhip->ip_off = htons(mhip->ip_off); mhip->ip_sum = 0; if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { mhip->ip_sum = in_cksum(m, mhlen); m->m_pkthdr.csum_flags &= ~CSUM_IP; } *mnext = m; mnext = &m->m_nextpkt; } IPSTAT_ADD(ips_ofragments, nfrags); /* * Update first fragment by trimming what's been copied out * and updating header. */ m_adj(m0, hlen + firstlen - ip_len); m0->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m0->m_pkthdr.len); ip->ip_off = htons(ip_off | IP_MF); ip->ip_sum = 0; if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { ip->ip_sum = in_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } done: *m_frag = m0; return error; } void in_delayed_cksum(struct mbuf *m) { struct ip *ip; uint16_t csum, offset, ip_len; ip = mtod(m, struct ip *); offset = ip->ip_hl << 2 ; ip_len = ntohs(ip->ip_len); csum = in_cksum_skip(m, ip_len, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ /* find the mbuf in the chain where the checksum starts*/ while ((m != NULL) && (offset >= m->m_len)) { offset -= m->m_len; m = m->m_next; } KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain.")); KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs.")); *(u_short *)(m->m_data + offset) = csum; } /* * IP socket option processing. */ int ip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; #ifdef RSS uint32_t rss_bucket; int retval; #endif error = optval = 0; if (sopt->sopt_level != IPPROTO_IP) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(inp); if ((so->so_options & SO_REUSEADDR) != 0) inp->inp_flags2 |= INP_REUSEADDR; else inp->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(inp); error = 0; break; case SO_REUSEPORT: INP_WLOCK(inp); if ((so->so_options & SO_REUSEPORT) != 0) inp->inp_flags2 |= INP_REUSEPORT; else inp->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(inp); error = 0; break; case SO_SETFIB: INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); error = 0; break; case SO_MAX_PACING_RATE: #ifdef RATELIMIT INP_WLOCK(inp); inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; INP_WUNLOCK(inp); error = 0; #else error = EOPNOTSUPP; #endif break; default: break; } } return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case IP_OPTIONS: #ifdef notyet case IP_RETOPTS: #endif { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); if (error) { m_free(m); break; } INP_WLOCK(inp); error = ip_pcbopts(inp, sopt->sopt_name, m); INP_WUNLOCK(inp); return (error); } case IP_BINDANY: if (sopt->sopt_td != NULL) { error = priv_check(sopt->sopt_td, PRIV_NETINET_BINDANY); if (error) break; } /* FALLTHROUGH */ case IP_BINDMULTI: #ifdef RSS case IP_RSS_LISTEN_BUCKET: #endif case IP_TOS: case IP_TTL: case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_ONESBCAST: case IP_DONTFRAG: case IP_RECVTOS: case IP_RECVFLOWID: #ifdef RSS case IP_RECVRSSBUCKETID: #endif error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (sopt->sopt_name) { case IP_TOS: inp->inp_ip_tos = optval; break; case IP_TTL: inp->inp_ip_ttl = optval; break; case IP_MINTTL: if (optval >= 0 && optval <= MAXTTL) inp->inp_ip_minttl = optval; else error = EINVAL; break; #define OPTSET(bit) do { \ INP_WLOCK(inp); \ if (optval) \ inp->inp_flags |= bit; \ else \ inp->inp_flags &= ~bit; \ INP_WUNLOCK(inp); \ } while (0) #define OPTSET2(bit, val) do { \ INP_WLOCK(inp); \ if (val) \ inp->inp_flags2 |= bit; \ else \ inp->inp_flags2 &= ~bit; \ INP_WUNLOCK(inp); \ } while (0) case IP_RECVOPTS: OPTSET(INP_RECVOPTS); break; case IP_RECVRETOPTS: OPTSET(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: OPTSET(INP_RECVDSTADDR); break; case IP_RECVTTL: OPTSET(INP_RECVTTL); break; case IP_RECVIF: OPTSET(INP_RECVIF); break; case IP_ONESBCAST: OPTSET(INP_ONESBCAST); break; case IP_DONTFRAG: OPTSET(INP_DONTFRAG); break; case IP_BINDANY: OPTSET(INP_BINDANY); break; case IP_RECVTOS: OPTSET(INP_RECVTOS); break; case IP_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; case IP_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IP_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { inp->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; case IP_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif } break; #undef OPTSET #undef OPTSET2 /* * Multicast socket options are processed by the in_mcast * module. */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: case IP_BLOCK_SOURCE: case IP_UNBLOCK_SOURCE: case IP_MSFILTER: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: error = inp_setmoptions(inp, sopt); break; case IP_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(inp); switch (optval) { case IP_PORTRANGE_DEFAULT: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags &= ~(INP_HIGHPORT); break; case IP_PORTRANGE_HIGH: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags |= INP_HIGHPORT; break; case IP_PORTRANGE_LOW: inp->inp_flags &= ~(INP_HIGHPORT); inp->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(inp); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: - { - caddr_t req; - struct mbuf *m; - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if (IPSEC_ENABLED(ipv4)) { + error = IPSEC_PCBCTL(ipv4, inp, sopt); break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - break; - req = mtod(m, caddr_t); - error = ipsec_set_policy(inp, sopt->sopt_name, req, - m->m_len, (sopt->sopt_td != NULL) ? - sopt->sopt_td->td_ucred : NULL); - m_freem(m); - break; - } + } + /* FALLTHROUGH */ #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: if (inp->inp_options) error = sooptcopyout(sopt, mtod(inp->inp_options, char *), inp->inp_options->m_len); else sopt->sopt_valsize = 0; break; case IP_TOS: case IP_TTL: case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_PORTRANGE: case IP_ONESBCAST: case IP_DONTFRAG: case IP_BINDANY: case IP_RECVTOS: case IP_BINDMULTI: case IP_FLOWID: case IP_FLOWTYPE: case IP_RECVFLOWID: #ifdef RSS case IP_RSSBUCKETID: case IP_RECVRSSBUCKETID: #endif switch (sopt->sopt_name) { case IP_TOS: optval = inp->inp_ip_tos; break; case IP_TTL: optval = inp->inp_ip_ttl; break; case IP_MINTTL: optval = inp->inp_ip_minttl; break; #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) case IP_RECVOPTS: optval = OPTBIT(INP_RECVOPTS); break; case IP_RECVRETOPTS: optval = OPTBIT(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: optval = OPTBIT(INP_RECVDSTADDR); break; case IP_RECVTTL: optval = OPTBIT(INP_RECVTTL); break; case IP_RECVIF: optval = OPTBIT(INP_RECVIF); break; case IP_PORTRANGE: if (inp->inp_flags & INP_HIGHPORT) optval = IP_PORTRANGE_HIGH; else if (inp->inp_flags & INP_LOWPORT) optval = IP_PORTRANGE_LOW; else optval = 0; break; case IP_ONESBCAST: optval = OPTBIT(INP_ONESBCAST); break; case IP_DONTFRAG: optval = OPTBIT(INP_DONTFRAG); break; case IP_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IP_RECVTOS: optval = OPTBIT(INP_RECVTOS); break; case IP_FLOWID: optval = inp->inp_flowid; break; case IP_FLOWTYPE: optval = inp->inp_flowtype; break; case IP_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IP_RSSBUCKETID: retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IP_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IP_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; /* * Multicast socket options are processed by the in_mcast * module. */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_MSFILTER: error = inp_getmoptions(inp, sopt); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: - { - struct mbuf *m = NULL; - caddr_t req = NULL; - size_t len = 0; - - if (m != NULL) { - req = mtod(m, caddr_t); - len = m->m_len; + if (IPSEC_ENABLED(ipv4)) { + error = IPSEC_PCBCTL(ipv4, inp, sopt); + break; } - error = ipsec_get_policy(sotoinpcb(so), req, len, &m); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0) - m_freem(m); - break; - } + /* FALLTHROUGH */ #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Routine called from ip_output() to loop back a copy of an IP multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be a loopback interface -- evil, but easier than * replicating that code here. */ static void ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) { struct ip *ip; struct mbuf *copym; /* * Make a deep copy of the packet because we're going to * modify the pack in order to generate checksums. */ copym = m_dup(m, M_NOWAIT); if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym != NULL) { /* If needed, compute the checksum and mark it as valid. */ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(copym); copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } /* * We don't bother to fragment if the IP length is greater * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, hlen); if_simloop(ifp, copym, AF_INET, 0); } } diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index c8f944c9e252..89df6b734ae9 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -1,1139 +1,1138 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#endif /*IPSEC*/ +#include #include #include VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); VNET_DEFINE(struct inpcbhead, ripcb); VNET_DEFINE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) /* * Control and data hooks for ipfw, dummynet, divert and so on. * The data hooks are not used here but it is convenient * to keep them all in one place. */ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL; VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); void (*ip_divert_ptr)(struct mbuf *, int); int (*ng_ipfw_input_p)(struct mbuf **, int, struct ip_fw_args *, int); #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip_mrouter); /* * The various mrouter and rsvp functions. */ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(u_long, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); int (*rsvp_input_p)(struct mbuf **, int *, int); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ extern struct protosw inetsw[]; u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); u_long rip_recvspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); /* * Hash functions */ #define INP_PCBHASH_RAW_SIZE 256 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ (((proto) + (laddr) + (faddr)) % (mask) + 1) #ifdef INET static void rip_inshash(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *pcbhash; int hash; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); if (inp->inp_ip_p != 0 && inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != INADDR_ANY) { hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void rip_delhash(struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); LIST_REMOVE(inp, inp_hash); } #endif /* INET */ /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ static void rip_zone_change(void *tag) { uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); } static int rip_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "rawinp"); return (0); } void rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, 1, "ripcb", rip_inpcb_init, NULL, 0, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE static void rip_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ripcbinfo); } VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL); #endif #ifdef INET static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) { int policyfail = 0; INP_LOCK_ASSERT(last); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* check AH/ESP integrity. */ - if (ipsec4_in_reject(n, last)) { - policyfail = 1; + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0) + policyfail = 1; } #endif /* IPSEC */ #ifdef MAC if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) policyfail = 1; #endif /* Check the minimum TTL for socket. */ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) policyfail = 1; if (!policyfail) { struct mbuf *opts = NULL; struct socket *so; so = last->inp_socket; if ((last->inp_flags & INP_CONTROLOPTS) || (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) ip_savecontrol(last, &opts, ip, n); SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&so->so_rcv); } else sorwakeup_locked(so); } else m_freem(n); return (policyfail); } /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; int hash; *mp = NULL; bzero(&ripsrc, sizeof(ripsrc)); ripsrc.sin_len = sizeof(ripsrc); ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ip->ip_src; last = NULL; ifp = m->m_pkthdr.rcvif; hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (jailed_without_vnet(inp->inp_cred)) { /* * XXX: If faddr was bound to multicast group, * jailed raw socket will drop datagram. */ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) continue; } if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); } INP_RLOCK(inp); last = inp; } LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (!in_nullhost(inp->inp_laddr) && !in_hosteq(inp->inp_laddr, ip->ip_dst)) continue; if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) continue; } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->inp_moptions != NULL && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * If the incoming datagram is for IGMP, allow it * through unconditionally to the raw socket. * * In the case of IGMPv2, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. imo_multi_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if (proto != IPPROTO_IGMP) { struct sockaddr_in group; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(inp->inp_moptions, ifp, (struct sockaddr *)&group, (struct sockaddr *)&ripsrc); } if (blocked != MCAST_PASS) { IPSTAT_INC(ips_notmember); continue; } } if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); } INP_RLOCK(inp); last = inp; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); INP_RUNLOCK(last); } else { if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { IPSTAT_INC(ips_noproto); IPSTAT_DEC(ips_delivered); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); } else { m_freem(m); } } return (IPPROTO_DONE); } /* * Generate IP header and pass packet to ip_output. Tack on options user may * have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, ...) { struct ip *ip; int error; struct inpcb *inp = sotoinpcb(so); va_list ap; u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; va_start(ap, so); dst = va_arg(ap, u_long); va_end(ap); /* * If the user handed us a complete IP packet, use it. Otherwise, * allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) return(ENOBUFS); INP_RLOCK(inp); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; if (inp->inp_flags & INP_DONTFRAG) ip->ip_off = htons(IP_DF); else ip->ip_off = htons(0); ip->ip_p = inp->inp_ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would * let a source of INADDR_ANY pass, which we do not * want to see from jails. */ if (ip->ip_src.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src, inp->inp_cred); } else { error = prison_local_ip4(inp->inp_cred, &ip->ip_src); } if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } } ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } INP_RLOCK(inp); ip = mtod(m, struct ip *); error = prison_check_ip4(inp->inp_cred, &ip->ip_src); if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash. */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || (ntohs(ip->ip_len) != m->m_pkthdr.len) || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. */ flags |= IP_RAWOUTPUT; IPSTAT_INC(ips_rawout); } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif error = ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp); INP_RUNLOCK(inp); return (error); } /* * Raw IP socket option processing. * * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could * only be created by a privileged process, and as such, socket option * operations to manage system properties on any raw socket were allowed to * take place without explicit additional access control checks. However, * raw sockets can now also be created in jail(), and therefore explicit * checks are now required. Likewise, raw sockets can be used by a process * after it gives up privilege, so some caution is required. For options * passed down to the IP layer via ip_ctloutput(), checks are assumed to be * performed in ip_ctloutput() and therefore no check occurs here. * Unilaterally checking priv_check() here breaks normal IP socket option * operations on raw sockets. * * When adding new socket options here, make sure to add access control * checks here as necessary. * * XXX-BZ inp locking? */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) { if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp->inp_inc.inc_fibnum = so->so_fibnum; return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: /* ADD actually returns the body... */ case IP_FW_GET: case IP_FW_TABLE_GETSIZE: case IP_FW_TABLE_LIST: case IP_FW_NAT_GET_CONFIG: case IP_FW_NAT_GET_LOG: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: case IP_FW_TABLE_FLUSH: case IP_FW_NAT_CFG: case IP_FW_NAT_DEL: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which are * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls * in_ifadown() to remove all routes corresponding to that address. It also * receives the PRC_IFUP messages from if_up() and reinstalls the interface * routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing * to do, but at least if we are running a * routing process they will come back. */ in_ifadown(&ia->ia_ifa, 0); ifa_free(&ia->ia_ifa); break; } } if (ia == NULL) /* If ia matched, already unlocked. */ IN_IFADDR_RUNLOCK(&in_ifa_tracker); break; case PRC_IFUP: IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = ifa_del_loopback_route((struct ifaddr *)ia, sa); err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; err = ifa_add_loopback_route((struct ifaddr *)ia, sa); ifa_free(&ia->ia_ifa); break; } } static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); if (proto >= IPPROTO_MAX || proto < 0) return EPROTONOSUPPORT; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = V_ip_defttl; rip_inshash(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); INP_WUNLOCK(inp); return (0); } static void rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("rip_detach: not closed")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); if (so == V_ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == V_ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { struct inpcbinfo *pcbinfo; pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); } static void rip_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); rip_dodisconnect(so, inp); } static void rip_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); rip_dodisconnect(so, inp); } static int rip_disconnect(struct socket *so) { struct inpcb *inp; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); rip_dodisconnect(so, inp); return (0); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; int error; if (nam->sa_len != sizeof(*addr)) return (EINVAL); error = prison_check_ip4(td->td_ucred, &addr->sin_addr); if (error != 0) return (error); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_bind: inp == NULL")); if (TAILQ_EMPTY(&V_ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && (inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)addr) == 0)) return (EADDRNOTAVAIL); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_laddr = addr->sin_addr; rip_inshash(inp); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_connect: inp == NULL")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr = addr->sin_addr; rip_inshash(inp); soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_send: inp == NULL")); /* * Note: 'dst' reads below are unlocked. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } dst = inp->inp_faddr.s_addr; /* Unlocked read. */ } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return (rip_output(m, so, dst)); } #endif /* INET */ static int rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&V_ripcbinfo); gencnt = V_ripcbinfo.ipi_gencnt; n = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&V_ripcbinfo); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == NULL) return (ENOMEM); INP_INFO_RLOCK(&V_ripcbinfo); for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK(&V_ripcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_ripcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_ripcbinfo); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK(&V_ripcbinfo); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&V_ripcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); #ifdef INET struct pr_usrreqs rip_usrreqs = { .pru_abort = rip_abort, .pru_attach = rip_attach, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_control = in_control, .pru_detach = rip_detach, .pru_disconnect = rip_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = rip_send, .pru_shutdown = rip_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = rip_close, }; #endif /* INET */ diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index 5aa56ba809cc..038e6b0f817d 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -1,6237 +1,6243 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #endif #include static void sctp_stop_all_cookie_timers(struct sctp_tcb *stcb) { struct sctp_nets *net; /* * This now not only stops all cookie timers it also stops any INIT * timers as well. This will make sure that the timers are stopped * in all collision cases. */ SCTP_TCB_LOCK_ASSERT(stcb); TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->rxt_timer.type == SCTP_TIMER_TYPE_COOKIE) { sctp_timer_stop(SCTP_TIMER_TYPE_COOKIE, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_1); } else if (net->rxt_timer.type == SCTP_TIMER_TYPE_INIT) { sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_2); } } } /* INIT handler */ static void sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_init_chunk *cp, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_init *init; struct mbuf *op_err; SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n", (void *)stcb); if (stcb == NULL) { SCTP_INP_RLOCK(inp); } /* validate length */ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) { op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } /* validate parameters */ init = &cp->init; if (init->initiate_tag == 0) { /* protocol error... send abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) { /* invalid parameter... send abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } if (init->num_inbound_streams == 0) { /* protocol error... send abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } if (init->num_outbound_streams == 0) { /* protocol error... send abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } if (sctp_validate_init_auth_params(m, offset + sizeof(*cp), offset + ntohs(cp->ch.chunk_length))) { /* auth parameter(s) error... send abort */ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Problem with AUTH parameters"); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); if (stcb) *abort_no_unlock = 1; goto outnow; } /* We are only accepting if we have a socket with positive * so_qlimit. */ if ((stcb == NULL) && ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_socket == NULL) || (inp->sctp_socket->so_qlimit == 0))) { /* * FIX ME ?? What about TCP model and we have a * match/restart case? Actually no fix is needed. the lookup * will always find the existing assoc so stcb would not be * NULL. It may be questionable to do this since we COULD * just send back the INIT-ACK and hope that the app did * accept()'s by the time the COOKIE was sent. But there is * a price to pay for COOKIE generation and I don't want to * pay it on the chance that the app will actually do some * accepts(). The App just looses and should NOT be in this * state :-) */ if (SCTP_BASE_SYSCTL(sctp_blackhole) == 0) { op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "No listener"); sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); } goto outnow; } if ((stcb != NULL) && (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT)) { SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending SHUTDOWN-ACK\n"); sctp_send_shutdown_ack(stcb, NULL); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED); } else { SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n"); sctp_send_initiate_ack(inp, stcb, net, m, iphlen, offset, src, dst, sh, cp, mflowtype, mflowid, vrf_id, port, ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED)); } outnow: if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); } } /* * process peer "INIT/INIT-ACK" chunk returns value < 0 on error */ int sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { int unsent_data; unsigned int i; struct sctp_stream_queue_pending *sp; struct sctp_association *asoc; /* * This function returns if any stream has true unsent data on it. * Note that as it looks through it will clean up any places that * have old data that has been sent but left at top of stream queue. */ asoc = &stcb->asoc; unsent_data = 0; SCTP_TCB_SEND_LOCK(stcb); if (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) { /* Check to see if some data queued */ for (i = 0; i < stcb->asoc.streamoutcnt; i++) { /* sa_ignore FREED_MEMORY */ sp = TAILQ_FIRST(&stcb->asoc.strmout[i].outqueue); if (sp == NULL) { continue; } if ((sp->msg_is_complete) && (sp->length == 0) && (sp->sender_all_done)) { /* * We are doing differed cleanup. Last time * through when we took all the data the * sender_all_done was not set. */ if (sp->put_last_out == 0) { SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n"); SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n", sp->sender_all_done, sp->length, sp->msg_is_complete, sp->put_last_out); } atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1); TAILQ_REMOVE(&stcb->asoc.strmout[i].outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, &asoc->strmout[i], sp, 1); if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; } sctp_free_a_strmoq(stcb, sp, so_locked); if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) { unsent_data++; } } else { unsent_data++; } if (unsent_data > 0) { break; } } } SCTP_TCB_SEND_UNLOCK(stcb); return (unsent_data); } static int sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb) { struct sctp_init *init; struct sctp_association *asoc; struct sctp_nets *lnet; unsigned int i; init = &cp->init; asoc = &stcb->asoc; /* save off parameters */ asoc->peer_vtag = ntohl(init->initiate_tag); asoc->peers_rwnd = ntohl(init->a_rwnd); /* init tsn's */ asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1; if (!TAILQ_EMPTY(&asoc->nets)) { /* update any ssthresh's that may have a default */ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) { lnet->ssthresh = asoc->peers_rwnd; if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION); } } } SCTP_TCB_SEND_LOCK(stcb); if (asoc->pre_open_streams > ntohs(init->num_inbound_streams)) { unsigned int newcnt; struct sctp_stream_out *outs; struct sctp_stream_queue_pending *sp, *nsp; struct sctp_tmit_chunk *chk, *nchk; /* abandon the upper streams */ newcnt = ntohs(init->num_inbound_streams); TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { if (chk->rec.data.sid >= newcnt) { TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); asoc->send_queue_cnt--; if (asoc->strmout[chk->rec.data.sid].chunks_on_queues > 0) { asoc->strmout[chk->rec.data.sid].chunks_on_queues--; #ifdef INVARIANTS } else { panic("No chunks on the queues for sid %u.", chk->rec.data.sid); #endif } if (chk->data != NULL) { sctp_free_bufspace(stcb, asoc, chk, 1); sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, chk, SCTP_SO_NOT_LOCKED); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); /* sa_ignore FREED_MEMORY */ } } if (asoc->strmout) { for (i = newcnt; i < asoc->pre_open_streams; i++) { outs = &asoc->strmout[i]; TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) { atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1); TAILQ_REMOVE(&outs->outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 1); sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb, 0, sp, SCTP_SO_NOT_LOCKED); if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; } if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } /* Free the chunk */ sctp_free_a_strmoq(stcb, sp, SCTP_SO_NOT_LOCKED); /* sa_ignore FREED_MEMORY */ } outs->state = SCTP_STREAM_CLOSED; } } /* cut back the count */ asoc->pre_open_streams = newcnt; } SCTP_TCB_SEND_UNLOCK(stcb); asoc->streamoutcnt = asoc->pre_open_streams; if (asoc->strmout) { for (i = 0; i < asoc->streamoutcnt; i++) { asoc->strmout[i].state = SCTP_STREAM_OPEN; } } /* EY - nr_sack: initialize highest tsn in nr_mapping_array */ asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(0, 5, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } /* This is the next one we expect */ asoc->str_reset_seq_in = asoc->asconf_seq_in + 1; asoc->mapping_array_base_tsn = ntohl(init->initial_tsn); asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->asconf_seq_in; asoc->advanced_peer_ack_point = asoc->last_acked_seq; /* open the requested streams */ if (asoc->strmin != NULL) { /* Free the old ones */ for (i = 0; i < asoc->streamincnt; i++) { sctp_clean_up_stream(stcb, &asoc->strmin[i].inqueue); sctp_clean_up_stream(stcb, &asoc->strmin[i].uno_inqueue); } SCTP_FREE(asoc->strmin, SCTP_M_STRMI); } if (asoc->max_inbound_streams > ntohs(init->num_outbound_streams)) { asoc->streamincnt = ntohs(init->num_outbound_streams); } else { asoc->streamincnt = asoc->max_inbound_streams; } SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt * sizeof(struct sctp_stream_in), SCTP_M_STRMI); if (asoc->strmin == NULL) { /* we didn't get memory for the streams! */ SCTPDBG(SCTP_DEBUG_INPUT2, "process_init: couldn't get memory for the streams!\n"); return (-1); } for (i = 0; i < asoc->streamincnt; i++) { asoc->strmin[i].sid = i; asoc->strmin[i].last_mid_delivered = 0xffffffff; TAILQ_INIT(&asoc->strmin[i].inqueue); TAILQ_INIT(&asoc->strmin[i].uno_inqueue); asoc->strmin[i].pd_api_started = 0; asoc->strmin[i].delivery_started = 0; } /* * load_address_from_init will put the addresses into the * association when the COOKIE is processed or the INIT-ACK is * processed. Both types of COOKIE's existing and new call this * routine. It will remove addresses that are no longer in the * association (for the restarting case where addresses are * removed). Up front when the INIT arrives we will discard it if it * is a restart and new addresses have been added. */ /* sa_ignore MEMLEAK */ return (0); } /* * INIT-ACK message processing/consumption returns value < 0 on error */ static int sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id) { struct sctp_association *asoc; struct mbuf *op_err; int retval, abort_flag; uint32_t initack_limit; int nat_friendly = 0; /* First verify that we have no illegal param's */ abort_flag = 0; op_err = sctp_arethere_unrecognized_parameters(m, (offset + sizeof(struct sctp_init_chunk)), &abort_flag, (struct sctp_chunkhdr *)cp, &nat_friendly); if (abort_flag) { /* Send an abort and notify peer */ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); *abort_no_unlock = 1; return (-1); } asoc = &stcb->asoc; asoc->peer_supports_nat = (uint8_t)nat_friendly; /* process the peer's parameters in the INIT-ACK */ retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb); if (retval < 0) { return (retval); } initack_limit = offset + ntohs(cp->ch.chunk_length); /* load all addresses */ if ((retval = sctp_load_addresses_from_init(stcb, m, (offset + sizeof(struct sctp_init_chunk)), initack_limit, src, dst, NULL, stcb->asoc.port))) { op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Problem with address parameters"); SCTPDBG(SCTP_DEBUG_INPUT1, "Load addresses from INIT causes an abort %d\n", retval); sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; return (-1); } /* if the peer doesn't support asconf, flush the asconf queue */ if (asoc->asconf_supported == 0) { struct sctp_asconf_addr *param, *nparam; TAILQ_FOREACH_SAFE(param, &asoc->asconf_queue, next, nparam) { TAILQ_REMOVE(&asoc->asconf_queue, param, next); SCTP_FREE(param, SCTP_M_ASC_ADDR); } } stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs, stcb->asoc.local_hmacs); if (op_err) { sctp_queue_op_err(stcb, op_err); /* queuing will steal away the mbuf chain to the out queue */ op_err = NULL; } /* extract the cookie and queue it to "echo" it back... */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; net->error_count = 0; /* * Cancel the INIT timer, We do this first before queueing the * cookie. We always cancel at the primary to assue that we are * canceling the timer started by the INIT which always goes to the * primary. */ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb, asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3); /* calculate the RTO */ net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy, SCTP_RTT_FROM_NON_DATA); retval = sctp_send_cookie_echo(m, offset, stcb, net); if (retval < 0) { /* * No cookie, we probably should send a op error. But in any * case if there is no cookie in the INIT-ACK, we can * abandon the peer, its broke. */ if (retval == -3) { uint16_t len; len = (uint16_t)(sizeof(struct sctp_error_missing_param) + sizeof(uint16_t)); /* We abort with an error of missing mandatory param */ op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA); if (op_err != NULL) { struct sctp_error_missing_param *cause; SCTP_BUF_LEN(op_err) = len; cause = mtod(op_err, struct sctp_error_missing_param *); /* Subtract the reserved param */ cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM); cause->cause.length = htons(len); cause->num_missing_params = htonl(1); cause->type[0] = htons(SCTP_STATE_COOKIE); } sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; } return (retval); } return (0); } static void sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net) { union sctp_sockstore store; struct sctp_nets *r_net, *f_net; struct timeval tv; int req_prim = 0; uint16_t old_error_counter; if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) { /* Invalid length */ return; } memset(&store, 0, sizeof(store)); switch (cp->heartbeat.hb_info.addr_family) { #ifdef INET case AF_INET: if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) { store.sin.sin_family = cp->heartbeat.hb_info.addr_family; store.sin.sin_len = cp->heartbeat.hb_info.addr_len; store.sin.sin_port = stcb->rport; memcpy(&store.sin.sin_addr, cp->heartbeat.hb_info.address, sizeof(store.sin.sin_addr)); } else { return; } break; #endif #ifdef INET6 case AF_INET6: if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) { store.sin6.sin6_family = cp->heartbeat.hb_info.addr_family; store.sin6.sin6_len = cp->heartbeat.hb_info.addr_len; store.sin6.sin6_port = stcb->rport; memcpy(&store.sin6.sin6_addr, cp->heartbeat.hb_info.address, sizeof(struct in6_addr)); } else { return; } break; #endif default: return; } r_net = sctp_findnet(stcb, &store.sa); if (r_net == NULL) { SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n"); return; } if ((r_net && (r_net->dest_state & SCTP_ADDR_UNCONFIRMED)) && (r_net->heartbeat_random1 == cp->heartbeat.hb_info.random_value1) && (r_net->heartbeat_random2 == cp->heartbeat.hb_info.random_value2)) { /* * If the its a HB and it's random value is correct when can * confirm the destination. */ r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED; if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) { stcb->asoc.primary_destination = r_net; r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY; f_net = TAILQ_FIRST(&stcb->asoc.nets); if (f_net != r_net) { /* * first one on the list is NOT the primary * sctp_cmpaddr() is much more efficient if * the primary is the first on the list, * make it so. */ TAILQ_REMOVE(&stcb->asoc.nets, r_net, sctp_next); TAILQ_INSERT_HEAD(&stcb->asoc.nets, r_net, sctp_next); } req_prim = 1; } sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net); } old_error_counter = r_net->error_count; r_net->error_count = 0; r_net->hb_responded = 1; tv.tv_sec = cp->heartbeat.hb_info.time_value_1; tv.tv_usec = cp->heartbeat.hb_info.time_value_2; /* Now lets do a RTO with this */ r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy, SCTP_RTT_FROM_NON_DATA); if (!(r_net->dest_state & SCTP_ADDR_REACHABLE)) { r_net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED); } if (r_net->dest_state & SCTP_ADDR_PF) { r_net->dest_state &= ~SCTP_ADDR_PF; stcb->asoc.cc_functions.sctp_cwnd_update_exit_pf(stcb, net); } if (old_error_counter > 0) { sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net); } if (r_net == stcb->asoc.primary_destination) { if (stcb->asoc.alternate) { /* release the alternate, primary is good */ sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } } /* Mobility adaptation */ if (req_prim) { if ((sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) || sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_FASTHANDOFF)) && sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_PRIM_DELETED)) { sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6); if (sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_FASTHANDOFF)) { sctp_assoc_immediate_retrans(stcb, stcb->asoc.primary_destination); } if (sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) { sctp_move_chunks_from_net(stcb, stcb->asoc.deleted_primary); } sctp_delete_prim_timer(stcb->sctp_ep, stcb, stcb->asoc.deleted_primary); } } } static int sctp_handle_nat_colliding_state(struct sctp_tcb *stcb) { /* * return 0 means we want you to proceed with the abort non-zero * means no abort processing */ struct sctpasochead *head; if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) { /* generate a new vtag and send init */ LIST_REMOVE(stcb, sctp_asocs); stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))]; /* * put it in the bucket in the vtag hash of assoc's for the * system */ LIST_INSERT_HEAD(head, stcb, sctp_asocs); sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); return (1); } if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) { /* * treat like a case where the cookie expired i.e.: - dump * current cookie. - generate a new vtag. - resend init. */ /* generate a new vtag and send init */ LIST_REMOVE(stcb, sctp_asocs); stcb->asoc.state &= ~SCTP_STATE_COOKIE_ECHOED; stcb->asoc.state |= SCTP_STATE_COOKIE_WAIT; sctp_stop_all_cookie_timers(stcb); sctp_toss_old_cookies(stcb, &stcb->asoc); stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))]; /* * put it in the bucket in the vtag hash of assoc's for the * system */ LIST_INSERT_HEAD(head, stcb, sctp_asocs); sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); return (1); } return (0); } static int sctp_handle_nat_missing_state(struct sctp_tcb *stcb, struct sctp_nets *net) { /* * return 0 means we want you to proceed with the abort non-zero * means no abort processing */ if (stcb->asoc.auth_supported == 0) { SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_nat_missing_state: Peer does not support AUTH, cannot send an asconf\n"); return (0); } sctp_asconf_send_nat_state_update(stcb, net); return (1); } static void sctp_handle_abort(struct sctp_abort_chunk *abort, struct sctp_tcb *stcb, struct sctp_nets *net) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif uint16_t len; uint16_t error; SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n"); if (stcb == NULL) return; len = ntohs(abort->ch.chunk_length); if (len > sizeof(struct sctp_chunkhdr)) { /* * Need to check the cause codes for our two magic nat * aborts which don't kill the assoc necessarily. */ struct sctp_gen_error_cause *cause; cause = (struct sctp_gen_error_cause *)(abort + 1); error = ntohs(cause->code); if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) { SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n", abort->ch.chunk_flags); if (sctp_handle_nat_colliding_state(stcb)) { return; } } else if (error == SCTP_CAUSE_NAT_MISSING_STATE) { SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n", abort->ch.chunk_flags); if (sctp_handle_nat_missing_state(stcb, net)) { return; } } } else { error = 0; } /* stop any receive timers */ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_7); /* notify user of the abort and clean up... */ sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED); /* free the tcb */ SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } #ifdef SCTP_ASOCLOG_OF_TSNS sctp_print_out_track_log(stcb); #endif #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif stcb->asoc.state |= SCTP_STATE_WAS_ABORTED; (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n"); } static void sctp_start_net_timers(struct sctp_tcb *stcb) { uint32_t cnt_hb_sent; struct sctp_nets *net; cnt_hb_sent = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* * For each network start: 1) A pmtu timer. 2) A HB timer 3) * If the dest in unconfirmed send a hb as well if under * max_hb_burst have been sent. */ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) && (cnt_hb_sent < SCTP_BASE_SYSCTL(sctp_hb_maxburst))) { sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); cnt_hb_sent++; } } if (cnt_hb_sent) { sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_COOKIE_ACK, SCTP_SO_NOT_LOCKED); } } static void sctp_handle_shutdown(struct sctp_shutdown_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_flag) { struct sctp_association *asoc; int some_on_streamwheel; int old_state; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_shutdown: handling SHUTDOWN\n"); if (stcb == NULL) return; asoc = &stcb->asoc; if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) { return; } if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) { /* Shutdown NOT the expected size */ return; } old_state = SCTP_GET_STATE(asoc); sctp_update_acked(stcb, cp, abort_flag); if (*abort_flag) { return; } if (asoc->control_pdapi) { /* * With a normal shutdown we assume the end of last record. */ SCTP_INP_READ_LOCK(stcb->sctp_ep); if (asoc->control_pdapi->on_strm_q) { struct sctp_stream_in *strm; strm = &asoc->strmin[asoc->control_pdapi->sinfo_stream]; if (asoc->control_pdapi->on_strm_q == SCTP_ON_UNORDERED) { /* Unordered */ TAILQ_REMOVE(&strm->uno_inqueue, asoc->control_pdapi, next_instrm); asoc->control_pdapi->on_strm_q = 0; } else if (asoc->control_pdapi->on_strm_q == SCTP_ON_ORDERED) { /* Ordered */ TAILQ_REMOVE(&strm->inqueue, asoc->control_pdapi, next_instrm); asoc->control_pdapi->on_strm_q = 0; #ifdef INVARIANTS } else { panic("Unknown state on ctrl:%p on_strm_q:%d", asoc->control_pdapi, asoc->control_pdapi->on_strm_q); #endif } } asoc->control_pdapi->end_added = 1; asoc->control_pdapi->pdapi_aborted = 1; asoc->control_pdapi = NULL; SCTP_INP_READ_UNLOCK(stcb->sctp_ep); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { /* assoc was freed while we were unlocked */ SCTP_SOCKET_UNLOCK(so, 1); return; } #endif if (stcb->sctp_socket) { sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); } #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* goto SHUTDOWN_RECEIVED state to block new requests */ if (stcb->sctp_socket) { if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) { SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED); SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); /* * notify upper layer that peer has initiated a * shutdown */ sctp_ulp_notify(SCTP_NOTIFY_PEER_SHUTDOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); /* reset time */ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered); } } if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) { /* * stop the shutdown timer, since we WILL move to * SHUTDOWN-ACK-SENT. */ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9); } /* Now is there unsent data on a stream somewhere? */ some_on_streamwheel = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED); if (!TAILQ_EMPTY(&asoc->send_queue) || !TAILQ_EMPTY(&asoc->sent_queue) || some_on_streamwheel) { /* By returning we will push more data out */ return; } else { /* no outstanding data to send, so move on... */ /* send SHUTDOWN-ACK */ /* move to SHUTDOWN-ACK-SENT state */ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING); if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) { SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT); sctp_stop_timers_for_shutdown(stcb); sctp_send_shutdown_ack(stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net); } else if (old_state == SCTP_STATE_SHUTDOWN_ACK_SENT) { sctp_send_shutdown_ack(stcb, net); } } } static void sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED, struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(stcb->sctp_ep); #endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_shutdown_ack: handling SHUTDOWN ACK\n"); if (stcb == NULL) return; asoc = &stcb->asoc; /* process according to association state */ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) { /* unexpected SHUTDOWN-ACK... do OOTB handling... */ sctp_send_shutdown_complete(stcb, net, 1); SCTP_TCB_UNLOCK(stcb); return; } if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { /* unexpected SHUTDOWN-ACK... so ignore... */ SCTP_TCB_UNLOCK(stcb); return; } if (asoc->control_pdapi) { /* * With a normal shutdown we assume the end of last record. */ SCTP_INP_READ_LOCK(stcb->sctp_ep); asoc->control_pdapi->end_added = 1; asoc->control_pdapi->pdapi_aborted = 1; asoc->control_pdapi = NULL; SCTP_INP_READ_UNLOCK(stcb->sctp_ep); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { /* assoc was freed while we were unlocked */ SCTP_SOCKET_UNLOCK(so, 1); return; } #endif sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } #ifdef INVARIANTS if (!TAILQ_EMPTY(&asoc->send_queue) || !TAILQ_EMPTY(&asoc->sent_queue) || sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) { panic("Queues are not empty when handling SHUTDOWN-ACK"); } #endif /* stop the timer */ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_10); /* send SHUTDOWN-COMPLETE */ sctp_send_shutdown_complete(stcb, net, 0); /* notify upper layer protocol */ if (stcb->sctp_socket) { if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { stcb->sctp_socket->so_snd.sb_cc = 0; } sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } SCTP_STAT_INCR_COUNTER32(sctps_shutdown); /* free the TCB but first save off the ep */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_11); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* * Skip past the param header and then we will find the chunk that caused the * problem. There are two possibilities ASCONF or FWD-TSN other than that and * our peer must be broken. */ static void sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr, struct sctp_nets *net) { struct sctp_chunkhdr *chk; chk = (struct sctp_chunkhdr *)((caddr_t)phdr + sizeof(*phdr)); switch (chk->chunk_type) { case SCTP_ASCONF_ACK: case SCTP_ASCONF: sctp_asconf_cleanup(stcb, net); break; case SCTP_IFORWARD_CUM_TSN: case SCTP_FORWARD_CUM_TSN: stcb->asoc.prsctp_supported = 0; break; default: SCTPDBG(SCTP_DEBUG_INPUT2, "Peer does not support chunk type %d(%x)??\n", chk->chunk_type, (uint32_t)chk->chunk_type); break; } } /* * Skip past the param header and then we will find the param that caused the * problem. There are a number of param's in a ASCONF OR the prsctp param * these will turn of specific features. * XXX: Is this the right thing to do? */ static void sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr) { struct sctp_paramhdr *pbad; pbad = phdr + 1; switch (ntohs(pbad->param_type)) { /* pr-sctp draft */ case SCTP_PRSCTP_SUPPORTED: stcb->asoc.prsctp_supported = 0; break; case SCTP_SUPPORTED_CHUNK_EXT: break; /* draft-ietf-tsvwg-addip-sctp */ case SCTP_HAS_NAT_SUPPORT: stcb->asoc.peer_supports_nat = 0; break; case SCTP_ADD_IP_ADDRESS: case SCTP_DEL_IP_ADDRESS: case SCTP_SET_PRIM_ADDR: stcb->asoc.asconf_supported = 0; break; case SCTP_SUCCESS_REPORT: case SCTP_ERROR_CAUSE_IND: SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n"); SCTPDBG(SCTP_DEBUG_INPUT2, "Turning off ASCONF to this strange peer\n"); stcb->asoc.asconf_supported = 0; break; default: SCTPDBG(SCTP_DEBUG_INPUT2, "Peer does not support param type %d(%x)??\n", pbad->param_type, (uint32_t)pbad->param_type); break; } } static int sctp_handle_error(struct sctp_chunkhdr *ch, struct sctp_tcb *stcb, struct sctp_nets *net) { int chklen; struct sctp_paramhdr *phdr; uint16_t error, error_type; uint16_t error_len; struct sctp_association *asoc; int adjust; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif /* parse through all of the errors and process */ asoc = &stcb->asoc; phdr = (struct sctp_paramhdr *)((caddr_t)ch + sizeof(struct sctp_chunkhdr)); chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr); error = 0; while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) { /* Process an Error Cause */ error_type = ntohs(phdr->param_type); error_len = ntohs(phdr->param_length); if ((error_len > chklen) || (error_len == 0)) { /* invalid param length for this param */ SCTPDBG(SCTP_DEBUG_INPUT1, "Bogus length in error param- chunk left:%d errorlen:%d\n", chklen, error_len); return (0); } if (error == 0) { /* report the first error cause */ error = error_type; } switch (error_type) { case SCTP_CAUSE_INVALID_STREAM: case SCTP_CAUSE_MISSING_PARAM: case SCTP_CAUSE_INVALID_PARAM: case SCTP_CAUSE_NO_USER_DATA: SCTPDBG(SCTP_DEBUG_INPUT1, "Software error we got a %d back? We have a bug :/ (or do they?)\n", error_type); break; case SCTP_CAUSE_NAT_COLLIDING_STATE: SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n", ch->chunk_flags); if (sctp_handle_nat_colliding_state(stcb)) { return (0); } break; case SCTP_CAUSE_NAT_MISSING_STATE: SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n", ch->chunk_flags); if (sctp_handle_nat_missing_state(stcb, net)) { return (0); } break; case SCTP_CAUSE_STALE_COOKIE: /* * We only act if we have echoed a cookie and are * waiting. */ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) { int *p; p = (int *)((caddr_t)phdr + sizeof(*phdr)); /* Save the time doubled */ asoc->cookie_preserve_req = ntohl(*p) << 1; asoc->stale_cookie_count++; if (asoc->stale_cookie_count > asoc->max_init_times) { sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED); /* now free the asoc */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif return (-1); } /* blast back to INIT state */ sctp_toss_old_cookies(stcb, &stcb->asoc); asoc->state &= ~SCTP_STATE_COOKIE_ECHOED; asoc->state |= SCTP_STATE_COOKIE_WAIT; sctp_stop_all_cookie_timers(stcb); sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); } break; case SCTP_CAUSE_UNRESOLVABLE_ADDR: /* * Nothing we can do here, we don't do hostname * addresses so if the peer does not like my IPv6 * (or IPv4 for that matter) it does not matter. If * they don't support that type of address, they can * NOT possibly get that packet type... i.e. with no * IPv6 you can't receive a IPv6 packet. so we can * safely ignore this one. If we ever added support * for HOSTNAME Addresses, then we would need to do * something here. */ break; case SCTP_CAUSE_UNRECOG_CHUNK: sctp_process_unrecog_chunk(stcb, phdr, net); break; case SCTP_CAUSE_UNRECOG_PARAM: sctp_process_unrecog_param(stcb, phdr); break; case SCTP_CAUSE_COOKIE_IN_SHUTDOWN: /* * We ignore this since the timer will drive out a * new cookie anyway and there timer will drive us * to send a SHUTDOWN_COMPLETE. We can't send one * here since we don't have their tag. */ break; case SCTP_CAUSE_DELETING_LAST_ADDR: case SCTP_CAUSE_RESOURCE_SHORTAGE: case SCTP_CAUSE_DELETING_SRC_ADDR: /* * We should NOT get these here, but in a * ASCONF-ACK. */ SCTPDBG(SCTP_DEBUG_INPUT2, "Peer sends ASCONF errors in a Operational Error?<%d>?\n", error_type); break; case SCTP_CAUSE_OUT_OF_RESC: /* * And what, pray tell do we do with the fact that * the peer is out of resources? Not really sure we * could do anything but abort. I suspect this * should have came WITH an abort instead of in a * OP-ERROR. */ break; default: SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_handle_error: unknown error type = 0x%xh\n", error_type); break; } adjust = SCTP_SIZE32(error_len); chklen -= adjust; phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust); } sctp_ulp_notify(SCTP_NOTIFY_REMOTE_ERROR, stcb, error, ch, SCTP_SO_NOT_LOCKED); return (0); } static int sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id) { struct sctp_init_ack *init_ack; struct mbuf *op_err; SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init_ack: handling INIT-ACK\n"); if (stcb == NULL) { SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init_ack: TCB is null\n"); return (-1); } if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) { /* Invalid length */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; return (-1); } init_ack = &cp->init; /* validate parameters */ if (init_ack->initiate_tag == 0) { /* protocol error... send an abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; return (-1); } if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) { /* protocol error... send an abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; return (-1); } if (init_ack->num_inbound_streams == 0) { /* protocol error... send an abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; return (-1); } if (init_ack->num_outbound_streams == 0) { /* protocol error... send an abort */ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, ""); sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, net->port); *abort_no_unlock = 1; return (-1); } /* process according to association state... */ switch (stcb->asoc.state & SCTP_STATE_MASK) { case SCTP_STATE_COOKIE_WAIT: /* this is the expected state for this chunk */ /* process the INIT-ACK parameters */ if (stcb->asoc.primary_destination->dest_state & SCTP_ADDR_UNCONFIRMED) { /* * The primary is where we sent the INIT, we can * always consider it confirmed when the INIT-ACK is * returned. Do this before we load addresses * though. */ stcb->asoc.primary_destination->dest_state &= ~SCTP_ADDR_UNCONFIRMED; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED); } if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb, net, abort_no_unlock, mflowtype, mflowid, vrf_id) < 0) { /* error in parsing parameters */ return (-1); } /* update our state */ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n"); SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_ECHOED); /* reset the RTO calc */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); /* * collapse the init timer back in case of a exponential * backoff */ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, stcb->sctp_ep, stcb, net); /* * the send at the end of the inbound data processing will * cause the cookie to be sent */ break; case SCTP_STATE_SHUTDOWN_SENT: /* incorrect state... discard */ break; case SCTP_STATE_COOKIE_ECHOED: /* incorrect state... discard */ break; case SCTP_STATE_OPEN: /* incorrect state... discard */ break; case SCTP_STATE_EMPTY: case SCTP_STATE_INUSE: default: /* incorrect state... discard */ return (-1); break; } SCTPDBG(SCTP_DEBUG_INPUT1, "Leaving handle-init-ack end\n"); return (0); } static struct sctp_tcb * sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len, struct sctp_inpcb *inp, struct sctp_nets **netp, struct sockaddr *init_src, int *notification, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id, uint16_t port); /* * handle a state cookie for an existing association m: input packet mbuf * chain-- assumes a pullup on IP/SCTP/COOKIE-ECHO chunk note: this is a * "split" mbuf and the cookie signature does not exist offset: offset into * mbuf to the cookie-echo chunk */ static struct sctp_tcb * sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp, struct sockaddr *init_src, int *notification, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_association *asoc; struct sctp_init_chunk *init_cp, init_buf; struct sctp_init_ack_chunk *initack_cp, initack_buf; struct sctp_nets *net; struct mbuf *op_err; int init_offset, initack_offset, i; int retval; int spec_flag = 0; uint32_t how_indx; #if defined(SCTP_DETAILED_STR_STATS) int j; #endif net = *netp; /* I know that the TCB is non-NULL from the caller */ asoc = &stcb->asoc; for (how_indx = 0; how_indx < sizeof(asoc->cookie_how); how_indx++) { if (asoc->cookie_how[how_indx] == 0) break; } if (how_indx < sizeof(asoc->cookie_how)) { asoc->cookie_how[how_indx] = 1; } if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) { /* SHUTDOWN came in after sending INIT-ACK */ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination); op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, ""); sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, net->port); if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 2; return (NULL); } /* * find and validate the INIT chunk in the cookie (peer's info) the * INIT should start after the cookie-echo header struct (chunk * header, state cookie header struct) */ init_offset = offset += sizeof(struct sctp_cookie_echo_chunk); init_cp = (struct sctp_init_chunk *) sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk), (uint8_t *)&init_buf); if (init_cp == NULL) { /* could not pull a INIT chunk in cookie */ return (NULL); } if (init_cp->ch.chunk_type != SCTP_INITIATION) { return (NULL); } /* * find and validate the INIT-ACK chunk in the cookie (my info) the * INIT-ACK follows the INIT chunk */ initack_offset = init_offset + SCTP_SIZE32(ntohs(init_cp->ch.chunk_length)); initack_cp = (struct sctp_init_ack_chunk *) sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk), (uint8_t *)&initack_buf); if (initack_cp == NULL) { /* could not pull INIT-ACK chunk in cookie */ return (NULL); } if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) { return (NULL); } if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) && (ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag)) { /* * case D in Section 5.2.4 Table 2: MMAA process accordingly * to get into the OPEN state */ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) { /*- * Opps, this means that we somehow generated two vtag's * the same. I.e. we did: * Us Peer * <---INIT(tag=a)------ * ----INIT-ACK(tag=t)--> * ----INIT(tag=t)------> *1 * <---INIT-ACK(tag=a)--- * <----CE(tag=t)------------- *2 * * At point *1 we should be generating a different * tag t'. Which means we would throw away the CE and send * ours instead. Basically this is case C (throw away side). */ if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 17; return (NULL); } switch (SCTP_GET_STATE(asoc)) { case SCTP_STATE_COOKIE_WAIT: case SCTP_STATE_COOKIE_ECHOED: /* * INIT was sent but got a COOKIE_ECHO with the * correct tags... just accept it...but we must * process the init so that we can make sure we have * the right seq no's. */ /* First we must process the INIT !! */ retval = sctp_process_init(init_cp, stcb); if (retval < 0) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 3; return (NULL); } /* we have already processed the INIT so no problem */ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13); sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14); /* update current state */ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) SCTP_STAT_INCR_COUNTER32(sctps_activeestab); else SCTP_STAT_INCR_COUNTER32(sctps_collisionestab); SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) { sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } SCTP_STAT_INCR_GAUGE32(sctps_currestab); sctp_stop_all_cookie_timers(stcb); if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && (inp->sctp_socket->so_qlimit == 0) ) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif /* * Here is where collision would go if we * did a connect() and instead got a * init/init-ack/cookie done before the * init-ack came back.. */ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { SCTP_SOCKET_UNLOCK(so, 1); return (NULL); } #endif soisconnected(stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* notify upper layer */ *notification = SCTP_NOTIFY_ASSOC_UP; /* * since we did not send a HB make sure we don't * double things */ net->hb_responded = 1; net->RTO = sctp_calculate_rto(stcb, asoc, net, &cookie->time_entered, sctp_align_unsafe_makecopy, SCTP_RTT_FROM_NON_DATA); if (stcb->asoc.sctp_autoclose_ticks && (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))) { sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL); } break; default: /* * we're in the OPEN state (or beyond), so peer must * have simply lost the COOKIE-ACK */ break; } /* end switch */ sctp_stop_all_cookie_timers(stcb); /* * We ignore the return code here.. not sure if we should * somehow abort.. but we do have an existing asoc. This * really should not fail. */ if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), initack_offset, src, dst, init_src, stcb->asoc.port)) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 4; return (NULL); } /* respond with a COOKIE-ACK */ sctp_toss_old_cookies(stcb, asoc); sctp_send_cookie_ack(stcb); if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 5; return (stcb); } if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag && ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag && cookie->tie_tag_my_vtag == 0 && cookie->tie_tag_peer_vtag == 0) { /* * case C in Section 5.2.4 Table 2: XMOO silently discard */ if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 6; return (NULL); } /* * If nat support, and the below and stcb is established, send back * a ABORT(colliding state) if we are established. */ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) && (asoc->peer_supports_nat) && ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) && ((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) || (asoc->peer_vtag == 0)))) { /* * Special case - Peer's support nat. We may have two init's * that we gave out the same tag on since one was not * established.. i.e. we get INIT from host-1 behind the nat * and we respond tag-a, we get a INIT from host-2 behind * the nat and we get tag-a again. Then we bring up host-1 * (or 2's) assoc, Then comes the cookie from hsot-2 (or 1). * Now we have colliding state. We must send an abort here * with colliding state indication. */ op_err = sctp_generate_cause(SCTP_CAUSE_NAT_COLLIDING_STATE, ""); sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); return (NULL); } if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) && ((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) || (asoc->peer_vtag == 0))) { /* * case B in Section 5.2.4 Table 2: MXAA or MOAA my info * should be ok, re-accept peer info */ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) { /* * Extension of case C. If we hit this, then the * random number generator returned the same vtag * when we first sent our INIT-ACK and when we later * sent our INIT. The side with the seq numbers that * are different will be the one that normnally * would have hit case C. This in effect "extends" * our vtags in this collision case to be 64 bits. * The same collision could occur aka you get both * vtag and seq number the same twice in a row.. but * is much less likely. If it did happen then we * would proceed through and bring up the assoc.. we * may end up with the wrong stream setup however.. * which would be bad.. but there is no way to * tell.. until we send on a stream that does not * exist :-) */ if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 7; return (NULL); } if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 8; sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15); sctp_stop_all_cookie_timers(stcb); /* * since we did not send a HB make sure we don't double * things */ net->hb_responded = 1; if (stcb->asoc.sctp_autoclose_ticks && sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) { sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL); } asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd); asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams); if (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) { /* * Ok the peer probably discarded our data (if we * echoed a cookie+data). So anything on the * sent_queue should be marked for retransmit, we * may not get something to kick us so it COULD * still take a timeout to move these.. but it can't * hurt to mark them. */ struct sctp_tmit_chunk *chk; TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) { if (chk->sent < SCTP_DATAGRAM_RESEND) { chk->sent = SCTP_DATAGRAM_RESEND; sctp_flight_size_decrease(chk); sctp_total_flight_decrease(stcb, chk); sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); spec_flag++; } } } /* process the INIT info (peer's info) */ retval = sctp_process_init(init_cp, stcb); if (retval < 0) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 9; return (NULL); } if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), initack_offset, src, dst, init_src, stcb->asoc.port)) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 10; return (NULL); } if ((asoc->state & SCTP_STATE_COOKIE_WAIT) || (asoc->state & SCTP_STATE_COOKIE_ECHOED)) { *notification = SCTP_NOTIFY_ASSOC_UP; if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && (inp->sctp_socket->so_qlimit == 0)) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { SCTP_SOCKET_UNLOCK(so, 1); return (NULL); } #endif soisconnected(stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) SCTP_STAT_INCR_COUNTER32(sctps_activeestab); else SCTP_STAT_INCR_COUNTER32(sctps_collisionestab); SCTP_STAT_INCR_GAUGE32(sctps_currestab); } else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) { SCTP_STAT_INCR_COUNTER32(sctps_restartestab); } else { SCTP_STAT_INCR_COUNTER32(sctps_collisionestab); } SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) { sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } sctp_stop_all_cookie_timers(stcb); sctp_toss_old_cookies(stcb, asoc); sctp_send_cookie_ack(stcb); if (spec_flag) { /* * only if we have retrans set do we do this. What * this call does is get only the COOKIE-ACK out and * then when we return the normal call to * sctp_chunk_output will get the retrans out behind * this. */ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_COOKIE_ACK, SCTP_SO_NOT_LOCKED); } if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 11; return (stcb); } if ((ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag && ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) && cookie->tie_tag_my_vtag == asoc->my_vtag_nonce && cookie->tie_tag_peer_vtag == asoc->peer_vtag_nonce && cookie->tie_tag_peer_vtag != 0) { struct sctpasochead *head; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif if (asoc->peer_supports_nat) { /* * This is a gross gross hack. Just call the * cookie_new code since we are allowing a duplicate * association. I hope this works... */ return (sctp_process_cookie_new(m, iphlen, offset, src, dst, sh, cookie, cookie_len, inp, netp, init_src, notification, auth_skipped, auth_offset, auth_len, mflowtype, mflowid, vrf_id, port)); } /* * case A in Section 5.2.4 Table 2: XXMM (peer restarted) */ /* temp code */ if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 12; sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16); sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17); /* notify upper layer */ *notification = SCTP_NOTIFY_ASSOC_RESTART; atomic_add_int(&stcb->asoc.refcnt, 1); if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) { SCTP_STAT_INCR_GAUGE32(sctps_currestab); } if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) { SCTP_STAT_INCR_GAUGE32(sctps_restartestab); } else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) { SCTP_STAT_INCR_GAUGE32(sctps_collisionestab); } if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) { SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) { /* move to OPEN state, if not in SHUTDOWN_SENT */ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); } asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams); asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn); asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number; asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1; asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1; asoc->str_reset_seq_in = asoc->init_seq_number; asoc->advanced_peer_ack_point = asoc->last_acked_seq; if (asoc->mapping_array) { memset(asoc->mapping_array, 0, asoc->mapping_array_size); } if (asoc->nr_mapping_array) { memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size); } SCTP_TCB_UNLOCK(stcb); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); SCTP_SOCKET_LOCK(so, 1); #endif SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(stcb->sctp_ep); SCTP_TCB_LOCK(stcb); atomic_add_int(&stcb->asoc.refcnt, -1); /* send up all the data */ SCTP_TCB_SEND_LOCK(stcb); sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_LOCKED); for (i = 0; i < stcb->asoc.streamoutcnt; i++) { stcb->asoc.strmout[i].chunks_on_queues = 0; #if defined(SCTP_DETAILED_STR_STATS) for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) { asoc->strmout[i].abandoned_sent[j] = 0; asoc->strmout[i].abandoned_unsent[j] = 0; } #else asoc->strmout[i].abandoned_sent[0] = 0; asoc->strmout[i].abandoned_unsent[0] = 0; #endif stcb->asoc.strmout[i].sid = i; stcb->asoc.strmout[i].next_mid_ordered = 0; stcb->asoc.strmout[i].next_mid_unordered = 0; stcb->asoc.strmout[i].last_msg_incomplete = 0; } /* process the INIT-ACK info (my info) */ asoc->my_vtag = ntohl(initack_cp->init.initiate_tag); asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd); /* pull from vtag hash */ LIST_REMOVE(stcb, sctp_asocs); /* re-insert to new vtag position */ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))]; /* * put it in the bucket in the vtag hash of assoc's for the * system */ LIST_INSERT_HEAD(head, stcb, sctp_asocs); SCTP_TCB_SEND_UNLOCK(stcb); SCTP_INP_WUNLOCK(stcb->sctp_ep); SCTP_INP_INFO_WUNLOCK(); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif asoc->total_flight = 0; asoc->total_flight_count = 0; /* process the INIT info (peer's info) */ retval = sctp_process_init(init_cp, stcb); if (retval < 0) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 13; return (NULL); } /* * since we did not send a HB make sure we don't double * things */ net->hb_responded = 1; if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), initack_offset, src, dst, init_src, stcb->asoc.port)) { if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 14; return (NULL); } /* respond with a COOKIE-ACK */ sctp_stop_all_cookie_timers(stcb); sctp_toss_old_cookies(stcb, asoc); sctp_send_cookie_ack(stcb); if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 15; return (stcb); } if (how_indx < sizeof(asoc->cookie_how)) asoc->cookie_how[how_indx] = 16; /* all other cases... */ return (NULL); } /* * handle a state cookie for a new association m: input packet mbuf chain-- * assumes a pullup on IP/SCTP/COOKIE-ECHO chunk note: this is a "split" mbuf * and the cookie signature does not exist offset: offset into mbuf to the * cookie-echo chunk length: length of the cookie chunk to: where the init * was from returns a new TCB */ static struct sctp_tcb * sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len, struct sctp_inpcb *inp, struct sctp_nets **netp, struct sockaddr *init_src, int *notification, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_tcb *stcb; struct sctp_init_chunk *init_cp, init_buf; struct sctp_init_ack_chunk *initack_cp, initack_buf; union sctp_sockstore store; struct sctp_association *asoc; int init_offset, initack_offset, initack_limit; int retval; int error = 0; uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE]; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; so = SCTP_INP_SO(inp); #endif /* * find and validate the INIT chunk in the cookie (peer's info) the * INIT should start after the cookie-echo header struct (chunk * header, state cookie header struct) */ init_offset = offset + sizeof(struct sctp_cookie_echo_chunk); init_cp = (struct sctp_init_chunk *) sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk), (uint8_t *)&init_buf); if (init_cp == NULL) { /* could not pull a INIT chunk in cookie */ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT chunk hdr\n"); return (NULL); } if (init_cp->ch.chunk_type != SCTP_INITIATION) { SCTPDBG(SCTP_DEBUG_INPUT1, "HUH? process_cookie_new: could not find INIT chunk!\n"); return (NULL); } initack_offset = init_offset + SCTP_SIZE32(ntohs(init_cp->ch.chunk_length)); /* * find and validate the INIT-ACK chunk in the cookie (my info) the * INIT-ACK follows the INIT chunk */ initack_cp = (struct sctp_init_ack_chunk *) sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk), (uint8_t *)&initack_buf); if (initack_cp == NULL) { /* could not pull INIT-ACK chunk in cookie */ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT-ACK chunk hdr\n"); return (NULL); } if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) { return (NULL); } /* * NOTE: We can't use the INIT_ACK's chk_length to determine the * "initack_limit" value. This is because the chk_length field * includes the length of the cookie, but the cookie is omitted when * the INIT and INIT_ACK are tacked onto the cookie... */ initack_limit = offset + cookie_len; /* * now that we know the INIT/INIT-ACK are in place, create a new TCB * and popluate */ /* * Here we do a trick, we set in NULL for the proc/thread argument. * We do this since in effect we only use the p argument when the * socket is unbound and we must do an implicit bind. Since we are * getting a cookie, we cannot be unbound. */ stcb = sctp_aloc_assoc(inp, init_src, &error, ntohl(initack_cp->init.initiate_tag), vrf_id, ntohs(initack_cp->init.num_outbound_streams), port, (struct thread *)NULL ); if (stcb == NULL) { struct mbuf *op_err; /* memory problem? */ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another TCB!\n"); op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); return (NULL); } /* get the correct sctp_nets */ if (netp) *netp = sctp_findnet(stcb, init_src); asoc = &stcb->asoc; /* get scope variables out of cookie */ asoc->scope.ipv4_local_scope = cookie->ipv4_scope; asoc->scope.site_scope = cookie->site_scope; asoc->scope.local_scope = cookie->local_scope; asoc->scope.loopback_scope = cookie->loopback_scope; if ((asoc->scope.ipv4_addr_legal != cookie->ipv4_addr_legal) || (asoc->scope.ipv6_addr_legal != cookie->ipv6_addr_legal)) { struct mbuf *op_err; /* * Houston we have a problem. The EP changed while the * cookie was in flight. Only recourse is to abort the * association. */ atomic_add_int(&stcb->asoc.refcnt, 1); op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } /* process the INIT-ACK info (my info) */ asoc->my_vtag = ntohl(initack_cp->init.initiate_tag); asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd); asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams); asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn); asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number; asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1; asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1; asoc->str_reset_seq_in = asoc->init_seq_number; asoc->advanced_peer_ack_point = asoc->last_acked_seq; /* process the INIT info (peer's info) */ if (netp) retval = sctp_process_init(init_cp, stcb); else retval = 0; if (retval < 0) { atomic_add_int(&stcb->asoc.refcnt, 1); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } /* load all addresses */ if (sctp_load_addresses_from_init(stcb, m, init_offset + sizeof(struct sctp_init_chunk), initack_offset, src, dst, init_src, port)) { atomic_add_int(&stcb->asoc.refcnt, 1); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } /* * verify any preceding AUTH chunk that was skipped */ /* pull the local authentication parameters from the cookie/init-ack */ sctp_auth_get_cookie_params(stcb, m, initack_offset + sizeof(struct sctp_init_ack_chunk), initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk))); if (auth_skipped) { struct sctp_auth_chunk *auth; auth = (struct sctp_auth_chunk *) sctp_m_getptr(m, auth_offset, auth_len, auth_chunk_buf); if ((auth == NULL) || sctp_handle_auth(stcb, auth, m, auth_offset)) { /* auth HMAC failed, dump the assoc and packet */ SCTPDBG(SCTP_DEBUG_AUTH1, "COOKIE-ECHO: AUTH failed\n"); atomic_add_int(&stcb->asoc.refcnt, 1); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_21); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } else { /* remaining chunks checked... good to go */ stcb->asoc.authenticated = 1; } } /* update current state */ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n"); SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) { sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } sctp_stop_all_cookie_timers(stcb); SCTP_STAT_INCR_COUNTER32(sctps_passiveestab); SCTP_STAT_INCR_GAUGE32(sctps_currestab); /* * if we're doing ASCONFs, check to see if we have any new local * addresses that need to get added to the peer (eg. addresses * changed while cookie echo in flight). This needs to be done * after we go to the OPEN state to do the correct asconf * processing. else, make sure we have the correct addresses in our * lists */ /* warning, we re-use sin, sin6, sa_store here! */ /* pull in local_address (our "from" address) */ switch (cookie->laddr_type) { #ifdef INET case SCTP_IPV4_ADDRESS: /* source addr is IPv4 */ memset(&store.sin, 0, sizeof(struct sockaddr_in)); store.sin.sin_family = AF_INET; store.sin.sin_len = sizeof(struct sockaddr_in); store.sin.sin_addr.s_addr = cookie->laddress[0]; break; #endif #ifdef INET6 case SCTP_IPV6_ADDRESS: /* source addr is IPv6 */ memset(&store.sin6, 0, sizeof(struct sockaddr_in6)); store.sin6.sin6_family = AF_INET6; store.sin6.sin6_len = sizeof(struct sockaddr_in6); store.sin6.sin6_scope_id = cookie->scope_id; memcpy(&store.sin6.sin6_addr, cookie->laddress, sizeof(struct in6_addr)); break; #endif default: atomic_add_int(&stcb->asoc.refcnt, 1); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif atomic_subtract_int(&stcb->asoc.refcnt, 1); return (NULL); } /* set up to notify upper layer */ *notification = SCTP_NOTIFY_ASSOC_UP; if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) && (inp->sctp_socket->so_qlimit == 0)) { /* * This is an endpoint that called connect() how it got a * cookie that is NEW is a bit of a mystery. It must be that * the INIT was sent, but before it got there.. a complete * INIT/INIT-ACK/COOKIE arrived. But of course then it * should have went to the other code.. not here.. oh well.. * a bit of protection is worth having.. */ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { SCTP_SOCKET_UNLOCK(so, 1); return (NULL); } #endif soisconnected(stcb->sctp_socket); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (inp->sctp_socket->so_qlimit)) { /* * We don't want to do anything with this one. Since it is * the listening guy. The timer will get started for * accepted connections in the caller. */ ; } /* since we did not send a HB make sure we don't double things */ if ((netp) && (*netp)) (*netp)->hb_responded = 1; if (stcb->asoc.sctp_autoclose_ticks && sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) { sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL); } (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); if ((netp != NULL) && (*netp != NULL)) { /* calculate the RTT and set the encaps port */ (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp, &cookie->time_entered, sctp_align_unsafe_makecopy, SCTP_RTT_FROM_NON_DATA); } /* respond with a COOKIE-ACK */ sctp_send_cookie_ack(stcb); /* * check the address lists for any ASCONFs that need to be sent * AFTER the cookie-ack is sent */ sctp_check_address_list(stcb, m, initack_offset + sizeof(struct sctp_init_ack_chunk), initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)), &store.sa, cookie->local_scope, cookie->site_scope, cookie->ipv4_scope, cookie->loopback_scope); return (stcb); } /* * CODE LIKE THIS NEEDS TO RUN IF the peer supports the NAT extension, i.e * we NEED to make sure we are not already using the vtag. If so we * need to send back an ABORT-TRY-AGAIN-WITH-NEW-TAG No middle box bit! head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag, SCTP_BASE_INFO(hashasocmark))]; LIST_FOREACH(stcb, head, sctp_asocs) { if ((stcb->asoc.my_vtag == tag) && (stcb->rport == rport) && (inp == stcb->sctp_ep)) { -- SEND ABORT - TRY AGAIN -- } } */ /* * handles a COOKIE-ECHO message stcb: modified to either a new or left as * existing (non-NULL) TCB */ static struct mbuf * sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp, struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp, int auth_skipped, uint32_t auth_offset, uint32_t auth_len, struct sctp_tcb **locked_tcb, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_state_cookie *cookie; struct sctp_tcb *l_stcb = *stcb; struct sctp_inpcb *l_inp; struct sockaddr *to; struct sctp_pcb *ep; struct mbuf *m_sig; uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE]; uint8_t *sig; uint8_t cookie_ok = 0; unsigned int sig_offset, cookie_offset; unsigned int cookie_len; struct timeval now; struct timeval time_expires; int notification = 0; struct sctp_nets *netl; int had_a_existing_tcb = 0; int send_int_conf = 0; #ifdef INET struct sockaddr_in sin; #endif #ifdef INET6 struct sockaddr_in6 sin6; #endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_cookie: handling COOKIE-ECHO\n"); if (inp_p == NULL) { return (NULL); } cookie = &cp->cookie; cookie_offset = offset + sizeof(struct sctp_chunkhdr); cookie_len = ntohs(cp->ch.chunk_length); if ((cookie->peerport != sh->src_port) || (cookie->myport != sh->dest_port) || (cookie->my_vtag != sh->v_tag)) { /* * invalid ports or bad tag. Note that we always leave the * v_tag in the header in network order and when we stored * it in the my_vtag slot we also left it in network order. * This maintains the match even though it may be in the * opposite byte order of the machine :-> */ return (NULL); } if (cookie_len < sizeof(struct sctp_cookie_echo_chunk) + sizeof(struct sctp_init_chunk) + sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) { /* cookie too small */ return (NULL); } /* * split off the signature into its own mbuf (since it should not be * calculated in the sctp_hmac_m() call). */ sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE; m_sig = m_split(m, sig_offset, M_NOWAIT); if (m_sig == NULL) { /* out of memory or ?? */ return (NULL); } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(m_sig, SCTP_MBUF_SPLIT); } #endif /* * compute the signature/digest for the cookie */ ep = &(*inp_p)->sctp_ep; l_inp = *inp_p; if (l_stcb) { SCTP_TCB_UNLOCK(l_stcb); } SCTP_INP_RLOCK(l_inp); if (l_stcb) { SCTP_TCB_LOCK(l_stcb); } /* which cookie is it? */ if ((cookie->time_entered.tv_sec < (long)ep->time_of_secret_change) && (ep->current_secret_number != ep->last_secret_number)) { /* it's the old cookie */ (void)sctp_hmac_m(SCTP_HMAC, (uint8_t *)ep->secret_key[(int)ep->last_secret_number], SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0); } else { /* it's the current cookie */ (void)sctp_hmac_m(SCTP_HMAC, (uint8_t *)ep->secret_key[(int)ep->current_secret_number], SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0); } /* get the signature */ SCTP_INP_RUNLOCK(l_inp); sig = (uint8_t *)sctp_m_getptr(m_sig, 0, SCTP_SIGNATURE_SIZE, (uint8_t *)&tmp_sig); if (sig == NULL) { /* couldn't find signature */ sctp_m_freem(m_sig); return (NULL); } /* compare the received digest with the computed digest */ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) { /* try the old cookie? */ if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) && (ep->current_secret_number != ep->last_secret_number)) { /* compute digest with old */ (void)sctp_hmac_m(SCTP_HMAC, (uint8_t *)ep->secret_key[(int)ep->last_secret_number], SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0); /* compare */ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0) cookie_ok = 1; } } else { cookie_ok = 1; } /* * Now before we continue we must reconstruct our mbuf so that * normal processing of any other chunks will work. */ { struct mbuf *m_at; m_at = m; while (SCTP_BUF_NEXT(m_at) != NULL) { m_at = SCTP_BUF_NEXT(m_at); } SCTP_BUF_NEXT(m_at) = m_sig; } if (cookie_ok == 0) { SCTPDBG(SCTP_DEBUG_INPUT2, "handle_cookie_echo: cookie signature validation failed!\n"); SCTPDBG(SCTP_DEBUG_INPUT2, "offset = %u, cookie_offset = %u, sig_offset = %u\n", (uint32_t)offset, cookie_offset, sig_offset); return (NULL); } /* * check the cookie timestamps to be sure it's not stale */ (void)SCTP_GETTIME_TIMEVAL(&now); /* Expire time is in Ticks, so we convert to seconds */ time_expires.tv_sec = cookie->time_entered.tv_sec + TICKS_TO_SEC(cookie->cookie_life); time_expires.tv_usec = cookie->time_entered.tv_usec; /* * TODO sctp_constants.h needs alternative time macros when _KERNEL * is undefined. */ if (timevalcmp(&now, &time_expires, >)) { /* cookie is stale! */ struct mbuf *op_err; struct sctp_error_stale_cookie *cause; uint32_t tim; op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_stale_cookie), 0, M_NOWAIT, 1, MT_DATA); if (op_err == NULL) { /* FOOBAR */ return (NULL); } /* Set the len */ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_stale_cookie); cause = mtod(op_err, struct sctp_error_stale_cookie *); cause->cause.code = htons(SCTP_CAUSE_STALE_COOKIE); cause->cause.length = htons((sizeof(struct sctp_paramhdr) + (sizeof(uint32_t)))); /* seconds to usec */ tim = (now.tv_sec - time_expires.tv_sec) * 1000000; /* add in usec */ if (tim == 0) tim = now.tv_usec - cookie->time_entered.tv_usec; cause->stale_time = htonl(tim); sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err, mflowtype, mflowid, l_inp->fibnum, vrf_id, port); return (NULL); } /* * Now we must see with the lookup address if we have an existing * asoc. This will only happen if we were in the COOKIE-WAIT state * and a INIT collided with us and somewhere the peer sent the * cookie on another address besides the single address our assoc * had for him. In this case we will have one of the tie-tags set at * least AND the address field in the cookie can be used to look it * up. */ to = NULL; switch (cookie->addr_type) { #ifdef INET6 case SCTP_IPV6_ADDRESS: memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); sin6.sin6_port = sh->src_port; sin6.sin6_scope_id = cookie->scope_id; memcpy(&sin6.sin6_addr.s6_addr, cookie->address, sizeof(sin6.sin6_addr.s6_addr)); to = (struct sockaddr *)&sin6; break; #endif #ifdef INET case SCTP_IPV4_ADDRESS: memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = sh->src_port; sin.sin_addr.s_addr = cookie->address[0]; to = (struct sockaddr *)&sin; break; #endif default: /* This should not happen */ return (NULL); } if (*stcb == NULL) { /* Yep, lets check */ *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL); if (*stcb == NULL) { /* * We should have only got back the same inp. If we * got back a different ep we have a problem. The * original findep got back l_inp and now */ if (l_inp != *inp_p) { SCTP_PRINTF("Bad problem find_ep got a diff inp then special_locate?\n"); } } else { if (*locked_tcb == NULL) { /* * In this case we found the assoc only * after we locked the create lock. This * means we are in a colliding case and we * must make sure that we unlock the tcb if * its one of the cases where we throw away * the incoming packets. */ *locked_tcb = *stcb; /* * We must also increment the inp ref count * since the ref_count flags was set when we * did not find the TCB, now we found it * which reduces the refcount.. we must * raise it back out to balance it all :-) */ SCTP_INP_INCR_REF((*stcb)->sctp_ep); if ((*stcb)->sctp_ep != l_inp) { SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n", (void *)(*stcb)->sctp_ep, (void *)l_inp); } } } } cookie_len -= SCTP_SIGNATURE_SIZE; if (*stcb == NULL) { /* this is the "normal" case... get a new TCB */ *stcb = sctp_process_cookie_new(m, iphlen, offset, src, dst, sh, cookie, cookie_len, *inp_p, netp, to, ¬ification, auth_skipped, auth_offset, auth_len, mflowtype, mflowid, vrf_id, port); } else { /* this is abnormal... cookie-echo on existing TCB */ had_a_existing_tcb = 1; *stcb = sctp_process_cookie_existing(m, iphlen, offset, src, dst, sh, cookie, cookie_len, *inp_p, *stcb, netp, to, ¬ification, auth_skipped, auth_offset, auth_len, mflowtype, mflowid, vrf_id, port); } if (*stcb == NULL) { /* still no TCB... must be bad cookie-echo */ return (NULL); } if (*netp != NULL) { (*netp)->flowtype = mflowtype; (*netp)->flowid = mflowid; } /* * Ok, we built an association so confirm the address we sent the * INIT-ACK to. */ netl = sctp_findnet(*stcb, to); /* * This code should in theory NOT run but */ if (netl == NULL) { /* TSNH! Huh, why do I need to add this address here? */ if (sctp_add_remote_addr(*stcb, to, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_IN_COOKIE_PROC)) { return (NULL); } netl = sctp_findnet(*stcb, to); } if (netl) { if (netl->dest_state & SCTP_ADDR_UNCONFIRMED) { netl->dest_state &= ~SCTP_ADDR_UNCONFIRMED; (void)sctp_set_primary_addr((*stcb), (struct sockaddr *)NULL, netl); send_int_conf = 1; } } sctp_start_net_timers(*stcb); if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { if (!had_a_existing_tcb || (((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) { /* * If we have a NEW cookie or the connect never * reached the connected state during collision we * must do the TCP accept thing. */ struct socket *so, *oso; struct sctp_inpcb *inp; if (notification == SCTP_NOTIFY_ASSOC_RESTART) { /* * For a restart we will keep the same * socket, no need to do anything. I THINK!! */ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED); if (send_int_conf) { sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); } return (m); } oso = (*inp_p)->sctp_socket; atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); CURVNET_SET(oso->so_vnet); so = sonewconn(oso, 0 ); CURVNET_RESTORE(); SCTP_TCB_LOCK((*stcb)); atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); if (so == NULL) { struct mbuf *op_err; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *pcb_so; #endif /* Too many sockets */ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n"); op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); sctp_abort_association(*inp_p, NULL, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) pcb_so = SCTP_INP_SO(*inp_p); atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); SCTP_SOCKET_LOCK(pcb_so, 1); SCTP_TCB_LOCK((*stcb)); atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); #endif (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(pcb_so, 1); #endif return (NULL); } inp = (struct sctp_inpcb *)so->so_pcb; SCTP_INP_INCR_REF(inp); /* * We add the unbound flag here so that if we get an * soabort() before we get the move_pcb done, we * will properly cleanup. */ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE | SCTP_PCB_FLAGS_CONNECTED | SCTP_PCB_FLAGS_IN_TCPPOOL | SCTP_PCB_FLAGS_UNBOUND | (SCTP_PCB_COPY_FLAGS & (*inp_p)->sctp_flags) | SCTP_PCB_FLAGS_DONT_WAKE); inp->sctp_features = (*inp_p)->sctp_features; inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features; inp->sctp_socket = so; inp->sctp_frag_point = (*inp_p)->sctp_frag_point; inp->max_cwnd = (*inp_p)->max_cwnd; inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off; inp->ecn_supported = (*inp_p)->ecn_supported; inp->prsctp_supported = (*inp_p)->prsctp_supported; inp->auth_supported = (*inp_p)->auth_supported; inp->asconf_supported = (*inp_p)->asconf_supported; inp->reconfig_supported = (*inp_p)->reconfig_supported; inp->nrsack_supported = (*inp_p)->nrsack_supported; inp->pktdrop_supported = (*inp_p)->pktdrop_supported; inp->partial_delivery_point = (*inp_p)->partial_delivery_point; inp->sctp_context = (*inp_p)->sctp_context; inp->local_strreset_support = (*inp_p)->local_strreset_support; inp->fibnum = (*inp_p)->fibnum; inp->inp_starting_point_for_iterator = NULL; /* * copy in the authentication parameters from the * original endpoint */ if (inp->sctp_ep.local_hmacs) sctp_free_hmaclist(inp->sctp_ep.local_hmacs); inp->sctp_ep.local_hmacs = sctp_copy_hmaclist((*inp_p)->sctp_ep.local_hmacs); if (inp->sctp_ep.local_auth_chunks) sctp_free_chunklist(inp->sctp_ep.local_auth_chunks); inp->sctp_ep.local_auth_chunks = sctp_copy_chunklist((*inp_p)->sctp_ep.local_auth_chunks); /* * Now we must move it from one hash table to * another and get the tcb in the right place. */ /* * This is where the one-2-one socket is put into * the accept state waiting for the accept! */ if (*stcb) { (*stcb)->asoc.state |= SCTP_STATE_IN_ACCEPT_QUEUE; } sctp_move_pcb_and_assoc(*inp_p, inp, *stcb); atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); sctp_pull_off_control_to_new_inp((*inp_p), inp, *stcb, 0); SCTP_TCB_LOCK((*stcb)); atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); /* * now we must check to see if we were aborted while * the move was going on and the lock/unlock * happened. */ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* * yep it was, we leave the assoc attached * to the socket since the sctp_inpcb_free() * call will send an abort for us. */ SCTP_INP_DECR_REF(inp); return (NULL); } SCTP_INP_DECR_REF(inp); /* Switch over to the new guy */ *inp_p = inp; sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED); if (send_int_conf) { sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); } /* * Pull it from the incomplete queue and wake the * guy */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) atomic_add_int(&(*stcb)->asoc.refcnt, 1); SCTP_TCB_UNLOCK((*stcb)); SCTP_SOCKET_LOCK(so, 1); #endif soisconnected(so); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_TCB_LOCK((*stcb)); atomic_subtract_int(&(*stcb)->asoc.refcnt, 1); SCTP_SOCKET_UNLOCK(so, 1); #endif return (m); } } if (notification) { sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } if (send_int_conf) { sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED); } return (m); } static void sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED, struct sctp_tcb *stcb, struct sctp_nets *net) { /* cp must not be used, others call this without a c-ack :-) */ struct sctp_association *asoc; SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_cookie_ack: handling COOKIE-ACK\n"); if ((stcb == NULL) || (net == NULL)) { return; } asoc = &stcb->asoc; sctp_stop_all_cookie_timers(stcb); /* process according to association state */ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) { /* state change only needed when I am in right state */ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n"); SCTP_SET_STATE(asoc, SCTP_STATE_OPEN); sctp_start_net_timers(stcb); if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) { sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } /* update RTO */ SCTP_STAT_INCR_COUNTER32(sctps_activeestab); SCTP_STAT_INCR_GAUGE32(sctps_currestab); if (asoc->overall_error_count == 0) { net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy, SCTP_RTT_FROM_NON_DATA); } (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered); sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif if ((stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) == 0) { soisconnected(stcb->sctp_socket); } #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } /* * since we did not send a HB make sure we don't double * things */ net->hb_responded = 1; if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) { /* * We don't need to do the asconf thing, nor hb or * autoclose if the socket is closed. */ goto closed_socket; } sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net); if (stcb->asoc.sctp_autoclose_ticks && sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTOCLOSE)) { sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, stcb->sctp_ep, stcb, NULL); } /* * send ASCONF if parameters are pending and ASCONFs are * allowed (eg. addresses changed when init/cookie echo were * in flight) */ if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) && (stcb->asoc.asconf_supported == 1) && (!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) { #ifdef SCTP_TIMER_BASED_ASCONF sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, stcb->asoc.primary_destination); #else sctp_send_asconf(stcb, stcb->asoc.primary_destination, SCTP_ADDR_NOT_LOCKED); #endif } } closed_socket: /* Toss the cookie if I can */ sctp_toss_old_cookies(stcb, asoc); if (!TAILQ_EMPTY(&asoc->sent_queue)) { /* Restart the timer if we have pending data */ struct sctp_tmit_chunk *chk; chk = TAILQ_FIRST(&asoc->sent_queue); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo); } } static void sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp, struct sctp_tcb *stcb) { struct sctp_nets *net; struct sctp_tmit_chunk *lchk; struct sctp_ecne_chunk bkup; uint8_t override_bit; uint32_t tsn, window_data_tsn; int len; unsigned int pkt_cnt; len = ntohs(cp->ch.chunk_length); if ((len != sizeof(struct sctp_ecne_chunk)) && (len != sizeof(struct old_sctp_ecne_chunk))) { return; } if (len == sizeof(struct old_sctp_ecne_chunk)) { /* Its the old format */ memcpy(&bkup, cp, sizeof(struct old_sctp_ecne_chunk)); bkup.num_pkts_since_cwr = htonl(1); cp = &bkup; } SCTP_STAT_INCR(sctps_recvecne); tsn = ntohl(cp->tsn); pkt_cnt = ntohl(cp->num_pkts_since_cwr); lchk = TAILQ_LAST(&stcb->asoc.send_queue, sctpchunk_listhead); if (lchk == NULL) { window_data_tsn = stcb->asoc.sending_seq - 1; } else { window_data_tsn = lchk->rec.data.tsn; } /* Find where it was sent to if possible. */ net = NULL; TAILQ_FOREACH(lchk, &stcb->asoc.sent_queue, sctp_next) { if (lchk->rec.data.tsn == tsn) { net = lchk->whoTo; net->ecn_prev_cwnd = lchk->rec.data.cwnd_at_send; break; } if (SCTP_TSN_GT(lchk->rec.data.tsn, tsn)) { break; } } if (net == NULL) { /* * What to do. A previous send of a CWR was possibly lost. * See how old it is, we may have it marked on the actual * net. */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (tsn == net->last_cwr_tsn) { /* Found him, send it off */ break; } } if (net == NULL) { /* * If we reach here, we need to send a special CWR * that says hey, we did this a long time ago and * you lost the response. */ net = TAILQ_FIRST(&stcb->asoc.nets); if (net == NULL) { /* TSNH */ return; } override_bit = SCTP_CWR_REDUCE_OVERRIDE; } else { override_bit = 0; } } else { override_bit = 0; } if (SCTP_TSN_GT(tsn, net->cwr_window_tsn) && ((override_bit & SCTP_CWR_REDUCE_OVERRIDE) == 0)) { /* * JRS - Use the congestion control given in the pluggable * CC module */ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net, 0, pkt_cnt); /* * We reduce once every RTT. So we will only lower cwnd at * the next sending seq i.e. the window_data_tsn */ net->cwr_window_tsn = window_data_tsn; net->ecn_ce_pkt_cnt += pkt_cnt; net->lost_cnt = pkt_cnt; net->last_cwr_tsn = tsn; } else { override_bit |= SCTP_CWR_IN_SAME_WINDOW; if (SCTP_TSN_GT(tsn, net->last_cwr_tsn) && ((override_bit & SCTP_CWR_REDUCE_OVERRIDE) == 0)) { /* * Another loss in the same window update how many * marks/packets lost we have had. */ int cnt = 1; if (pkt_cnt > net->lost_cnt) { /* Should be the case */ cnt = (pkt_cnt - net->lost_cnt); net->ecn_ce_pkt_cnt += cnt; } net->lost_cnt = pkt_cnt; net->last_cwr_tsn = tsn; /* * Most CC functions will ignore this call, since we * are in-window yet of the initial CE the peer saw. */ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net, 1, cnt); } } /* * We always send a CWR this way if our previous one was lost our * peer will get an update, or if it is not time again to reduce we * still get the cwr to the peer. Note we set the override when we * could not find the TSN on the chunk or the destination network. */ sctp_send_cwr(stcb, net, net->last_cwr_tsn, override_bit); } static void sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net) { /* * Here we get a CWR from the peer. We must look in the outqueue and * make sure that we have a covered ECNE in the control chunk part. * If so remove it. */ struct sctp_tmit_chunk *chk; struct sctp_ecne_chunk *ecne; int override; uint32_t cwr_tsn; cwr_tsn = ntohl(cp->tsn); override = cp->ch.chunk_flags & SCTP_CWR_REDUCE_OVERRIDE; TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) { if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) { continue; } if ((override == 0) && (chk->whoTo != net)) { /* Must be from the right src unless override is set */ continue; } ecne = mtod(chk->data, struct sctp_ecne_chunk *); if (SCTP_TSN_GE(cwr_tsn, ntohl(ecne->tsn))) { /* this covers this ECNE, we can remove it */ stcb->asoc.ecn_echo_cnt_onq--; TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk, sctp_next); sctp_m_freem(chk->data); chk->data = NULL; stcb->asoc.ctrl_queue_cnt--; sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); if (override == 0) { break; } } } } static void sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSED, struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_shutdown_complete: handling SHUTDOWN-COMPLETE\n"); if (stcb == NULL) return; asoc = &stcb->asoc; /* process according to association state */ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) { /* unexpected SHUTDOWN-COMPLETE... so ignore... */ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n"); SCTP_TCB_UNLOCK(stcb); return; } /* notify upper layer protocol */ if (stcb->sctp_socket) { sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } #ifdef INVARIANTS if (!TAILQ_EMPTY(&asoc->send_queue) || !TAILQ_EMPTY(&asoc->sent_queue) || sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) { panic("Queues are not empty when handling SHUTDOWN-COMPLETE"); } #endif /* stop the timer */ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24); SCTP_STAT_INCR_COUNTER32(sctps_shutdown); /* free the TCB */ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_shutdown_complete: calls free-asoc\n"); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(stcb->sctp_ep); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif return; } static int process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc, struct sctp_nets *net, uint8_t flg) { switch (desc->chunk_type) { case SCTP_DATA: /* find the tsn to resend (possibly */ { uint32_t tsn; struct sctp_tmit_chunk *tp1; tsn = ntohl(desc->tsn_ifany); TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) { if (tp1->rec.data.tsn == tsn) { /* found it */ break; } if (SCTP_TSN_GT(tp1->rec.data.tsn, tsn)) { /* not found */ tp1 = NULL; break; } } if (tp1 == NULL) { /* * Do it the other way , aka without paying * attention to queue seq order. */ SCTP_STAT_INCR(sctps_pdrpdnfnd); TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) { if (tp1->rec.data.tsn == tsn) { /* found it */ break; } } } if (tp1 == NULL) { SCTP_STAT_INCR(sctps_pdrptsnnf); } if ((tp1) && (tp1->sent < SCTP_DATAGRAM_ACKED)) { uint8_t *ddp; if (((flg & SCTP_BADCRC) == 0) && ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) { return (0); } if ((stcb->asoc.peers_rwnd == 0) && ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) { SCTP_STAT_INCR(sctps_pdrpdiwnp); return (0); } if (stcb->asoc.peers_rwnd == 0 && (flg & SCTP_FROM_MIDDLE_BOX)) { SCTP_STAT_INCR(sctps_pdrpdizrw); return (0); } ddp = (uint8_t *)(mtod(tp1->data, caddr_t)+ sizeof(struct sctp_data_chunk)); { unsigned int iii; for (iii = 0; iii < sizeof(desc->data_bytes); iii++) { if (ddp[iii] != desc->data_bytes[iii]) { SCTP_STAT_INCR(sctps_pdrpbadd); return (-1); } } } if (tp1->do_rtt) { /* * this guy had a RTO calculation * pending on it, cancel it */ if (tp1->whoTo->rto_needed == 0) { tp1->whoTo->rto_needed = 1; } tp1->do_rtt = 0; } SCTP_STAT_INCR(sctps_pdrpmark); if (tp1->sent != SCTP_DATAGRAM_RESEND) sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); /* * mark it as if we were doing a FR, since * we will be getting gap ack reports behind * the info from the router. */ tp1->rec.data.doing_fast_retransmit = 1; /* * mark the tsn with what sequences can * cause a new FR. */ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) { tp1->rec.data.fast_retran_tsn = stcb->asoc.sending_seq; } else { tp1->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.tsn; } /* restart the timer */ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, tp1->whoTo); /* fix counts and things */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PDRP, tp1->whoTo->flight_size, tp1->book_size, (uint32_t)(uintptr_t)stcb, tp1->rec.data.tsn); } if (tp1->sent < SCTP_DATAGRAM_RESEND) { sctp_flight_size_decrease(tp1); sctp_total_flight_decrease(stcb, tp1); } tp1->sent = SCTP_DATAGRAM_RESEND; } { /* audit code */ unsigned int audit; audit = 0; TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) { if (tp1->sent == SCTP_DATAGRAM_RESEND) audit++; } TAILQ_FOREACH(tp1, &stcb->asoc.control_send_queue, sctp_next) { if (tp1->sent == SCTP_DATAGRAM_RESEND) audit++; } if (audit != stcb->asoc.sent_queue_retran_cnt) { SCTP_PRINTF("**Local Audit finds cnt:%d asoc cnt:%d\n", audit, stcb->asoc.sent_queue_retran_cnt); #ifndef SCTP_AUDITING_ENABLED stcb->asoc.sent_queue_retran_cnt = audit; #endif } } } break; case SCTP_ASCONF: { struct sctp_tmit_chunk *asconf; TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue, sctp_next) { if (asconf->rec.chunk_id.id == SCTP_ASCONF) { break; } } if (asconf) { if (asconf->sent != SCTP_DATAGRAM_RESEND) sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); asconf->sent = SCTP_DATAGRAM_RESEND; asconf->snd_count--; } } break; case SCTP_INITIATION: /* resend the INIT */ stcb->asoc.dropped_special_cnt++; if (stcb->asoc.dropped_special_cnt < SCTP_RETRY_DROPPED_THRESH) { /* * If we can get it in, in a few attempts we do * this, otherwise we let the timer fire. */ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27); sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED); } break; case SCTP_SELECTIVE_ACK: case SCTP_NR_SELECTIVE_ACK: /* resend the sack */ sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); break; case SCTP_HEARTBEAT_REQUEST: /* resend a demand HB */ if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) { /* * Only retransmit if we KNOW we wont destroy the * tcb */ sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED); } break; case SCTP_SHUTDOWN: sctp_send_shutdown(stcb, net); break; case SCTP_SHUTDOWN_ACK: sctp_send_shutdown_ack(stcb, net); break; case SCTP_COOKIE_ECHO: { struct sctp_tmit_chunk *cookie; cookie = NULL; TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue, sctp_next) { if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) { break; } } if (cookie) { if (cookie->sent != SCTP_DATAGRAM_RESEND) sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt); cookie->sent = SCTP_DATAGRAM_RESEND; sctp_stop_all_cookie_timers(stcb); } } break; case SCTP_COOKIE_ACK: sctp_send_cookie_ack(stcb); break; case SCTP_ASCONF_ACK: /* resend last asconf ack */ sctp_send_asconf_ack(stcb); break; case SCTP_IFORWARD_CUM_TSN: case SCTP_FORWARD_CUM_TSN: send_forward_tsn(stcb, &stcb->asoc); break; /* can't do anything with these */ case SCTP_PACKET_DROPPED: case SCTP_INITIATION_ACK: /* this should not happen */ case SCTP_HEARTBEAT_ACK: case SCTP_ABORT_ASSOCIATION: case SCTP_OPERATION_ERROR: case SCTP_SHUTDOWN_COMPLETE: case SCTP_ECN_ECHO: case SCTP_ECN_CWR: default: break; } return (0); } void sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *list) { uint32_t i; uint16_t temp; /* * We set things to 0xffffffff since this is the last delivered * sequence and we will be sending in 0 after the reset. */ if (number_entries) { for (i = 0; i < number_entries; i++) { temp = ntohs(list[i]); if (temp >= stcb->asoc.streamincnt) { continue; } stcb->asoc.strmin[temp].last_mid_delivered = 0xffffffff; } } else { list = NULL; for (i = 0; i < stcb->asoc.streamincnt; i++) { stcb->asoc.strmin[i].last_mid_delivered = 0xffffffff; } } sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_RECV, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED); } static void sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *list) { uint32_t i; uint16_t temp; if (number_entries > 0) { for (i = 0; i < number_entries; i++) { temp = ntohs(list[i]); if (temp >= stcb->asoc.streamoutcnt) { /* no such stream */ continue; } stcb->asoc.strmout[temp].next_mid_ordered = 0; stcb->asoc.strmout[temp].next_mid_unordered = 0; } } else { for (i = 0; i < stcb->asoc.streamoutcnt; i++) { stcb->asoc.strmout[i].next_mid_ordered = 0; stcb->asoc.strmout[i].next_mid_unordered = 0; } } sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED); } static void sctp_reset_clear_pending(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *list) { uint32_t i; uint16_t temp; if (number_entries > 0) { for (i = 0; i < number_entries; i++) { temp = ntohs(list[i]); if (temp >= stcb->asoc.streamoutcnt) { /* no such stream */ continue; } stcb->asoc.strmout[temp].state = SCTP_STREAM_OPEN; } } else { for (i = 0; i < stcb->asoc.streamoutcnt; i++) { stcb->asoc.strmout[i].state = SCTP_STREAM_OPEN; } } } struct sctp_stream_reset_request * sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk) { struct sctp_association *asoc; struct sctp_chunkhdr *ch; struct sctp_stream_reset_request *r; struct sctp_tmit_chunk *chk; int len, clen; asoc = &stcb->asoc; if (TAILQ_EMPTY(&stcb->asoc.control_send_queue)) { asoc->stream_reset_outstanding = 0; return (NULL); } if (stcb->asoc.str_reset == NULL) { asoc->stream_reset_outstanding = 0; return (NULL); } chk = stcb->asoc.str_reset; if (chk->data == NULL) { return (NULL); } if (bchk) { /* he wants a copy of the chk pointer */ *bchk = chk; } clen = chk->send_size; ch = mtod(chk->data, struct sctp_chunkhdr *); r = (struct sctp_stream_reset_request *)(ch + 1); if (ntohl(r->request_seq) == seq) { /* found it */ return (r); } len = SCTP_SIZE32(ntohs(r->ph.param_length)); if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) { /* move to the next one, there can only be a max of two */ r = (struct sctp_stream_reset_request *)((caddr_t)r + len); if (ntohl(r->request_seq) == seq) { return (r); } } /* that seq is not here */ return (NULL); } static void sctp_clean_up_stream_reset(struct sctp_tcb *stcb) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk = stcb->asoc.str_reset; if (stcb->asoc.str_reset == NULL) { return; } asoc = &stcb->asoc; sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28); TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } asoc->ctrl_queue_cnt--; sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); /* sa_ignore NO_NULL_CHK */ stcb->asoc.str_reset = NULL; } static int sctp_handle_stream_reset_response(struct sctp_tcb *stcb, uint32_t seq, uint32_t action, struct sctp_stream_reset_response *respin) { uint16_t type; int lparm_len; struct sctp_association *asoc = &stcb->asoc; struct sctp_tmit_chunk *chk; struct sctp_stream_reset_request *req_param; struct sctp_stream_reset_out_request *req_out_param; struct sctp_stream_reset_in_request *req_in_param; uint32_t number_entries; if (asoc->stream_reset_outstanding == 0) { /* duplicate */ return (0); } if (seq == stcb->asoc.str_reset_seq_out) { req_param = sctp_find_stream_reset(stcb, seq, &chk); if (req_param != NULL) { stcb->asoc.str_reset_seq_out++; type = ntohs(req_param->ph.param_type); lparm_len = ntohs(req_param->ph.param_length); if (type == SCTP_STR_RESET_OUT_REQUEST) { int no_clear = 0; req_out_param = (struct sctp_stream_reset_out_request *)req_param; number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t); asoc->stream_reset_out_is_outstanding = 0; if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) { /* do it */ sctp_reset_out_streams(stcb, number_entries, req_out_param->list_of_streams); } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) { sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED); } else if (action == SCTP_STREAM_RESET_RESULT_IN_PROGRESS) { /* * Set it up so we don't stop * retransmitting */ asoc->stream_reset_outstanding++; stcb->asoc.str_reset_seq_out--; asoc->stream_reset_out_is_outstanding = 1; no_clear = 1; } else { sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED); } if (no_clear == 0) { sctp_reset_clear_pending(stcb, number_entries, req_out_param->list_of_streams); } } else if (type == SCTP_STR_RESET_IN_REQUEST) { req_in_param = (struct sctp_stream_reset_in_request *)req_param; number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t); if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; if (action == SCTP_STREAM_RESET_RESULT_DENIED) { sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb, number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED); } else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) { sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb, number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED); } } else if (type == SCTP_STR_RESET_ADD_OUT_STREAMS) { /* Ok we now may have more streams */ int num_stream; num_stream = stcb->asoc.strm_pending_add_size; if (num_stream > (stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt)) { /* TSNH */ num_stream = stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt; } stcb->asoc.strm_pending_add_size = 0; if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) { /* Put the new streams into effect */ int i; for (i = asoc->streamoutcnt; i < (asoc->streamoutcnt + num_stream); i++) { asoc->strmout[i].state = SCTP_STREAM_OPEN; } asoc->streamoutcnt += num_stream; sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0); } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) { sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, SCTP_STREAM_CHANGE_DENIED); } else { sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, SCTP_STREAM_CHANGE_FAILED); } } else if (type == SCTP_STR_RESET_ADD_IN_STREAMS) { if (asoc->stream_reset_outstanding) asoc->stream_reset_outstanding--; if (action == SCTP_STREAM_RESET_RESULT_DENIED) { sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, SCTP_STREAM_CHANGE_DENIED); } else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) { sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, SCTP_STREAM_CHANGE_FAILED); } } else if (type == SCTP_STR_RESET_TSN_REQUEST) { /** * a) Adopt the new in tsn. * b) reset the map * c) Adopt the new out-tsn */ struct sctp_stream_reset_response_tsn *resp; struct sctp_forward_tsn_chunk fwdtsn; int abort_flag = 0; if (respin == NULL) { /* huh ? */ return (0); } if (ntohs(respin->ph.param_length) < sizeof(struct sctp_stream_reset_response_tsn)) { return (0); } if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) { resp = (struct sctp_stream_reset_response_tsn *)respin; asoc->stream_reset_outstanding--; fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk)); fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN; fwdtsn.new_cumulative_tsn = htonl(ntohl(resp->senders_next_tsn) - 1); sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0); if (abort_flag) { return (1); } stcb->asoc.highest_tsn_inside_map = (ntohl(resp->senders_next_tsn) - 1); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(0, 7, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map; stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn); memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size); stcb->asoc.highest_tsn_inside_nr_map = stcb->asoc.highest_tsn_inside_map; memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size); stcb->asoc.sending_seq = ntohl(resp->receivers_next_tsn); stcb->asoc.last_acked_seq = stcb->asoc.cumulative_tsn; sctp_reset_out_streams(stcb, 0, (uint16_t *)NULL); sctp_reset_in_stream(stcb, 0, (uint16_t *)NULL); sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), 0); } else if (action == SCTP_STREAM_RESET_RESULT_DENIED) { sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), SCTP_ASSOC_RESET_DENIED); } else { sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), SCTP_ASSOC_RESET_FAILED); } } /* get rid of the request and get the request flags */ if (asoc->stream_reset_outstanding == 0) { sctp_clean_up_stream_reset(stcb); } } } if (asoc->stream_reset_outstanding == 0) { sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED); } return (0); } static void sctp_handle_str_reset_request_in(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, struct sctp_stream_reset_in_request *req, int trunc) { uint32_t seq; int len, i; int number_entries; uint16_t temp; /* * peer wants me to send a str-reset to him for my outgoing seq's if * seq_in is right. */ struct sctp_association *asoc = &stcb->asoc; seq = ntohl(req->request_seq); if (asoc->str_reset_seq_in == seq) { asoc->last_reset_action[1] = asoc->last_reset_action[0]; if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if (trunc) { /* Can't do it, since they exceeded our buffer size */ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if (stcb->asoc.stream_reset_out_is_outstanding == 0) { len = ntohs(req->ph.param_length); number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t)); if (number_entries) { for (i = 0; i < number_entries; i++) { temp = ntohs(req->list_of_streams[i]); if (temp >= stcb->asoc.streamoutcnt) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; goto bad_boy; } req->list_of_streams[i] = temp; } for (i = 0; i < number_entries; i++) { if (stcb->asoc.strmout[req->list_of_streams[i]].state == SCTP_STREAM_OPEN) { stcb->asoc.strmout[req->list_of_streams[i]].state = SCTP_STREAM_RESET_PENDING; } } } else { /* Its all */ for (i = 0; i < stcb->asoc.streamoutcnt; i++) { if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN) stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING; } } asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; } else { /* Can't do it, since we have sent one out */ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS; } bad_boy: sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); asoc->str_reset_seq_in++; } else if (asoc->str_reset_seq_in - 1 == seq) { sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); } else if (asoc->str_reset_seq_in - 2 == seq) { sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED); } static int sctp_handle_str_reset_request_tsn(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, struct sctp_stream_reset_tsn_request *req) { /* reset all in and out and update the tsn */ /* * A) reset my str-seq's on in and out. B) Select a receive next, * and set cum-ack to it. Also process this selected number as a * fwd-tsn as well. C) set in the response my next sending seq. */ struct sctp_forward_tsn_chunk fwdtsn; struct sctp_association *asoc = &stcb->asoc; int abort_flag = 0; uint32_t seq; seq = ntohl(req->request_seq); if (asoc->str_reset_seq_in == seq) { asoc->last_reset_action[1] = stcb->asoc.last_reset_action[0]; if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else { fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk)); fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN; fwdtsn.ch.chunk_flags = 0; fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1); sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0); if (abort_flag) { return (1); } asoc->highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT); } asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->highest_tsn_inside_map; asoc->mapping_array_base_tsn = asoc->highest_tsn_inside_map + 1; memset(asoc->mapping_array, 0, asoc->mapping_array_size); asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map; memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size); atomic_add_int(&asoc->sending_seq, 1); /* save off historical data for retrans */ asoc->last_sending_seq[1] = asoc->last_sending_seq[0]; asoc->last_sending_seq[0] = asoc->sending_seq; asoc->last_base_tsnsent[1] = asoc->last_base_tsnsent[0]; asoc->last_base_tsnsent[0] = asoc->mapping_array_base_tsn; sctp_reset_out_streams(stcb, 0, (uint16_t *)NULL); sctp_reset_in_stream(stcb, 0, (uint16_t *)NULL); asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; sctp_notify_stream_reset_tsn(stcb, asoc->sending_seq, (asoc->mapping_array_base_tsn + 1), 0); } sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0], asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]); asoc->str_reset_seq_in++; } else if (asoc->str_reset_seq_in - 1 == seq) { sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0], asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]); } else if (asoc->str_reset_seq_in - 2 == seq) { sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1], asoc->last_sending_seq[1], asoc->last_base_tsnsent[1]); } else { sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } return (0); } static void sctp_handle_str_reset_request_out(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, struct sctp_stream_reset_out_request *req, int trunc) { uint32_t seq, tsn; int number_entries, len; struct sctp_association *asoc = &stcb->asoc; seq = ntohl(req->request_seq); /* now if its not a duplicate we process it */ if (asoc->str_reset_seq_in == seq) { len = ntohs(req->ph.param_length); number_entries = ((len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t)); /* * the sender is resetting, handle the list issue.. we must * a) verify if we can do the reset, if so no problem b) If * we can't do the reset we must copy the request. c) queue * it, and setup the data in processor to trigger it off * when needed and dequeue all the queued data. */ tsn = ntohl(req->send_reset_at_tsn); /* move the reset action back one */ asoc->last_reset_action[1] = asoc->last_reset_action[0]; if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if (trunc) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if (SCTP_TSN_GE(asoc->cumulative_tsn, tsn)) { /* we can do it now */ sctp_reset_in_stream(stcb, number_entries, req->list_of_streams); asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; } else { /* * we must queue it up and thus wait for the TSN's * to arrive that are at or before tsn */ struct sctp_stream_reset_list *liste; int siz; siz = sizeof(struct sctp_stream_reset_list) + (number_entries * sizeof(uint16_t)); SCTP_MALLOC(liste, struct sctp_stream_reset_list *, siz, SCTP_M_STRESET); if (liste == NULL) { /* gak out of memory */ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); return; } liste->seq = seq; liste->tsn = tsn; liste->number_entries = number_entries; memcpy(&liste->list_of_streams, req->list_of_streams, number_entries * sizeof(uint16_t)); TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp); asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_IN_PROGRESS; } sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); asoc->str_reset_seq_in++; } else if ((asoc->str_reset_seq_in - 1) == seq) { /* * one seq back, just echo back last action since my * response was lost. */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); } else if ((asoc->str_reset_seq_in - 2) == seq) { /* * two seq back, just echo back last action since my * response was lost. */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } } static void sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, struct sctp_stream_reset_add_strm *str_add) { /* * Peer is requesting to add more streams. If its within our * max-streams we will allow it. */ uint32_t num_stream, i; uint32_t seq; struct sctp_association *asoc = &stcb->asoc; struct sctp_queued_to_read *ctl, *nctl; /* Get the number. */ seq = ntohl(str_add->request_seq); num_stream = ntohs(str_add->number_of_streams); /* Now what would be the new total? */ if (asoc->str_reset_seq_in == seq) { num_stream += stcb->asoc.streamincnt; stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if ((num_stream > stcb->asoc.max_inbound_streams) || (num_stream > 0xffff)) { /* We must reject it they ask for to many */ denied: stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else { /* Ok, we can do that :-) */ struct sctp_stream_in *oldstrm; /* save off the old */ oldstrm = stcb->asoc.strmin; SCTP_MALLOC(stcb->asoc.strmin, struct sctp_stream_in *, (num_stream * sizeof(struct sctp_stream_in)), SCTP_M_STRMI); if (stcb->asoc.strmin == NULL) { stcb->asoc.strmin = oldstrm; goto denied; } /* copy off the old data */ for (i = 0; i < stcb->asoc.streamincnt; i++) { TAILQ_INIT(&stcb->asoc.strmin[i].inqueue); TAILQ_INIT(&stcb->asoc.strmin[i].uno_inqueue); stcb->asoc.strmin[i].sid = i; stcb->asoc.strmin[i].last_mid_delivered = oldstrm[i].last_mid_delivered; stcb->asoc.strmin[i].delivery_started = oldstrm[i].delivery_started; stcb->asoc.strmin[i].pd_api_started = oldstrm[i].pd_api_started; /* now anything on those queues? */ TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].inqueue, next_instrm, nctl) { TAILQ_REMOVE(&oldstrm[i].inqueue, ctl, next_instrm); TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].inqueue, ctl, next_instrm); } TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].uno_inqueue, next_instrm, nctl) { TAILQ_REMOVE(&oldstrm[i].uno_inqueue, ctl, next_instrm); TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].uno_inqueue, ctl, next_instrm); } } /* Init the new streams */ for (i = stcb->asoc.streamincnt; i < num_stream; i++) { TAILQ_INIT(&stcb->asoc.strmin[i].inqueue); TAILQ_INIT(&stcb->asoc.strmin[i].uno_inqueue); stcb->asoc.strmin[i].sid = i; stcb->asoc.strmin[i].last_mid_delivered = 0xffffffff; stcb->asoc.strmin[i].pd_api_started = 0; stcb->asoc.strmin[i].delivery_started = 0; } SCTP_FREE(oldstrm, SCTP_M_STRMI); /* update the size */ stcb->asoc.streamincnt = num_stream; stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0); } sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); asoc->str_reset_seq_in++; } else if ((asoc->str_reset_seq_in - 1) == seq) { /* * one seq back, just echo back last action since my * response was lost. */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); } else if ((asoc->str_reset_seq_in - 2) == seq) { /* * two seq back, just echo back last action since my * response was lost. */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } } static void sctp_handle_str_reset_add_out_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, struct sctp_stream_reset_add_strm *str_add) { /* * Peer is requesting to add more streams. If its within our * max-streams we will allow it. */ uint16_t num_stream; uint32_t seq; struct sctp_association *asoc = &stcb->asoc; /* Get the number. */ seq = ntohl(str_add->request_seq); num_stream = ntohs(str_add->number_of_streams); /* Now what would be the new total? */ if (asoc->str_reset_seq_in == seq) { stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0]; if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } else if (stcb->asoc.stream_reset_outstanding) { /* We must reject it we have something pending */ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS; } else { /* Ok, we can do that :-) */ int mychk; mychk = stcb->asoc.streamoutcnt; mychk += num_stream; if (mychk < 0x10000) { stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED; if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, num_stream, 0, 1)) { stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } } else { stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED; } } sctp_add_stream_reset_result(chk, seq, stcb->asoc.last_reset_action[0]); asoc->str_reset_seq_in++; } else if ((asoc->str_reset_seq_in - 1) == seq) { /* * one seq back, just echo back last action since my * response was lost. */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]); } else if ((asoc->str_reset_seq_in - 2) == seq) { /* * two seq back, just echo back last action since my * response was lost. */ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]); } else { sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO); } } #ifdef __GNUC__ __attribute__((noinline)) #endif static int sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset, struct sctp_chunkhdr *ch_req) { uint16_t remaining_length, param_len, ptype; struct sctp_paramhdr pstore; uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE]; uint32_t seq = 0; int num_req = 0; int trunc = 0; struct sctp_tmit_chunk *chk; struct sctp_chunkhdr *ch; struct sctp_paramhdr *ph; int ret_code = 0; int num_param = 0; /* now it may be a reset or a reset-response */ remaining_length = ntohs(ch_req->chunk_length) - sizeof(struct sctp_chunkhdr); /* setup for adding the response */ sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { return (ret_code); } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_STREAM_RESET; chk->rec.chunk_id.can_take_data = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->no_fr_allowed = 0; chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr); chk->book_size_scale = 0; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { strres_nochunk: if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return (ret_code); } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); /* setup chunk parameters */ chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->whoTo = NULL; ch = mtod(chk->data, struct sctp_chunkhdr *); ch->chunk_type = SCTP_STREAM_RESET; ch->chunk_flags = 0; ch->chunk_length = htons(chk->send_size); SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size); offset += sizeof(struct sctp_chunkhdr); while (remaining_length >= sizeof(struct sctp_paramhdr)) { ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *)&pstore); if (ph == NULL) { /* TSNH */ break; } param_len = ntohs(ph->param_length); if ((param_len > remaining_length) || (param_len < (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)))) { /* bad parameter length */ break; } ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, sizeof(cstore)), (uint8_t *)&cstore); if (ph == NULL) { /* TSNH */ break; } ptype = ntohs(ph->param_type); num_param++; if (param_len > sizeof(cstore)) { trunc = 1; } else { trunc = 0; } if (num_param > SCTP_MAX_RESET_PARAMS) { /* hit the max of parameters already sorry.. */ break; } if (ptype == SCTP_STR_RESET_OUT_REQUEST) { struct sctp_stream_reset_out_request *req_out; if (param_len < sizeof(struct sctp_stream_reset_out_request)) { break; } req_out = (struct sctp_stream_reset_out_request *)ph; num_req++; if (stcb->asoc.stream_reset_outstanding) { seq = ntohl(req_out->response_seq); if (seq == stcb->asoc.str_reset_seq_out) { /* implicit ack */ (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_RESULT_PERFORMED, NULL); } } sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc); } else if (ptype == SCTP_STR_RESET_ADD_OUT_STREAMS) { struct sctp_stream_reset_add_strm *str_add; if (param_len < sizeof(struct sctp_stream_reset_add_strm)) { break; } str_add = (struct sctp_stream_reset_add_strm *)ph; num_req++; sctp_handle_str_reset_add_strm(stcb, chk, str_add); } else if (ptype == SCTP_STR_RESET_ADD_IN_STREAMS) { struct sctp_stream_reset_add_strm *str_add; if (param_len < sizeof(struct sctp_stream_reset_add_strm)) { break; } str_add = (struct sctp_stream_reset_add_strm *)ph; num_req++; sctp_handle_str_reset_add_out_strm(stcb, chk, str_add); } else if (ptype == SCTP_STR_RESET_IN_REQUEST) { struct sctp_stream_reset_in_request *req_in; num_req++; req_in = (struct sctp_stream_reset_in_request *)ph; sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc); } else if (ptype == SCTP_STR_RESET_TSN_REQUEST) { struct sctp_stream_reset_tsn_request *req_tsn; num_req++; req_tsn = (struct sctp_stream_reset_tsn_request *)ph; if (sctp_handle_str_reset_request_tsn(stcb, chk, req_tsn)) { ret_code = 1; goto strres_nochunk; } /* no more */ break; } else if (ptype == SCTP_STR_RESET_RESPONSE) { struct sctp_stream_reset_response *resp; uint32_t result; if (param_len < sizeof(struct sctp_stream_reset_response)) { break; } resp = (struct sctp_stream_reset_response *)ph; seq = ntohl(resp->response_seq); result = ntohl(resp->result); if (sctp_handle_stream_reset_response(stcb, seq, result, resp)) { ret_code = 1; goto strres_nochunk; } } else { break; } offset += SCTP_SIZE32(param_len); if (remaining_length >= SCTP_SIZE32(param_len)) { remaining_length -= SCTP_SIZE32(param_len); } else { remaining_length = 0; } } if (num_req == 0) { /* we have no response free the stuff */ goto strres_nochunk; } /* ok we have a chunk to link in */ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); stcb->asoc.ctrl_queue_cnt++; return (ret_code); } /* * Handle a router or endpoints report of a packet loss, there are two ways * to handle this, either we get the whole packet and must disect it * ourselves (possibly with truncation and or corruption) or it is a summary * from a middle box that did the disectting for us. */ static void sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t limit) { uint32_t bottle_bw, on_queue; uint16_t trunc_len; unsigned int chlen; unsigned int at; struct sctp_chunk_desc desc; struct sctp_chunkhdr *ch; chlen = ntohs(cp->ch.chunk_length); chlen -= sizeof(struct sctp_pktdrop_chunk); /* XXX possible chlen underflow */ if (chlen == 0) { ch = NULL; if (cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) SCTP_STAT_INCR(sctps_pdrpbwrpt); } else { ch = (struct sctp_chunkhdr *)(cp->data + sizeof(struct sctphdr)); chlen -= sizeof(struct sctphdr); /* XXX possible chlen underflow */ memset(&desc, 0, sizeof(desc)); } trunc_len = (uint16_t)ntohs(cp->trunc_len); if (trunc_len > limit) { trunc_len = limit; } /* now the chunks themselves */ while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) { desc.chunk_type = ch->chunk_type; /* get amount we need to move */ at = ntohs(ch->chunk_length); if (at < sizeof(struct sctp_chunkhdr)) { /* corrupt chunk, maybe at the end? */ SCTP_STAT_INCR(sctps_pdrpcrupt); break; } if (trunc_len == 0) { /* we are supposed to have all of it */ if (at > chlen) { /* corrupt skip it */ SCTP_STAT_INCR(sctps_pdrpcrupt); break; } } else { /* is there enough of it left ? */ if (desc.chunk_type == SCTP_DATA) { if (chlen < (sizeof(struct sctp_data_chunk) + sizeof(desc.data_bytes))) { break; } } else { if (chlen < sizeof(struct sctp_chunkhdr)) { break; } } } if (desc.chunk_type == SCTP_DATA) { /* can we get out the tsn? */ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX)) SCTP_STAT_INCR(sctps_pdrpmbda); if (chlen >= (sizeof(struct sctp_data_chunk) + sizeof(uint32_t))) { /* yep */ struct sctp_data_chunk *dcp; uint8_t *ddp; unsigned int iii; dcp = (struct sctp_data_chunk *)ch; ddp = (uint8_t *)(dcp + 1); for (iii = 0; iii < sizeof(desc.data_bytes); iii++) { desc.data_bytes[iii] = ddp[iii]; } desc.tsn_ifany = dcp->dp.tsn; } else { /* nope we are done. */ SCTP_STAT_INCR(sctps_pdrpnedat); break; } } else { if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX)) SCTP_STAT_INCR(sctps_pdrpmbct); } if (process_chunk_drop(stcb, &desc, net, cp->ch.chunk_flags)) { SCTP_STAT_INCR(sctps_pdrppdbrk); break; } if (SCTP_SIZE32(at) > chlen) { break; } chlen -= SCTP_SIZE32(at); if (chlen < sizeof(struct sctp_chunkhdr)) { /* done, none left */ break; } ch = (struct sctp_chunkhdr *)((caddr_t)ch + SCTP_SIZE32(at)); } /* Now update any rwnd --- possibly */ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) == 0) { /* From a peer, we get a rwnd report */ uint32_t a_rwnd; SCTP_STAT_INCR(sctps_pdrpfehos); bottle_bw = ntohl(cp->bottle_bw); on_queue = ntohl(cp->current_onq); if (bottle_bw && on_queue) { /* a rwnd report is in here */ if (bottle_bw > on_queue) a_rwnd = bottle_bw - on_queue; else a_rwnd = 0; if (a_rwnd == 0) stcb->asoc.peers_rwnd = 0; else { if (a_rwnd > stcb->asoc.total_flight) { stcb->asoc.peers_rwnd = a_rwnd - stcb->asoc.total_flight; } else { stcb->asoc.peers_rwnd = 0; } if (stcb->asoc.peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ stcb->asoc.peers_rwnd = 0; } } } } else { SCTP_STAT_INCR(sctps_pdrpfmbox); } /* now middle boxes in sat networks get a cwnd bump */ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) && (stcb->asoc.sat_t3_loss_recovery == 0) && (stcb->asoc.sat_network)) { /* * This is debatable but for sat networks it makes sense * Note if a T3 timer has went off, we will prohibit any * changes to cwnd until we exit the t3 loss recovery. */ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb, net, cp, &bottle_bw, &on_queue); } } /* * handles all control chunks in a packet inputs: - m: mbuf chain, assumed to * still contain IP/SCTP header - stcb: is the tcb found for this packet - * offset: offset into the mbuf chain to first chunkhdr - length: is the * length of the complete packet outputs: - length: modified to remaining * length after control processing - netp: modified to new sctp_nets after * cookie-echo processing - return NULL to discard the packet (ie. no asoc, * bad packet,...) otherwise return the tcb for this packet */ #ifdef __GNUC__ __attribute__((noinline)) #endif static struct sctp_tcb * sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen, uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum, uint32_t vrf_id, uint16_t port) { struct sctp_association *asoc; struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; uint32_t vtag_in; int num_chunks = 0; /* number of control chunks processed */ uint32_t chk_length; int ret; int abort_no_unlock = 0; int ecne_seen = 0; /* * How big should this be, and should it be alloc'd? Lets try the * d-mtu-ceiling for now (2k) and that should hopefully work ... * until we get into jumbo grams and such.. */ uint8_t chunk_buf[SCTP_CHUNK_BUFFER_SIZE]; struct sctp_tcb *locked_tcb = stcb; int got_auth = 0; uint32_t auth_offset = 0, auth_len = 0; int auth_skipped = 0; int asconf_cnt = 0; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n", iphlen, *offset, length, (void *)stcb); /* validate chunk header length... */ if (ntohs(ch->chunk_length) < sizeof(*ch)) { SCTPDBG(SCTP_DEBUG_INPUT1, "Invalid header length %d\n", ntohs(ch->chunk_length)); if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } /* * validate the verification tag */ vtag_in = ntohl(sh->v_tag); if (locked_tcb) { SCTP_TCB_LOCK_ASSERT(locked_tcb); } if (ch->chunk_type == SCTP_INITIATION) { SCTPDBG(SCTP_DEBUG_INPUT1, "Its an INIT of len:%d vtag:%x\n", ntohs(ch->chunk_length), vtag_in); if (vtag_in != 0) { /* protocol error- silently discard... */ SCTP_STAT_INCR(sctps_badvtag); if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } } else if (ch->chunk_type != SCTP_COOKIE_ECHO) { /* * If there is no stcb, skip the AUTH chunk and process * later after a stcb is found (to validate the lookup was * valid. */ if ((ch->chunk_type == SCTP_AUTHENTICATION) && (stcb == NULL) && (inp->auth_supported == 1)) { /* save this chunk for later processing */ auth_skipped = 1; auth_offset = *offset; auth_len = ntohs(ch->chunk_length); /* (temporarily) move past this chunk */ *offset += SCTP_SIZE32(auth_len); if (*offset >= length) { /* no more data left in the mbuf chain */ *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, sizeof(struct sctp_chunkhdr), chunk_buf); } if (ch == NULL) { /* Help */ *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } if (ch->chunk_type == SCTP_COOKIE_ECHO) { goto process_control_chunks; } /* * first check if it's an ASCONF with an unknown src addr we * need to look inside to find the association */ if (ch->chunk_type == SCTP_ASCONF && stcb == NULL) { struct sctp_chunkhdr *asconf_ch = ch; uint32_t asconf_offset = 0, asconf_len = 0; /* inp's refcount may be reduced */ SCTP_INP_INCR_REF(inp); asconf_offset = *offset; do { asconf_len = ntohs(asconf_ch->chunk_length); if (asconf_len < sizeof(struct sctp_asconf_paramhdr)) break; stcb = sctp_findassociation_ep_asconf(m, *offset, dst, sh, &inp, netp, vrf_id); if (stcb != NULL) break; asconf_offset += SCTP_SIZE32(asconf_len); asconf_ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, asconf_offset, sizeof(struct sctp_chunkhdr), chunk_buf); } while (asconf_ch != NULL && asconf_ch->chunk_type == SCTP_ASCONF); if (stcb == NULL) { /* * reduce inp's refcount if not reduced in * sctp_findassociation_ep_asconf(). */ SCTP_INP_DECR_REF(inp); } else { locked_tcb = stcb; } /* now go back and verify any auth chunk to be sure */ if (auth_skipped && (stcb != NULL)) { struct sctp_auth_chunk *auth; auth = (struct sctp_auth_chunk *) sctp_m_getptr(m, auth_offset, auth_len, chunk_buf); got_auth = 1; auth_skipped = 0; if ((auth == NULL) || sctp_handle_auth(stcb, auth, m, auth_offset)) { /* auth HMAC failed so dump it */ *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } else { /* remaining chunks are HMAC checked */ stcb->asoc.authenticated = 1; } } } if (stcb == NULL) { snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); /* no association, so it's out of the blue... */ sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } asoc = &stcb->asoc; /* ABORT and SHUTDOWN can use either v_tag... */ if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) || (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) || (ch->chunk_type == SCTP_PACKET_DROPPED)) { /* Take the T-bit always into account. */ if ((((ch->chunk_flags & SCTP_HAD_NO_TCB) == 0) && (vtag_in == asoc->my_vtag)) || (((ch->chunk_flags & SCTP_HAD_NO_TCB) == SCTP_HAD_NO_TCB) && (vtag_in == asoc->peer_vtag))) { /* this is valid */ } else { /* drop this packet... */ SCTP_STAT_INCR(sctps_badvtag); if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } } else if (ch->chunk_type == SCTP_SHUTDOWN_ACK) { if (vtag_in != asoc->my_vtag) { /* * this could be a stale SHUTDOWN-ACK or the * peer never got the SHUTDOWN-COMPLETE and * is still hung; we have started a new asoc * but it won't complete until the shutdown * is completed */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, op_err, mflowtype, mflowid, fibnum, vrf_id, port); return (NULL); } } else { /* for all other chunks, vtag must match */ if (vtag_in != asoc->my_vtag) { /* invalid vtag... */ SCTPDBG(SCTP_DEBUG_INPUT3, "invalid vtag: %xh, expect %xh\n", vtag_in, asoc->my_vtag); SCTP_STAT_INCR(sctps_badvtag); if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } } } /* end if !SCTP_COOKIE_ECHO */ /* * process all control chunks... */ if (((ch->chunk_type == SCTP_SELECTIVE_ACK) || (ch->chunk_type == SCTP_NR_SELECTIVE_ACK) || (ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) && (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) { /* implied cookie-ack.. we must have lost the ack */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, *netp); } process_control_chunks: while (IS_SCTP_CONTROL(ch)) { /* validate chunk length */ chk_length = ntohs(ch->chunk_length); SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_process_control: processing a chunk type=%u, len=%u\n", ch->chunk_type, chk_length); SCTP_LTRACE_CHK(inp, stcb, ch->chunk_type, chk_length); if (chk_length < sizeof(*ch) || (*offset + (int)chk_length) > length) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } SCTP_STAT_INCR_COUNTER64(sctps_incontrolchunks); /* * INIT-ACK only gets the init ack "header" portion only * because we don't have to process the peer's COOKIE. All * others get a complete chunk. */ if ((ch->chunk_type == SCTP_INITIATION_ACK) || (ch->chunk_type == SCTP_INITIATION)) { /* get an init-ack chunk */ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, sizeof(struct sctp_init_ack_chunk), chunk_buf); if (ch == NULL) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } } else { /* For cookies and all other chunks. */ if (chk_length > sizeof(chunk_buf)) { /* * use just the size of the chunk buffer so * the front part of our chunks fit in * contiguous space up to the chunk buffer * size (508 bytes). For chunks that need to * get more than that they must use the * sctp_m_getptr() function or other means * (e.g. know how to parse mbuf chains). * Cookies do this already. */ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, (sizeof(chunk_buf) - 4), chunk_buf); if (ch == NULL) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } } else { /* We can fit it all */ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, chk_length, chunk_buf); if (ch == NULL) { SCTP_PRINTF("sctp_process_control: Can't get the all data....\n"); *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } } } num_chunks++; /* Save off the last place we got a control from */ if (stcb != NULL) { if (((netp != NULL) && (*netp != NULL)) || (ch->chunk_type == SCTP_ASCONF)) { /* * allow last_control to be NULL if * ASCONF... ASCONF processing will find the * right net later */ if ((netp != NULL) && (*netp != NULL)) stcb->asoc.last_control_chunk_from = *netp; } } #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xB0, ch->chunk_type); #endif /* check to see if this chunk required auth, but isn't */ if ((stcb != NULL) && (stcb->asoc.auth_supported == 1) && sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) && !stcb->asoc.authenticated) { /* "silently" ignore */ SCTP_STAT_INCR(sctps_recvauthmissing); goto next_chunk; } switch (ch->chunk_type) { case SCTP_INITIATION: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT\n"); /* The INIT chunk must be the only chunk. */ if ((num_chunks > 1) || (length - *offset > (int)SCTP_SIZE32(chk_length))) { /* RFC 4960 requires that no ABORT is sent */ *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } /* Honor our resource limit. */ if (chk_length > SCTP_LARGEST_INIT_ACCEPTED) { op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); *offset = length; return (NULL); } sctp_handle_init(m, iphlen, *offset, src, dst, sh, (struct sctp_init_chunk *)ch, inp, stcb, *netp, &abort_no_unlock, mflowtype, mflowid, vrf_id, port); *offset = length; if ((!abort_no_unlock) && (locked_tcb)) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); break; case SCTP_PAD_CHUNK: break; case SCTP_INITIATION_ACK: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT-ACK\n"); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ if ((stcb) && (stcb->asoc.total_output_queue_size)) { ; } else { if ((locked_tcb != NULL) && (locked_tcb != stcb)) { /* Very unlikely */ SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; if (stcb) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } return (NULL); } } /* The INIT-ACK chunk must be the only chunk. */ if ((num_chunks > 1) || (length - *offset > (int)SCTP_SIZE32(chk_length))) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } if ((netp) && (*netp)) { ret = sctp_handle_init_ack(m, iphlen, *offset, src, dst, sh, (struct sctp_init_ack_chunk *)ch, stcb, *netp, &abort_no_unlock, mflowtype, mflowid, vrf_id); } else { ret = -1; } *offset = length; if (abort_no_unlock) { return (NULL); } /* * Special case, I must call the output routine to * get the cookie echoed */ if ((stcb != NULL) && (ret == 0)) { sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED); } if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); break; case SCTP_SELECTIVE_ACK: { struct sctp_sack_chunk *sack; int abort_now = 0; uint32_t a_rwnd, cum_ack; uint16_t num_seg, num_dup; uint8_t flags; int offset_seg, offset_dup; SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK\n"); SCTP_STAT_INCR(sctps_recvsacks); if (stcb == NULL) { SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing SACK chunk\n"); break; } if (chk_length < sizeof(struct sctp_sack_chunk)) { SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on SACK chunk, too small\n"); break; } if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) { /*- * If we have sent a shutdown-ack, we will pay no * attention to a sack sent in to us since * we don't care anymore. */ break; } sack = (struct sctp_sack_chunk *)ch; flags = ch->chunk_flags; cum_ack = ntohl(sack->sack.cum_tsn_ack); num_seg = ntohs(sack->sack.num_gap_ack_blks); num_dup = ntohs(sack->sack.num_dup_tsns); a_rwnd = (uint32_t)ntohl(sack->sack.a_rwnd); if (sizeof(struct sctp_sack_chunk) + num_seg * sizeof(struct sctp_gap_ack_block) + num_dup * sizeof(uint32_t) != chk_length) { SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size of SACK chunk\n"); break; } offset_seg = *offset + sizeof(struct sctp_sack_chunk); offset_dup = offset_seg + num_seg * sizeof(struct sctp_gap_ack_block); SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n", cum_ack, num_seg, a_rwnd); stcb->asoc.seen_a_sack_this_pkt = 1; if ((stcb->asoc.pr_sctp_cnt == 0) && (num_seg == 0) && SCTP_TSN_GE(cum_ack, stcb->asoc.last_acked_seq) && (stcb->asoc.saw_sack_with_frags == 0) && (stcb->asoc.saw_sack_with_nr_frags == 0) && (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) ) { /* * We have a SIMPLE sack having no * prior segments and data on sent * queue to be acked.. Use the * faster path sack processing. We * also allow window update sacks * with no missing segments to go * this way too. */ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, &abort_now, ecne_seen); } else { if (netp && *netp) sctp_handle_sack(m, offset_seg, offset_dup, stcb, num_seg, 0, num_dup, &abort_now, flags, cum_ack, a_rwnd, ecne_seen); } if (abort_now) { /* ABORT signal from sack processing */ *offset = length; return (NULL); } if (TAILQ_EMPTY(&stcb->asoc.send_queue) && TAILQ_EMPTY(&stcb->asoc.sent_queue) && (stcb->asoc.stream_queue_cnt == 0)) { sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } } break; /* * EY - nr_sack: If the received chunk is an * nr_sack chunk */ case SCTP_NR_SELECTIVE_ACK: { struct sctp_nr_sack_chunk *nr_sack; int abort_now = 0; uint32_t a_rwnd, cum_ack; uint16_t num_seg, num_nr_seg, num_dup; uint8_t flags; int offset_seg, offset_dup; SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK\n"); SCTP_STAT_INCR(sctps_recvsacks); if (stcb == NULL) { SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing NR-SACK chunk\n"); break; } if (stcb->asoc.nrsack_supported == 0) { goto unknown_chunk; } if (chk_length < sizeof(struct sctp_nr_sack_chunk)) { SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on NR-SACK chunk, too small\n"); break; } if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) { /*- * If we have sent a shutdown-ack, we will pay no * attention to a sack sent in to us since * we don't care anymore. */ break; } nr_sack = (struct sctp_nr_sack_chunk *)ch; flags = ch->chunk_flags; cum_ack = ntohl(nr_sack->nr_sack.cum_tsn_ack); num_seg = ntohs(nr_sack->nr_sack.num_gap_ack_blks); num_nr_seg = ntohs(nr_sack->nr_sack.num_nr_gap_ack_blks); num_dup = ntohs(nr_sack->nr_sack.num_dup_tsns); a_rwnd = (uint32_t)ntohl(nr_sack->nr_sack.a_rwnd); if (sizeof(struct sctp_nr_sack_chunk) + (num_seg + num_nr_seg) * sizeof(struct sctp_gap_ack_block) + num_dup * sizeof(uint32_t) != chk_length) { SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size of NR_SACK chunk\n"); break; } offset_seg = *offset + sizeof(struct sctp_nr_sack_chunk); offset_dup = offset_seg + num_seg * sizeof(struct sctp_gap_ack_block); SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n", cum_ack, num_seg, a_rwnd); stcb->asoc.seen_a_sack_this_pkt = 1; if ((stcb->asoc.pr_sctp_cnt == 0) && (num_seg == 0) && (num_nr_seg == 0) && SCTP_TSN_GE(cum_ack, stcb->asoc.last_acked_seq) && (stcb->asoc.saw_sack_with_frags == 0) && (stcb->asoc.saw_sack_with_nr_frags == 0) && (!TAILQ_EMPTY(&stcb->asoc.sent_queue))) { /* * We have a SIMPLE sack having no * prior segments and data on sent * queue to be acked. Use the faster * path sack processing. We also * allow window update sacks with no * missing segments to go this way * too. */ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, &abort_now, ecne_seen); } else { if (netp && *netp) sctp_handle_sack(m, offset_seg, offset_dup, stcb, num_seg, num_nr_seg, num_dup, &abort_now, flags, cum_ack, a_rwnd, ecne_seen); } if (abort_now) { /* ABORT signal from sack processing */ *offset = length; return (NULL); } if (TAILQ_EMPTY(&stcb->asoc.send_queue) && TAILQ_EMPTY(&stcb->asoc.sent_queue) && (stcb->asoc.stream_queue_cnt == 0)) { sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_NOT_LOCKED); } } break; case SCTP_HEARTBEAT_REQUEST: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT\n"); if ((stcb) && netp && *netp) { SCTP_STAT_INCR(sctps_recvheartbeat); sctp_send_heartbeat_ack(stcb, m, *offset, chk_length, *netp); /* He's alive so give him credit */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; } break; case SCTP_HEARTBEAT_ACK: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT-ACK\n"); if ((stcb == NULL) || (chk_length != sizeof(struct sctp_heartbeat_chunk))) { /* Its not ours */ *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } /* He's alive so give him credit */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; SCTP_STAT_INCR(sctps_recvheartbeatack); if (netp && *netp) sctp_handle_heartbeat_ack((struct sctp_heartbeat_chunk *)ch, stcb, *netp); break; case SCTP_ABORT_ASSOCIATION: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n", (void *)stcb); if ((stcb) && netp && *netp) sctp_handle_abort((struct sctp_abort_chunk *)ch, stcb, *netp); *offset = length; return (NULL); break; case SCTP_SHUTDOWN: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n", (void *)stcb); if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } if (netp && *netp) { int abort_flag = 0; sctp_handle_shutdown((struct sctp_shutdown_chunk *)ch, stcb, *netp, &abort_flag); if (abort_flag) { *offset = length; return (NULL); } } break; case SCTP_SHUTDOWN_ACK: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", (void *)stcb); if ((stcb) && (netp) && (*netp)) sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp); *offset = length; return (NULL); break; case SCTP_OPERATION_ERROR: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_OP-ERR\n"); if ((stcb) && netp && *netp && sctp_handle_error(ch, stcb, *netp) < 0) { *offset = length; return (NULL); } break; case SCTP_COOKIE_ECHO: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ECHO, stcb %p\n", (void *)stcb); if ((stcb) && (stcb->asoc.total_output_queue_size)) { ; } else { if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ abend: if (stcb) { SCTP_TCB_UNLOCK(stcb); } *offset = length; return (NULL); } } /* * First are we accepting? We do this again here * since it is possible that a previous endpoint WAS * listening responded to a INIT-ACK and then * closed. We opened and bound.. and are now no * longer listening. */ if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) { if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) { op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, ""); sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err, mflowtype, mflowid, vrf_id, port); } *offset = length; return (NULL); } else { struct mbuf *ret_buf; struct sctp_inpcb *linp; if (stcb) { linp = NULL; } else { linp = inp; } if (linp) { SCTP_ASOC_CREATE_LOCK(linp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { SCTP_ASOC_CREATE_UNLOCK(linp); goto abend; } } if (netp) { ret_buf = sctp_handle_cookie_echo(m, iphlen, *offset, src, dst, sh, (struct sctp_cookie_echo_chunk *)ch, &inp, &stcb, netp, auth_skipped, auth_offset, auth_len, &locked_tcb, mflowtype, mflowid, vrf_id, port); } else { ret_buf = NULL; } if (linp) { SCTP_ASOC_CREATE_UNLOCK(linp); } if (ret_buf == NULL) { if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } SCTPDBG(SCTP_DEBUG_INPUT3, "GAK, null buffer\n"); *offset = length; return (NULL); } /* if AUTH skipped, see if it verified... */ if (auth_skipped) { got_auth = 1; auth_skipped = 0; } if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) { /* * Restart the timer if we have * pending data */ struct sctp_tmit_chunk *chk; chk = TAILQ_FIRST(&stcb->asoc.sent_queue); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo); } } break; case SCTP_COOKIE_ACK: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", (void *)stcb); if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) { if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ if ((stcb) && (stcb->asoc.total_output_queue_size)) { ; } else if (stcb) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif *offset = length; return (NULL); } } /* He's alive so give him credit */ if ((stcb) && netp && *netp) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, *netp); } break; case SCTP_ECN_ECHO: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-ECHO\n"); /* He's alive so give him credit */ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_ecne_chunk))) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } if (stcb) { if (stcb->asoc.ecn_supported == 0) { goto unknown_chunk; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_ecn_echo((struct sctp_ecne_chunk *)ch, stcb); ecne_seen = 1; } break; case SCTP_ECN_CWR: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-CWR\n"); /* He's alive so give him credit */ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_cwr_chunk))) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } if (stcb) { if (stcb->asoc.ecn_supported == 0) { goto unknown_chunk; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb, *netp); } break; case SCTP_SHUTDOWN_COMPLETE: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", (void *)stcb); /* must be first and only chunk */ if ((num_chunks > 1) || (length - *offset > (int)SCTP_SIZE32(chk_length))) { *offset = length; if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } return (NULL); } if ((stcb) && netp && *netp) { sctp_handle_shutdown_complete((struct sctp_shutdown_complete_chunk *)ch, stcb, *netp); } *offset = length; return (NULL); break; case SCTP_ASCONF: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n"); /* He's alive so give him credit */ if (stcb) { if (stcb->asoc.asconf_supported == 0) { goto unknown_chunk; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_asconf(m, *offset, src, (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0); asconf_cnt++; } break; case SCTP_ASCONF_ACK: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF-ACK\n"); if (chk_length < sizeof(struct sctp_asconf_ack_chunk)) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } if ((stcb) && netp && *netp) { if (stcb->asoc.asconf_supported == 0) { goto unknown_chunk; } /* He's alive so give him credit */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_asconf_ack(m, *offset, (struct sctp_asconf_ack_chunk *)ch, stcb, *netp, &abort_no_unlock); if (abort_no_unlock) return (NULL); } break; case SCTP_FORWARD_CUM_TSN: case SCTP_IFORWARD_CUM_TSN: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_FWD-TSN\n"); if (chk_length < sizeof(struct sctp_forward_tsn_chunk)) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } /* He's alive so give him credit */ if (stcb) { int abort_flag = 0; if (stcb->asoc.prsctp_supported == 0) { goto unknown_chunk; } stcb->asoc.overall_error_count = 0; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } *fwd_tsn_seen = 1; if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* We are not interested anymore */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_31); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif *offset = length; return (NULL); } /* * For sending a SACK this looks like DATA * chunks. */ stcb->asoc.last_data_chunk_from = stcb->asoc.last_control_chunk_from; sctp_handle_forward_tsn(stcb, (struct sctp_forward_tsn_chunk *)ch, &abort_flag, m, *offset); if (abort_flag) { *offset = length; return (NULL); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; } } break; case SCTP_STREAM_RESET: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_STREAM_RESET\n"); if (((stcb == NULL) || (ch == NULL) || (chk_length < sizeof(struct sctp_stream_reset_tsn_req)))) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } if (stcb->asoc.reconfig_supported == 0) { goto unknown_chunk; } if (sctp_handle_stream_reset(stcb, m, *offset, ch)) { /* stop processing */ *offset = length; return (NULL); } break; case SCTP_PACKET_DROPPED: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_PACKET_DROPPED\n"); /* re-get it all please */ if (chk_length < sizeof(struct sctp_pktdrop_chunk)) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } if (ch && (stcb) && netp && (*netp)) { if (stcb->asoc.pktdrop_supported == 0) { goto unknown_chunk; } sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch, stcb, *netp, min(chk_length, (sizeof(chunk_buf) - 4))); } break; case SCTP_AUTHENTICATION: SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n"); if (stcb == NULL) { /* save the first AUTH for later processing */ if (auth_skipped == 0) { auth_offset = *offset; auth_len = chk_length; auth_skipped = 1; } /* skip this chunk (temporarily) */ goto next_chunk; } if (stcb->asoc.auth_supported == 0) { goto unknown_chunk; } if ((chk_length < (sizeof(struct sctp_auth_chunk))) || (chk_length > (sizeof(struct sctp_auth_chunk) + SCTP_AUTH_DIGEST_LEN_MAX))) { /* Its not ours */ if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } if (got_auth == 1) { /* skip this chunk... it's already auth'd */ goto next_chunk; } got_auth = 1; if ((ch == NULL) || sctp_handle_auth(stcb, (struct sctp_auth_chunk *)ch, m, *offset)) { /* auth HMAC failed so dump the packet */ *offset = length; return (stcb); } else { /* remaining chunks are HMAC checked */ stcb->asoc.authenticated = 1; } break; default: unknown_chunk: /* it's an unknown chunk! */ if ((ch->chunk_type & 0x40) && (stcb != NULL)) { struct sctp_gen_error_cause *cause; int len; op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause), 0, M_NOWAIT, 1, MT_DATA); if (op_err != NULL) { len = min(SCTP_SIZE32(chk_length), (uint32_t)(length - *offset)); cause = mtod(op_err, struct sctp_gen_error_cause *); cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK); cause->length = htons((uint16_t)(len + sizeof(struct sctp_gen_error_cause))); SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause); SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, len, M_NOWAIT); if (SCTP_BUF_NEXT(op_err) != NULL) { #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(SCTP_BUF_NEXT(op_err), SCTP_MBUF_ICOPY); } #endif sctp_queue_op_err(stcb, op_err); } else { sctp_m_freem(op_err); } } } if ((ch->chunk_type & 0x80) == 0) { /* discard this packet */ *offset = length; return (stcb); } /* else skip this bad chunk and continue... */ break; } /* switch (ch->chunk_type) */ next_chunk: /* get the next chunk */ *offset += SCTP_SIZE32(chk_length); if (*offset >= length) { /* no more data left in the mbuf chain */ break; } ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset, sizeof(struct sctp_chunkhdr), chunk_buf); if (ch == NULL) { if (locked_tcb) { SCTP_TCB_UNLOCK(locked_tcb); } *offset = length; return (NULL); } } /* while */ if (asconf_cnt > 0 && stcb != NULL) { sctp_send_asconf_ack(stcb); } return (stcb); } /* * common input chunk processing (v4 and v6) */ void sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int length, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_chunkhdr *ch, #if !defined(SCTP_WITH_NO_CSUM) uint8_t compute_crc, #endif uint8_t ecn_bits, uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum, uint32_t vrf_id, uint16_t port) { uint32_t high_tsn; int fwd_tsn_seen = 0, data_processed = 0; struct mbuf *m = *mm, *op_err; char msg[SCTP_DIAG_INFO_LEN]; int un_sent; int cnt_ctrl_ready = 0; struct sctp_inpcb *inp = NULL, *inp_decr = NULL; struct sctp_tcb *stcb = NULL; struct sctp_nets *net = NULL; SCTP_STAT_INCR(sctps_recvdatagrams); #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xE0, 1); sctp_auditing(0, inp, stcb, net); #endif #if !defined(SCTP_WITH_NO_CSUM) if (compute_crc != 0) { uint32_t check, calc_check; check = sh->checksum; sh->checksum = 0; calc_check = sctp_calculate_cksum(m, iphlen); sh->checksum = check; if (calc_check != check) { SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n", calc_check, check, (void *)m, length, iphlen); stcb = sctp_findassociation_addr(m, offset, src, dst, sh, ch, &inp, &net, vrf_id); #if defined(INET) || defined(INET6) if ((ch->chunk_type != SCTP_INITIATION) && (net != NULL) && (net->port != port)) { if (net->port == 0) { /* UDP encapsulation turned on. */ net->mtu -= sizeof(struct udphdr); if (stcb->asoc.smallest_mtu > net->mtu) { sctp_pathmtu_adjustment(stcb, net->mtu); } } else if (port == 0) { /* UDP encapsulation turned off. */ net->mtu += sizeof(struct udphdr); /* XXX Update smallest_mtu */ } net->port = port; } #endif if (net != NULL) { net->flowtype = mflowtype; net->flowid = mflowid; } if ((inp != NULL) && (stcb != NULL)) { sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED); } else if ((inp != NULL) && (stcb == NULL)) { inp_decr = inp; } SCTP_STAT_INCR(sctps_badsum); SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors); goto out; } } #endif /* Destination port of 0 is illegal, based on RFC4960. */ if (sh->dest_port == 0) { SCTP_STAT_INCR(sctps_hdrops); goto out; } stcb = sctp_findassociation_addr(m, offset, src, dst, sh, ch, &inp, &net, vrf_id); #if defined(INET) || defined(INET6) if ((ch->chunk_type != SCTP_INITIATION) && (net != NULL) && (net->port != port)) { if (net->port == 0) { /* UDP encapsulation turned on. */ net->mtu -= sizeof(struct udphdr); if (stcb->asoc.smallest_mtu > net->mtu) { sctp_pathmtu_adjustment(stcb, net->mtu); } } else if (port == 0) { /* UDP encapsulation turned off. */ net->mtu += sizeof(struct udphdr); /* XXX Update smallest_mtu */ } net->port = port; } #endif if (net != NULL) { net->flowtype = mflowtype; net->flowid = mflowid; } if (inp == NULL) { SCTP_STAT_INCR(sctps_noport); if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) { goto out; } if (ch->chunk_type == SCTP_SHUTDOWN_ACK) { sctp_send_shutdown_complete2(src, dst, sh, mflowtype, mflowid, fibnum, vrf_id, port); goto out; } if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) { goto out; } if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) { if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) || ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) && (ch->chunk_type != SCTP_INIT))) { op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Out of the blue"); sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err, mflowtype, mflowid, fibnum, vrf_id, port); } } goto out; } else if (stcb == NULL) { inp_decr = inp; } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /*- * I very much doubt any of the IPSEC stuff will work but I have no * idea, so I will leave it in place. */ if (inp != NULL) { switch (dst->sa_family) { #ifdef INET case AF_INET: - if (ipsec4_in_reject(m, &inp->ip_inp.inp)) { - SCTP_STAT_INCR(sctps_hdrops); - goto out; + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_CHECK_POLICY(ipv4, m, + &inp->ip_inp.inp) != 0) { + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } } break; #endif #ifdef INET6 case AF_INET6: - if (ipsec6_in_reject(m, &inp->ip_inp.inp)) { - SCTP_STAT_INCR(sctps_hdrops); - goto out; + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_CHECK_POLICY(ipv6, m, + &inp->ip_inp.inp) != 0) { + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } } break; #endif default: break; } } -#endif +#endif /* IPSEC */ SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n", (void *)m, iphlen, offset, length, (void *)stcb); if (stcb) { /* always clear this before beginning a packet */ stcb->asoc.authenticated = 0; stcb->asoc.seen_a_sack_this_pkt = 0; SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n", (void *)stcb, stcb->asoc.state); if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) || (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) { /*- * If we hit here, we had a ref count * up when the assoc was aborted and the * timer is clearing out the assoc, we should * NOT respond to any packet.. its OOTB. */ SCTP_TCB_UNLOCK(stcb); stcb = NULL; snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); goto out; } } if (IS_SCTP_CONTROL(ch)) { /* process the control portion of the SCTP packet */ /* sa_ignore NO_NULL_CHK */ stcb = sctp_process_control(m, iphlen, &offset, length, src, dst, sh, ch, inp, stcb, &net, &fwd_tsn_seen, mflowtype, mflowid, fibnum, vrf_id, port); if (stcb) { /* * This covers us if the cookie-echo was there and * it changes our INP. */ inp = stcb->sctp_ep; #if defined(INET) || defined(INET6) if ((ch->chunk_type != SCTP_INITIATION) && (net != NULL) && (net->port != port)) { if (net->port == 0) { /* UDP encapsulation turned on. */ net->mtu -= sizeof(struct udphdr); if (stcb->asoc.smallest_mtu > net->mtu) { sctp_pathmtu_adjustment(stcb, net->mtu); } } else if (port == 0) { /* UDP encapsulation turned off. */ net->mtu += sizeof(struct udphdr); /* XXX Update smallest_mtu */ } net->port = port; } #endif } } else { /* * no control chunks, so pre-process DATA chunks (these * checks are taken care of by control processing) */ /* * if DATA only packet, and auth is required, then punt... * can't have authenticated without any AUTH (control) * chunks */ if ((stcb != NULL) && (stcb->asoc.auth_supported == 1) && sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) { /* "silently" ignore */ SCTP_STAT_INCR(sctps_recvauthmissing); goto out; } if (stcb == NULL) { /* out of the blue DATA chunk */ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err, mflowtype, mflowid, fibnum, vrf_id, port); goto out; } if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) { /* v_tag mismatch! */ SCTP_STAT_INCR(sctps_badvtag); goto out; } } if (stcb == NULL) { /* * no valid TCB for this packet, or we found it's a bad * packet while processing control, or we're done with this * packet (done or skip rest of data), so we drop it... */ goto out; } /* * DATA chunk processing */ /* plow through the data chunks while length > offset */ /* * Rest should be DATA only. Check authentication state if AUTH for * DATA is required. */ if ((length > offset) && (stcb != NULL) && (stcb->asoc.auth_supported == 1) && sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) && !stcb->asoc.authenticated) { /* "silently" ignore */ SCTP_STAT_INCR(sctps_recvauthmissing); SCTPDBG(SCTP_DEBUG_AUTH1, "Data chunk requires AUTH, skipped\n"); goto trigger_send; } if (length > offset) { int retval; /* * First check to make sure our state is correct. We would * not get here unless we really did have a tag, so we don't * abort if this happens, just dump the chunk silently. */ switch (SCTP_GET_STATE(&stcb->asoc)) { case SCTP_STATE_COOKIE_ECHOED: /* * we consider data with valid tags in this state * shows us the cookie-ack was lost. Imply it was * there. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) { sctp_misc_ints(SCTP_THRESHOLD_CLEAR, stcb->asoc.overall_error_count, 0, SCTP_FROM_SCTP_INPUT, __LINE__); } stcb->asoc.overall_error_count = 0; sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, net); break; case SCTP_STATE_COOKIE_WAIT: /* * We consider OOTB any data sent during asoc setup. */ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); goto out; /* sa_ignore NOTREACHED */ break; case SCTP_STATE_EMPTY: /* should not happen */ case SCTP_STATE_INUSE: /* should not happen */ case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */ case SCTP_STATE_SHUTDOWN_ACK_SENT: default: goto out; /* sa_ignore NOTREACHED */ break; case SCTP_STATE_OPEN: case SCTP_STATE_SHUTDOWN_SENT: break; } /* plow through the data chunks while length > offset */ retval = sctp_process_data(mm, iphlen, &offset, length, inp, stcb, net, &high_tsn); if (retval == 2) { /* * The association aborted, NO UNLOCK needed since * the association is destroyed. */ stcb = NULL; goto out; } data_processed = 1; /* * Anything important needs to have been m_copy'ed in * process_data */ } /* take care of ecn */ if ((data_processed == 1) && (stcb->asoc.ecn_supported == 1) && ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS)) { /* Yep, we need to add a ECNE */ sctp_send_ecn_echo(stcb, net, high_tsn); } if ((data_processed == 0) && (fwd_tsn_seen)) { int was_a_gap; uint32_t highest_tsn; if (SCTP_TSN_GT(stcb->asoc.highest_tsn_inside_nr_map, stcb->asoc.highest_tsn_inside_map)) { highest_tsn = stcb->asoc.highest_tsn_inside_nr_map; } else { highest_tsn = stcb->asoc.highest_tsn_inside_map; } was_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn); stcb->asoc.send_sack = 1; sctp_sack_check(stcb, was_a_gap); } else if (fwd_tsn_seen) { stcb->asoc.send_sack = 1; } /* trigger send of any chunks in queue... */ trigger_send: #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xE0, 2); sctp_auditing(1, inp, stcb, net); #endif SCTPDBG(SCTP_DEBUG_INPUT1, "Check for chunk output prw:%d tqe:%d tf=%d\n", stcb->asoc.peers_rwnd, TAILQ_EMPTY(&stcb->asoc.control_send_queue), stcb->asoc.total_flight); un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight); if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) { cnt_ctrl_ready = stcb->asoc.ctrl_queue_cnt - stcb->asoc.ecn_echo_cnt_onq; } if (!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue) || cnt_ctrl_ready || stcb->asoc.trigger_reset || ((un_sent) && (stcb->asoc.peers_rwnd > 0 || (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) { SCTPDBG(SCTP_DEBUG_INPUT3, "Calling chunk OUTPUT\n"); sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED); SCTPDBG(SCTP_DEBUG_INPUT3, "chunk OUTPUT returns\n"); } #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xE0, 3); sctp_auditing(2, inp, stcb, net); #endif out: if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } if (inp_decr != NULL) { /* reduce ref-count */ SCTP_INP_WLOCK(inp_decr); SCTP_INP_DECR_REF(inp_decr); SCTP_INP_WUNLOCK(inp_decr); } return; } #ifdef INET void sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port) { struct mbuf *m; int iphlen; uint32_t vrf_id = 0; uint8_t ecn_bits; struct sockaddr_in src, dst; struct ip *ip; struct sctphdr *sh; struct sctp_chunkhdr *ch; int length, offset; #if !defined(SCTP_WITH_NO_CSUM) uint8_t compute_crc; #endif uint32_t mflowid; uint8_t mflowtype; uint16_t fibnum; iphlen = off; if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) { SCTP_RELEASE_PKT(i_pak); return; } m = SCTP_HEADER_TO_CHAIN(i_pak); #ifdef SCTP_MBUF_LOGGING /* Log in any input mbufs */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(m, SCTP_MBUF_INPUT); } #endif #ifdef SCTP_PACKET_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { sctp_packet_log(m); } #endif SCTPDBG(SCTP_DEBUG_CRCOFFLOAD, "sctp_input(): Packet of length %d received on %s with csum_flags 0x%b.\n", m->m_pkthdr.len, if_name(m->m_pkthdr.rcvif), (int)m->m_pkthdr.csum_flags, CSUM_BITS); mflowid = m->m_pkthdr.flowid; mflowtype = M_HASHTYPE_GET(m); fibnum = M_GETFIB(m); SCTP_STAT_INCR(sctps_recvpackets); SCTP_STAT_INCR_COUNTER64(sctps_inpackets); /* Get IP, SCTP, and first chunk header together in the first mbuf. */ offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); if (SCTP_BUF_LEN(m) < offset) { if ((m = m_pullup(m, offset)) == NULL) { SCTP_STAT_INCR(sctps_hdrops); return; } } ip = mtod(m, struct ip *); sh = (struct sctphdr *)((caddr_t)ip + iphlen); ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr)); offset -= sizeof(struct sctp_chunkhdr); memset(&src, 0, sizeof(struct sockaddr_in)); src.sin_family = AF_INET; src.sin_len = sizeof(struct sockaddr_in); src.sin_port = sh->src_port; src.sin_addr = ip->ip_src; memset(&dst, 0, sizeof(struct sockaddr_in)); dst.sin_family = AF_INET; dst.sin_len = sizeof(struct sockaddr_in); dst.sin_port = sh->dest_port; dst.sin_addr = ip->ip_dst; length = ntohs(ip->ip_len); /* Validate mbuf chain length with IP payload length. */ if (SCTP_HEADER_LEN(m) != length) { SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m)); SCTP_STAT_INCR(sctps_hdrops); goto out; } /* SCTP does not allow broadcasts or multicasts */ if (IN_MULTICAST(ntohl(dst.sin_addr.s_addr))) { goto out; } if (SCTP_IS_IT_BROADCAST(dst.sin_addr, m)) { goto out; } ecn_bits = ip->ip_tos; #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_recvnocrc); #else if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { SCTP_STAT_INCR(sctps_recvhwcrc); compute_crc = 0; } else { SCTP_STAT_INCR(sctps_recvswcrc); compute_crc = 1; } #endif sctp_common_input_processing(&m, iphlen, offset, length, (struct sockaddr *)&src, (struct sockaddr *)&dst, sh, ch, #if !defined(SCTP_WITH_NO_CSUM) compute_crc, #endif ecn_bits, mflowtype, mflowid, fibnum, vrf_id, port); out: if (m) { sctp_m_freem(m); } return; } #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) extern int *sctp_cpuarry; #endif int sctp_input(struct mbuf **mp, int *offp, int proto SCTP_UNUSED) { struct mbuf *m; int off; m = *mp; off = *offp; #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) if (mp_ncpus > 1) { struct ip *ip; struct sctphdr *sh; int offset; int cpu_to_use; uint32_t flowid, tag; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { flowid = m->m_pkthdr.flowid; } else { /* * No flow id built by lower layers fix it so we * create one. */ offset = off + sizeof(struct sctphdr); if (SCTP_BUF_LEN(m) < offset) { if ((m = m_pullup(m, offset)) == NULL) { SCTP_STAT_INCR(sctps_hdrops); return (IPPROTO_DONE); } } ip = mtod(m, struct ip *); sh = (struct sctphdr *)((caddr_t)ip + off); tag = htonl(sh->v_tag); flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port); m->m_pkthdr.flowid = flowid; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE_HASH); } cpu_to_use = sctp_cpuarry[flowid % mp_ncpus]; sctp_queue_to_mcore(m, off, cpu_to_use); return (IPPROTO_DONE); } #endif sctp_input_with_port(m, off, 0); return (IPPROTO_DONE); } #endif diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index b0c92da649f6..0eb5359e3e82 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -1,496 +1,490 @@ /*- * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef _NETINET_SCTP_OS_BSD_H_ #define _NETINET_SCTP_OS_BSD_H_ /* * includes */ #include "opt_ipsec.h" #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #ifdef INET6 #include -#ifdef IPSEC -#include -#endif #include #include #include #include #include #include #endif /* INET6 */ #include #include #include #ifndef in6pcb #define in6pcb inpcb #endif /* Declare all the malloc names for all the various mallocs */ MALLOC_DECLARE(SCTP_M_MAP); MALLOC_DECLARE(SCTP_M_STRMI); MALLOC_DECLARE(SCTP_M_STRMO); MALLOC_DECLARE(SCTP_M_ASC_ADDR); MALLOC_DECLARE(SCTP_M_ASC_IT); MALLOC_DECLARE(SCTP_M_AUTH_CL); MALLOC_DECLARE(SCTP_M_AUTH_KY); MALLOC_DECLARE(SCTP_M_AUTH_HL); MALLOC_DECLARE(SCTP_M_AUTH_IF); MALLOC_DECLARE(SCTP_M_STRESET); MALLOC_DECLARE(SCTP_M_CMSG); MALLOC_DECLARE(SCTP_M_COPYAL); MALLOC_DECLARE(SCTP_M_VRF); MALLOC_DECLARE(SCTP_M_IFA); MALLOC_DECLARE(SCTP_M_IFN); MALLOC_DECLARE(SCTP_M_TIMW); MALLOC_DECLARE(SCTP_M_MVRF); MALLOC_DECLARE(SCTP_M_ITER); MALLOC_DECLARE(SCTP_M_SOCKOPT); MALLOC_DECLARE(SCTP_M_MCORE); #if defined(SCTP_LOCAL_TRACE_BUF) #define SCTP_GET_CYCLECOUNT get_cyclecount() #define SCTP_CTR6 sctp_log_trace #else #define SCTP_CTR6 CTR6 #endif /* * Macros to expand out globals defined by various modules * to either a real global or a virtualized instance of one, * depending on whether VIMAGE is defined. */ /* then define the macro(s) that hook into the vimage macros */ #define MODULE_GLOBAL(__SYMBOL) V_##__SYMBOL #define V_system_base_info VNET(system_base_info) #define SCTP_BASE_INFO(__m) V_system_base_info.sctppcbinfo.__m #define SCTP_BASE_STATS V_system_base_info.sctpstat #define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m #define SCTP_BASE_SYSCTL(__m) V_system_base_info.sctpsysctl.__m #define SCTP_BASE_VAR(__m) V_system_base_info.__m #define SCTP_PRINTF(params...) printf(params) #if defined(SCTP_DEBUG) #define SCTPDBG(level, params...) \ { \ do { \ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \ SCTP_PRINTF(params); \ } \ } while (0); \ } #define SCTPDBG_ADDR(level, addr) \ { \ do { \ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \ sctp_print_address(addr); \ } \ } while (0); \ } #else #define SCTPDBG(level, params...) #define SCTPDBG_ADDR(level, addr) #endif #ifdef SCTP_LTRACE_CHUNKS #define SCTP_LTRACE_CHK(a, b, c, d) if(SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_CHUNK_ENABLE) SCTP_CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d) #else #define SCTP_LTRACE_CHK(a, b, c, d) #endif #ifdef SCTP_LTRACE_ERRORS #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ m, inp, stcb, net, file, __LINE__, err); #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \ SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \ inp, stcb, net, file, __LINE__, err); #else #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) #endif /* * Local address and interface list handling */ #define SCTP_MAX_VRF_ID 0 #define SCTP_SIZE_OF_VRF_HASH 3 #define SCTP_IFNAMSIZ IFNAMSIZ #define SCTP_DEFAULT_VRFID 0 #define SCTP_VRF_ADDR_HASH_SIZE 16 #define SCTP_VRF_IFN_HASH_SIZE 3 #define SCTP_INIT_VRF_TABLEID(vrf) #define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP) #define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifa && (ro)->ro_rt->rt_ifa->ifa_ifp && (ro)->ro_rt->rt_ifa->ifa_ifp->if_type == IFT_LOOP) /* * Access to IFN's to help with src-addr-selection */ /* This could return VOID if the index works but for BSD we provide both. */ #define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp #define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index #define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp) /* * general memory allocation */ #define SCTP_MALLOC(var, type, size, name) \ do { \ var = (type)malloc(size, name, M_NOWAIT); \ } while (0) #define SCTP_FREE(var, type) free(var, type) #define SCTP_MALLOC_SONAME(var, type, size) \ do { \ var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \ } while (0) #define SCTP_FREE_SONAME(var) free(var, M_SONAME) #define SCTP_PROCESS_STRUCT struct proc * /* * zone allocation functions */ #include /* SCTP_ZONE_INIT: initialize the zone */ typedef struct uma_zone *sctp_zone_t; #define SCTP_ZONE_INIT(zone, name, size, number) { \ zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,\ 0); \ uma_zone_set_max(zone, number); \ } #define SCTP_ZONE_DESTROY(zone) uma_zdestroy(zone) /* SCTP_ZONE_GET: allocate element from the zone */ #define SCTP_ZONE_GET(zone, type) \ (type *)uma_zalloc(zone, M_NOWAIT); /* SCTP_ZONE_FREE: free element from the zone */ #define SCTP_ZONE_FREE(zone, element) \ uma_zfree(zone, element); #define SCTP_HASH_INIT(size, hashmark) hashinit_flags(size, M_PCB, hashmark, HASH_NOWAIT) #define SCTP_HASH_FREE(table, hashmark) hashdestroy(table, M_PCB, hashmark) #define SCTP_M_COPYM m_copym /* * timers */ #include typedef struct callout sctp_os_timer_t; #define SCTP_OS_TIMER_INIT(tmr) callout_init(tmr, 1) #define SCTP_OS_TIMER_START callout_reset #define SCTP_OS_TIMER_STOP callout_stop #define SCTP_OS_TIMER_STOP_DRAIN callout_drain #define SCTP_OS_TIMER_PENDING callout_pending #define SCTP_OS_TIMER_ACTIVE callout_active #define SCTP_OS_TIMER_DEACTIVATE callout_deactivate #define sctp_get_tick_count() (ticks) #define SCTP_UNUSED __attribute__((unused)) /* * Functions */ /* Mbuf manipulation and access macros */ #define SCTP_BUF_LEN(m) (m->m_len) #define SCTP_BUF_NEXT(m) (m->m_next) #define SCTP_BUF_NEXT_PKT(m) (m->m_nextpkt) #define SCTP_BUF_RESV_UF(m, size) m->m_data += size #define SCTP_BUF_AT(m, size) m->m_data + size #define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT) #define SCTP_BUF_SIZE M_SIZE #define SCTP_BUF_TYPE(m) (m->m_type) #define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif) #define SCTP_BUF_PREPEND M_PREPEND #define SCTP_ALIGN_TO_END(m, len) M_ALIGN(m, len) /* We make it so if you have up to 4 threads * writing based on the default size of * the packet log 65 k, that would be * 4 16k packets before we would hit * a problem. */ #define SCTP_PKTLOG_WRITERS_NEED_LOCK 3 /*************************/ /* MTU */ /*************************/ #define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu #define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0)) #define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0) #define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \ if (rt != NULL) \ rt->rt_mtu = mtu; \ } while(0) /* (de-)register interface event notifications */ #define SCTP_REGISTER_INTERFACE(ifhandle, af) #define SCTP_DEREGISTER_INTERFACE(ifhandle, af) /*************************/ /* These are for logging */ /*************************/ /* return the base ext data pointer */ #define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf) /* return the refcnt of the data pointer */ #define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ext_cnt) /* return any buffer related flags, this is * used beyond logging for apple only. */ #define SCTP_BUF_GET_FLAGS(m) (m->m_flags) /* For BSD this just accesses the M_PKTHDR length * so it operates on an mbuf with hdr flag. Other * O/S's may have separate packet header and mbuf * chain pointers.. thus the macro. */ #define SCTP_HEADER_TO_CHAIN(m) (m) #define SCTP_DETACH_HEADER_FROM_CHAIN(m) #define SCTP_HEADER_LEN(m) ((m)->m_pkthdr.len) #define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0 #define SCTP_RELEASE_HEADER(m) #define SCTP_RELEASE_PKT(m) sctp_m_freem(m) #define SCTP_ENABLE_UDP_CSUM(m) do { \ m->m_pkthdr.csum_flags = CSUM_UDP; \ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); \ } while (0) #define SCTP_GET_PKT_VRFID(m, vrf_id) ((vrf_id = SCTP_DEFAULT_VRFID) != SCTP_DEFAULT_VRFID) /* Attach the chain of data into the sendable packet. */ #define SCTP_ATTACH_CHAIN(pak, m, packet_length) do { \ pak = m; \ pak->m_pkthdr.len = packet_length; \ } while(0) /* Other m_pkthdr type things */ #define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0) #define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP))) /* This converts any input packet header * into the chain of data holders, for BSD * its a NOP. */ /* get the v6 hop limit */ #define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL))); /* is the endpoint v6only? */ #define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY) /* is the socket non-blocking? */ #define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO) #define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO) #define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO) /* get the socket type */ #define SCTP_SO_TYPE(so) ((so)->so_type) /* Use a macro for renaming sb_cc to sb_acc. * Initially sb_ccc was used, but this broke select() when used * with SCTP sockets. */ #define sb_cc sb_acc /* reserve sb space for a socket */ #define SCTP_SORESERVE(so, send, recv) soreserve(so, send, recv) /* wakeup a socket */ #define SCTP_SOWAKEUP(so) wakeup(&(so)->so_timeo) /* clear the socket buffer state */ #define SCTP_SB_CLEAR(sb) \ (sb).sb_cc = 0; \ (sb).sb_mb = NULL; \ (sb).sb_mbcnt = 0; #define SCTP_SB_LIMIT_RCV(so) so->so_rcv.sb_hiwat #define SCTP_SB_LIMIT_SND(so) so->so_snd.sb_hiwat /* * routes, output, etc. */ typedef struct route sctp_route_t; typedef struct rtentry sctp_rtentry_t; #define SCTP_RTALLOC(ro, vrf_id, fibnum) \ rtalloc_ign_fib((struct route *)ro, 0UL, fibnum) /* Future zero copy wakeup/send function */ #define SCTP_ZERO_COPY_EVENT(inp, so) /* This is re-pulse ourselves for sendbuf */ #define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so) /* * SCTP protocol specific mbuf flags. */ #define M_NOTIFICATION M_PROTO1 /* SCTP notification */ /* * IP output routines */ #define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \ { \ int o_flgs = IP_RAWOUTPUT; \ struct sctp_tcb *local_stcb = stcb; \ if (local_stcb && \ local_stcb->sctp_ep && \ local_stcb->sctp_ep->sctp_socket) \ o_flgs |= local_stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE; \ m_clrprotoflags(o_pak); \ result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \ } #define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \ { \ struct sctp_tcb *local_stcb = stcb; \ m_clrprotoflags(o_pak); \ if (local_stcb && local_stcb->sctp_ep) \ result = ip6_output(o_pak, \ ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \ (ro), 0, 0, ifp, NULL); \ else \ result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \ } struct mbuf * sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header, int how, int allonebuf, int type); /* * SCTP AUTH */ #define SCTP_READ_RANDOM(buf, len) read_random(buf, len) /* map standard crypto API names */ #define SCTP_SHA1_CTX SHA1_CTX #define SCTP_SHA1_INIT SHA1Init #define SCTP_SHA1_UPDATE SHA1Update #define SCTP_SHA1_FINAL(x,y) SHA1Final((caddr_t)x, y) #define SCTP_SHA256_CTX SHA256_CTX #define SCTP_SHA256_INIT SHA256_Init #define SCTP_SHA256_UPDATE SHA256_Update #define SCTP_SHA256_FINAL(x,y) SHA256_Final((caddr_t)x, y) #endif #define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1) #if defined(INVARIANTS) #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ { \ int32_t oldval; \ oldval = atomic_fetchadd_int(addr, -val); \ if (oldval < val) { \ panic("Counter goes negative"); \ } \ } #else #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ { \ int32_t oldval; \ oldval = atomic_fetchadd_int(addr, -val); \ if (oldval < val) { \ *addr = 0; \ } \ } #endif diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index ffc3a7e2def1..25a7c85d667e 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -1,7107 +1,7107 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #endif #ifdef INET6 #include #endif #include #include #include VNET_DEFINE(struct sctp_base_info, system_base_info); /* FIX: we don't handle multiple link local scopes */ /* "scopeless" replacement IN6_ARE_ADDR_EQUAL */ #ifdef INET6 int SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b) { struct sockaddr_in6 tmp_a, tmp_b; memcpy(&tmp_a, a, sizeof(struct sockaddr_in6)); if (sa6_embedscope(&tmp_a, MODULE_GLOBAL(ip6_use_defzone)) != 0) { return (0); } memcpy(&tmp_b, b, sizeof(struct sockaddr_in6)); if (sa6_embedscope(&tmp_b, MODULE_GLOBAL(ip6_use_defzone)) != 0) { return (0); } return (IN6_ARE_ADDR_EQUAL(&tmp_a.sin6_addr, &tmp_b.sin6_addr)); } #endif void sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb) { /* * We really don't need to lock this, but I will just because it * does not hurt. */ SCTP_INP_INFO_RLOCK(); spcb->ep_count = SCTP_BASE_INFO(ipi_count_ep); spcb->asoc_count = SCTP_BASE_INFO(ipi_count_asoc); spcb->laddr_count = SCTP_BASE_INFO(ipi_count_laddr); spcb->raddr_count = SCTP_BASE_INFO(ipi_count_raddr); spcb->chk_count = SCTP_BASE_INFO(ipi_count_chunk); spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq); spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq); spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks); SCTP_INP_INFO_RUNLOCK(); } /*- * Addresses are added to VRF's (Virtual Router's). For BSD we * have only the default VRF 0. We maintain a hash list of * VRF's. Each VRF has its own list of sctp_ifn's. Each of * these has a list of addresses. When we add a new address * to a VRF we lookup the ifn/ifn_index, if the ifn does * not exist we create it and add it to the list of IFN's * within the VRF. Once we have the sctp_ifn, we add the * address to the list. So we look something like: * * hash-vrf-table * vrf-> ifn-> ifn -> ifn * vrf | * ... +--ifa-> ifa -> ifa * vrf * * We keep these separate lists since the SCTP subsystem will * point to these from its source address selection nets structure. * When an address is deleted it does not happen right away on * the SCTP side, it gets scheduled. What we do when a * delete happens is immediately remove the address from * the master list and decrement the refcount. As our * addip iterator works through and frees the src address * selection pointing to the sctp_ifa, eventually the refcount * will reach 0 and we will delete it. Note that it is assumed * that any locking on system level ifn/ifa is done at the * caller of these functions and these routines will only * lock the SCTP structures as they add or delete things. * * Other notes on VRF concepts. * - An endpoint can be in multiple VRF's * - An association lives within a VRF and only one VRF. * - Any incoming packet we can deduce the VRF for by * looking at the mbuf/pak inbound (for BSD its VRF=0 :D) * - Any downward send call or connect call must supply the * VRF via ancillary data or via some sort of set default * VRF socket option call (again for BSD no brainer since * the VRF is always 0). * - An endpoint may add multiple VRF's to it. * - Listening sockets can accept associations in any * of the VRF's they are in but the assoc will end up * in only one VRF (gotten from the packet or connect/send). * */ struct sctp_vrf * sctp_allocate_vrf(int vrf_id) { struct sctp_vrf *vrf = NULL; struct sctp_vrflist *bucket; /* First allocate the VRF structure */ vrf = sctp_find_vrf(vrf_id); if (vrf) { /* Already allocated */ return (vrf); } SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf), SCTP_M_VRF); if (vrf == NULL) { /* No memory */ #ifdef INVARIANTS panic("No memory for VRF:%d", vrf_id); #endif return (NULL); } /* setup the VRF */ memset(vrf, 0, sizeof(struct sctp_vrf)); vrf->vrf_id = vrf_id; LIST_INIT(&vrf->ifnlist); vrf->total_ifa_count = 0; vrf->refcount = 0; /* now also setup table ids */ SCTP_INIT_VRF_TABLEID(vrf); /* Init the HASH of addresses */ vrf->vrf_addr_hash = SCTP_HASH_INIT(SCTP_VRF_ADDR_HASH_SIZE, &vrf->vrf_addr_hashmark); if (vrf->vrf_addr_hash == NULL) { /* No memory */ #ifdef INVARIANTS panic("No memory for VRF:%d", vrf_id); #endif SCTP_FREE(vrf, SCTP_M_VRF); return (NULL); } /* Add it to the hash table */ bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))]; LIST_INSERT_HEAD(bucket, vrf, next_vrf); atomic_add_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1); return (vrf); } struct sctp_ifn * sctp_find_ifn(void *ifn, uint32_t ifn_index) { struct sctp_ifn *sctp_ifnp; struct sctp_ifnlist *hash_ifn_head; /* * We assume the lock is held for the addresses if that's wrong * problems could occur :-) */ hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))]; LIST_FOREACH(sctp_ifnp, hash_ifn_head, next_bucket) { if (sctp_ifnp->ifn_index == ifn_index) { return (sctp_ifnp); } if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) { return (sctp_ifnp); } } return (NULL); } struct sctp_vrf * sctp_find_vrf(uint32_t vrf_id) { struct sctp_vrflist *bucket; struct sctp_vrf *liste; bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))]; LIST_FOREACH(liste, bucket, next_vrf) { if (vrf_id == liste->vrf_id) { return (liste); } } return (NULL); } void sctp_free_vrf(struct sctp_vrf *vrf) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&vrf->refcount)) { if (vrf->vrf_addr_hash) { SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark); vrf->vrf_addr_hash = NULL; } /* We zero'd the count */ LIST_REMOVE(vrf, next_vrf); SCTP_FREE(vrf, SCTP_M_VRF); atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1); } } void sctp_free_ifn(struct sctp_ifn *sctp_ifnp) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifnp->refcount)) { /* We zero'd the count */ if (sctp_ifnp->vrf) { sctp_free_vrf(sctp_ifnp->vrf); } SCTP_FREE(sctp_ifnp, SCTP_M_IFN); atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifns), 1); } } void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu) { struct sctp_ifn *sctp_ifnp; sctp_ifnp = sctp_find_ifn((void *)NULL, ifn_index); if (sctp_ifnp != NULL) { sctp_ifnp->ifn_mtu = mtu; } } void sctp_free_ifa(struct sctp_ifa *sctp_ifap) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifap->refcount)) { /* We zero'd the count */ if (sctp_ifap->ifn_p) { sctp_free_ifn(sctp_ifap->ifn_p); } SCTP_FREE(sctp_ifap, SCTP_M_IFA); atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifas), 1); } } static void sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock) { struct sctp_ifn *found; found = sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index); if (found == NULL) { /* Not in the list.. sorry */ return; } if (hold_addr_lock == 0) SCTP_IPI_ADDR_WLOCK(); LIST_REMOVE(sctp_ifnp, next_bucket); LIST_REMOVE(sctp_ifnp, next_ifn); SCTP_DEREGISTER_INTERFACE(sctp_ifnp->ifn_index, sctp_ifnp->registered_af); if (hold_addr_lock == 0) SCTP_IPI_ADDR_WUNLOCK(); /* Take away the reference, and possibly free it */ sctp_free_ifn(sctp_ifnp); } void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index) { struct sctp_vrf *vrf; struct sctp_ifa *sctp_ifap; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); goto out; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n"); goto out; } if (sctp_ifap->ifn_p == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unusable\n"); goto out; } if (if_name) { if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) != 0) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", sctp_ifap->ifn_p->ifn_name, if_name); goto out; } } else { if (sctp_ifap->ifn_p->ifn_index != ifn_index) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n", sctp_ifap->ifn_p->ifn_index, ifn_index); goto out; } } sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID); sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; out: SCTP_IPI_ADDR_RUNLOCK(); } void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index) { struct sctp_vrf *vrf; struct sctp_ifa *sctp_ifap; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); goto out; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n"); goto out; } if (sctp_ifap->ifn_p == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unusable\n"); goto out; } if (if_name) { if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) != 0) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", sctp_ifap->ifn_p->ifn_name, if_name); goto out; } } else { if (sctp_ifap->ifn_p->ifn_index != ifn_index) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n", sctp_ifap->ifn_p->ifn_index, ifn_index); goto out; } } sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE); sctp_ifap->localifa_flags |= SCTP_ADDR_VALID; out: SCTP_IPI_ADDR_RUNLOCK(); } /*- * Add an ifa to an ifn. * Register the interface as necessary. * NOTE: ADDR write lock MUST be held. */ static void sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) { int ifa_af; LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa); sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); /* update address counts */ sctp_ifnp->ifa_count++; ifa_af = sctp_ifap->address.sa.sa_family; switch (ifa_af) { #ifdef INET case AF_INET: sctp_ifnp->num_v4++; break; #endif #ifdef INET6 case AF_INET6: sctp_ifnp->num_v6++; break; #endif default: break; } if (sctp_ifnp->ifa_count == 1) { /* register the new interface */ SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, ifa_af); sctp_ifnp->registered_af = ifa_af; } } /*- * Remove an ifa from its ifn. * If no more addresses exist, remove the ifn too. Otherwise, re-register * the interface based on the remaining address families left. * NOTE: ADDR write lock MUST be held. */ static void sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) { LIST_REMOVE(sctp_ifap, next_ifa); if (sctp_ifap->ifn_p) { /* update address counts */ sctp_ifap->ifn_p->ifa_count--; switch (sctp_ifap->address.sa.sa_family) { #ifdef INET case AF_INET: sctp_ifap->ifn_p->num_v4--; break; #endif #ifdef INET6 case AF_INET6: sctp_ifap->ifn_p->num_v6--; break; #endif default: break; } if (LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) { /* remove the ifn, possibly freeing it */ sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED); } else { /* re-register address family type, if needed */ if ((sctp_ifap->ifn_p->num_v6 == 0) && (sctp_ifap->ifn_p->registered_af == AF_INET6)) { SCTP_DEREGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET6); SCTP_REGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET); sctp_ifap->ifn_p->registered_af = AF_INET; } else if ((sctp_ifap->ifn_p->num_v4 == 0) && (sctp_ifap->ifn_p->registered_af == AF_INET)) { SCTP_DEREGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET); SCTP_REGISTER_INTERFACE(sctp_ifap->ifn_p->ifn_index, AF_INET6); sctp_ifap->ifn_p->registered_af = AF_INET6; } /* free the ifn refcount */ sctp_free_ifn(sctp_ifap->ifn_p); } sctp_ifap->ifn_p = NULL; } } struct sctp_ifa * sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, uint32_t ifn_type, const char *if_name, void *ifa, struct sockaddr *addr, uint32_t ifa_flags, int dynamic_add) { struct sctp_vrf *vrf; struct sctp_ifn *sctp_ifnp = NULL; struct sctp_ifa *sctp_ifap = NULL; struct sctp_ifalist *hash_addr_head; struct sctp_ifnlist *hash_ifn_head; uint32_t hash_of_addr; int new_ifn_af = 0; #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id); SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr); #endif SCTP_IPI_ADDR_WLOCK(); sctp_ifnp = sctp_find_ifn(ifn, ifn_index); if (sctp_ifnp) { vrf = sctp_ifnp->vrf; } else { vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { vrf = sctp_allocate_vrf(vrf_id); if (vrf == NULL) { SCTP_IPI_ADDR_WUNLOCK(); return (NULL); } } } if (sctp_ifnp == NULL) { /* * build one and add it, can't hold lock until after malloc * done though. */ SCTP_IPI_ADDR_WUNLOCK(); SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *, sizeof(struct sctp_ifn), SCTP_M_IFN); if (sctp_ifnp == NULL) { #ifdef INVARIANTS panic("No memory for IFN"); #endif return (NULL); } memset(sctp_ifnp, 0, sizeof(struct sctp_ifn)); sctp_ifnp->ifn_index = ifn_index; sctp_ifnp->ifn_p = ifn; sctp_ifnp->ifn_type = ifn_type; sctp_ifnp->refcount = 0; sctp_ifnp->vrf = vrf; atomic_add_int(&vrf->refcount, 1); sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family); if (if_name != NULL) { snprintf(sctp_ifnp->ifn_name, SCTP_IFNAMSIZ, "%s", if_name); } else { snprintf(sctp_ifnp->ifn_name, SCTP_IFNAMSIZ, "%s", "unknown"); } hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))]; LIST_INIT(&sctp_ifnp->ifalist); SCTP_IPI_ADDR_WLOCK(); LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket); LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn); atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifns), 1); new_ifn_af = 1; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap) { /* Hmm, it already exists? */ if ((sctp_ifap->ifn_p) && (sctp_ifap->ifn_p->ifn_index == ifn_index)) { SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n", sctp_ifap->ifn_p->ifn_name, ifn_index, (void *)sctp_ifap); if (new_ifn_af) { /* Remove the created one that we don't want */ sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED); } if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) { /* easy to solve, just switch back to active */ SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n"); sctp_ifap->localifa_flags = SCTP_ADDR_VALID; sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); } exit_stage_left: SCTP_IPI_ADDR_WUNLOCK(); return (sctp_ifap); } else { if (sctp_ifap->ifn_p) { /* * The last IFN gets the address, remove the * old one */ SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n", (void *)sctp_ifap, sctp_ifap->ifn_p->ifn_name, sctp_ifap->ifn_p->ifn_index, if_name, ifn_index); /* remove the address from the old ifn */ sctp_remove_ifa_from_ifn(sctp_ifap); /* move the address over to the new ifn */ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); goto exit_stage_left; } else { /* repair ifnp which was NULL ? */ sctp_ifap->localifa_flags = SCTP_ADDR_VALID; SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n", (void *)sctp_ifnp, (void *)sctp_ifap); sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); } goto exit_stage_left; } } SCTP_IPI_ADDR_WUNLOCK(); SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), SCTP_M_IFA); if (sctp_ifap == NULL) { #ifdef INVARIANTS panic("No memory for IFA"); #endif return (NULL); } memset(sctp_ifap, 0, sizeof(struct sctp_ifa)); sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifnp->refcount, 1); sctp_ifap->vrf_id = vrf_id; sctp_ifap->ifa = ifa; memcpy(&sctp_ifap->address, addr, addr->sa_len); sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE; sctp_ifap->flags = ifa_flags; /* Set scope */ switch (sctp_ifap->address.sa.sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; sin = &sctp_ifap->address.sin; if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) || (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) { sctp_ifap->src_is_loop = 1; } if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { sctp_ifap->src_is_priv = 1; } sctp_ifnp->num_v4++; if (new_ifn_af) new_ifn_af = AF_INET; break; } #endif #ifdef INET6 case AF_INET6: { /* ok to use deprecated addresses? */ struct sockaddr_in6 *sin6; sin6 = &sctp_ifap->address.sin6; if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) || (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) { sctp_ifap->src_is_loop = 1; } if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { sctp_ifap->src_is_priv = 1; } sctp_ifnp->num_v6++; if (new_ifn_af) new_ifn_af = AF_INET6; break; } #endif default: new_ifn_af = 0; break; } hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa); if ((sctp_ifap->src_is_priv == 0) && (sctp_ifap->src_is_loop == 0)) { sctp_ifap->src_is_glob = 1; } SCTP_IPI_ADDR_WLOCK(); hash_addr_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)]; LIST_INSERT_HEAD(hash_addr_head, sctp_ifap, next_bucket); sctp_ifap->refcount = 1; LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa); sctp_ifnp->ifa_count++; vrf->total_ifa_count++; atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifas), 1); if (new_ifn_af) { SCTP_REGISTER_INTERFACE(ifn_index, new_ifn_af); sctp_ifnp->registered_af = new_ifn_af; } SCTP_IPI_ADDR_WUNLOCK(); if (dynamic_add) { /* * Bump up the refcount so that when the timer completes it * will drop back down. */ struct sctp_laddr *wi; atomic_add_int(&sctp_ifap->refcount, 1); wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (wi == NULL) { /* * Gak, what can we do? We have lost an address * change can you say HOSED? */ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n"); /* Opps, must decrement the count */ sctp_del_addr_from_vrf(vrf_id, addr, ifn_index, if_name); return (NULL); } SCTP_INCR_LADDR_COUNT(); bzero(wi, sizeof(*wi)); (void)SCTP_GETTIME_TIMEVAL(&wi->start_time); wi->ifa = sctp_ifap; wi->action = SCTP_ADD_IP_ADDRESS; SCTP_WQ_ADDR_LOCK(); LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); SCTP_WQ_ADDR_UNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL, (struct sctp_nets *)NULL); } else { /* it's ready for use */ sctp_ifap->localifa_flags &= ~SCTP_ADDR_DEFER_USE; } return (sctp_ifap); } void sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, uint32_t ifn_index, const char *if_name) { struct sctp_vrf *vrf; struct sctp_ifa *sctp_ifap = NULL; SCTP_IPI_ADDR_WLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); goto out_now; } #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id); SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr); #endif sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap) { /* Validate the delete */ if (sctp_ifap->ifn_p) { int valid = 0; /*- * The name has priority over the ifn_index * if its given. We do this especially for * panda who might recycle indexes fast. */ if (if_name) { if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) == 0) { /* They match its a correct delete */ valid = 1; } } if (!valid) { /* last ditch check ifn_index */ if (ifn_index == sctp_ifap->ifn_p->ifn_index) { valid = 1; } } if (!valid) { SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n", ifn_index, ((if_name == NULL) ? "NULL" : if_name)); SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n", sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name); SCTP_IPI_ADDR_WUNLOCK(); return; } } SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", (void *)sctp_ifap); sctp_ifap->localifa_flags &= SCTP_ADDR_VALID; /* * We don't set the flag. This means that the structure will * hang around in EP's that have bound specific to it until * they close. This gives us TCP like behavior if someone * removes an address (or for that matter adds it right * back). */ /* sctp_ifap->localifa_flags |= SCTP_BEING_DELETED; */ vrf->total_ifa_count--; LIST_REMOVE(sctp_ifap, next_bucket); sctp_remove_ifa_from_ifn(sctp_ifap); } #ifdef SCTP_DEBUG else { SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:", ifn_index); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr); } #endif out_now: SCTP_IPI_ADDR_WUNLOCK(); if (sctp_ifap) { struct sctp_laddr *wi; wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (wi == NULL) { /* * Gak, what can we do? We have lost an address * change can you say HOSED? */ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n"); /* Oops, must decrement the count */ sctp_free_ifa(sctp_ifap); return; } SCTP_INCR_LADDR_COUNT(); bzero(wi, sizeof(*wi)); (void)SCTP_GETTIME_TIMEVAL(&wi->start_time); wi->ifa = sctp_ifap; wi->action = SCTP_DEL_IP_ADDRESS; SCTP_WQ_ADDR_LOCK(); /* * Should this really be a tailq? As it is we will process * the newest first :-0 */ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); SCTP_WQ_ADDR_UNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL, (struct sctp_nets *)NULL); } return; } static int sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to) { int loopback_scope; #if defined(INET) int ipv4_local_scope, ipv4_addr_legal; #endif #if defined(INET6) int local_scope, site_scope, ipv6_addr_legal; #endif struct sctp_vrf *vrf; struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa; loopback_scope = stcb->asoc.scope.loopback_scope; #if defined(INET) ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; #endif #if defined(INET6) local_scope = stcb->asoc.scope.local_scope; site_scope = stcb->asoc.scope.site_scope; ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; #endif SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(stcb->asoc.vrf_id); if (vrf == NULL) { /* no vrf, no addresses */ SCTP_IPI_ADDR_RUNLOCK(); return (0); } if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { continue; } LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { if (sctp_is_addr_restricted(stcb, sctp_ifa) && (!sctp_is_addr_pending(stcb, sctp_ifa))) { /* * We allow pending addresses, where * we have sent an asconf-add to be * considered valid. */ continue; } if (sctp_ifa->address.sa.sa_family != to->sa_family) { continue; } switch (sctp_ifa->address.sa.sa_family) { #ifdef INET case AF_INET: if (ipv4_addr_legal) { struct sockaddr_in *sin, *rsin; sin = &sctp_ifa->address.sin; rsin = (struct sockaddr_in *)to; if ((ipv4_local_scope == 0) && IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { continue; } if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred, &sin->sin_addr) != 0) { continue; } if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } } break; #endif #ifdef INET6 case AF_INET6: if (ipv6_addr_legal) { struct sockaddr_in6 *sin6, *rsin6; sin6 = &sctp_ifa->address.sin6; rsin6 = (struct sockaddr_in6 *)to; if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred, &sin6->sin6_addr) != 0) { continue; } if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { if (local_scope == 0) continue; if (sin6->sin6_scope_id == 0) { if (sa6_recoverscope(sin6) != 0) continue; } } if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { continue; } if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } } break; #endif default: /* TSNH */ break; } } } } else { struct sctp_laddr *laddr; LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n"); continue; } if (sctp_is_addr_restricted(stcb, laddr->ifa) && (!sctp_is_addr_pending(stcb, laddr->ifa))) { /* * We allow pending addresses, where we have * sent an asconf-add to be considered * valid. */ continue; } if (laddr->ifa->address.sa.sa_family != to->sa_family) { continue; } switch (to->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin, *rsin; sin = &laddr->ifa->address.sin; rsin = (struct sockaddr_in *)to; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = &laddr->ifa->address.sin6; rsin6 = (struct sockaddr_in6 *)to; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } break; } #endif default: /* TSNH */ break; } } } SCTP_IPI_ADDR_RUNLOCK(); return (0); } static struct sctp_tcb * sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id) { /**** ASSUMES THE CALLER holds the INP_INFO_RLOCK */ /* * If we support the TCP model, then we must now dig through to see * if we can find our endpoint in the list of tcp ep's. */ uint16_t lport, rport; struct sctppcbhead *ephead; struct sctp_inpcb *inp; struct sctp_laddr *laddr; struct sctp_tcb *stcb; struct sctp_nets *net; if ((to == NULL) || (from == NULL)) { return (NULL); } switch (to->sa_family) { #ifdef INET case AF_INET: if (from->sa_family == AF_INET) { lport = ((struct sockaddr_in *)to)->sin_port; rport = ((struct sockaddr_in *)from)->sin_port; } else { return (NULL); } break; #endif #ifdef INET6 case AF_INET6: if (from->sa_family == AF_INET6) { lport = ((struct sockaddr_in6 *)to)->sin6_port; rport = ((struct sockaddr_in6 *)from)->sin6_port; } else { return (NULL); } break; #endif default: return (NULL); } ephead = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))]; /* * Ok now for each of the guys in this bucket we must look and see: * - Does the remote port match. - Does there single association's * addresses match this address (to). If so we update p_ep to point * to this ep and return the tcb from it. */ LIST_FOREACH(inp, ephead, sctp_hash) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(inp); continue; } if (lport != inp->sctp_lport) { SCTP_INP_RUNLOCK(inp); continue; } switch (to->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *)to; if (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sin->sin_addr) != 0) { SCTP_INP_RUNLOCK(inp); continue; } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)to; if (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sin6->sin6_addr) != 0) { SCTP_INP_RUNLOCK(inp); continue; } break; } #endif default: SCTP_INP_RUNLOCK(inp); continue; } if (inp->def_vrf_id != vrf_id) { SCTP_INP_RUNLOCK(inp); continue; } /* check to see if the ep has one of the addresses */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { /* We are NOT bound all, so look further */ int match = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __func__); continue; } if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n"); continue; } if (laddr->ifa->address.sa.sa_family == to->sa_family) { /* see if it matches */ #ifdef INET if (from->sa_family == AF_INET) { struct sockaddr_in *intf_addr, *sin; intf_addr = &laddr->ifa->address.sin; sin = (struct sockaddr_in *)to; if (sin->sin_addr.s_addr == intf_addr->sin_addr.s_addr) { match = 1; break; } } #endif #ifdef INET6 if (from->sa_family == AF_INET6) { struct sockaddr_in6 *intf_addr6; struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *) to; intf_addr6 = &laddr->ifa->address.sin6; if (SCTP6_ARE_ADDR_EQUAL(sin6, intf_addr6)) { match = 1; break; } } #endif } } if (match == 0) { /* This endpoint does not have this address */ SCTP_INP_RUNLOCK(inp); continue; } } /* * Ok if we hit here the ep has the address, does it hold * the tcb? */ /* XXX: Why don't we TAILQ_FOREACH through sctp_asoc_list? */ stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); continue; } SCTP_TCB_LOCK(stcb); if (!sctp_does_stcb_own_this_addr(stcb, to)) { SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); continue; } if (stcb->rport != rport) { /* remote port does not match. */ SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); continue; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); continue; } if (!sctp_does_stcb_own_this_addr(stcb, to)) { SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); continue; } /* Does this TCB have a matching address? */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->ro._l_addr.sa.sa_family != from->sa_family) { /* not the same family, can't be a match */ continue; } switch (from->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *)&net->ro._l_addr; rsin = (struct sockaddr_in *)from; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { /* found it */ if (netp != NULL) { *netp = net; } /* * Update the endpoint * pointer */ *inp_p = inp; SCTP_INP_RUNLOCK(inp); return (stcb); } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; rsin6 = (struct sockaddr_in6 *)from; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { /* found it */ if (netp != NULL) { *netp = net; } /* * Update the endpoint * pointer */ *inp_p = inp; SCTP_INP_RUNLOCK(inp); return (stcb); } break; } #endif default: /* TSNH */ break; } } SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); } return (NULL); } /* * rules for use * * 1) If I return a NULL you must decrement any INP ref cnt. 2) If I find an * stcb, both will be locked (locked_tcb and stcb) but decrement will be done * (if locked == NULL). 3) Decrement happens on return ONLY if locked == * NULL. */ struct sctp_tcb * sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, struct sctp_nets **netp, struct sockaddr *local, struct sctp_tcb *locked_tcb) { struct sctpasochead *head; struct sctp_inpcb *inp; struct sctp_tcb *stcb = NULL; struct sctp_nets *net; uint16_t rport; inp = *inp_p; switch (remote->sa_family) { #ifdef INET case AF_INET: rport = (((struct sockaddr_in *)remote)->sin_port); break; #endif #ifdef INET6 case AF_INET6: rport = (((struct sockaddr_in6 *)remote)->sin6_port); break; #endif default: return (NULL); } if (locked_tcb) { /* * UN-lock so we can do proper locking here this occurs when * called from load_addresses_from_init. */ atomic_add_int(&locked_tcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(locked_tcb); } SCTP_INP_INFO_RLOCK(); if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /*- * Now either this guy is our listener or it's the * connector. If it is the one that issued the connect, then * it's only chance is to be the first TCB in the list. If * it is the acceptor, then do the special_lookup to hash * and find the real inp. */ if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) { /* to is peer addr, from is my addr */ stcb = sctp_tcb_special_locate(inp_p, remote, local, netp, inp->def_vrf_id); if ((stcb != NULL) && (locked_tcb == NULL)) { /* we have a locked tcb, lower refcount */ SCTP_INP_DECR_REF(inp); } if ((locked_tcb != NULL) && (locked_tcb != stcb)) { SCTP_INP_RLOCK(locked_tcb->sctp_ep); SCTP_TCB_LOCK(locked_tcb); atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); SCTP_INP_RUNLOCK(locked_tcb->sctp_ep); } SCTP_INP_INFO_RUNLOCK(); return (stcb); } else { SCTP_INP_WLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { goto null_return; } stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { goto null_return; } SCTP_TCB_LOCK(stcb); if (stcb->rport != rport) { /* remote port does not match. */ SCTP_TCB_UNLOCK(stcb); goto null_return; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); goto null_return; } if (local && !sctp_does_stcb_own_this_addr(stcb, local)) { SCTP_TCB_UNLOCK(stcb); goto null_return; } /* now look at the list of remote addresses */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { #ifdef INVARIANTS if (net == (TAILQ_NEXT(net, sctp_next))) { panic("Corrupt net list"); } #endif if (net->ro._l_addr.sa.sa_family != remote->sa_family) { /* not the same family */ continue; } switch (remote->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *) &net->ro._l_addr; rsin = (struct sockaddr_in *)remote; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; rsin6 = (struct sockaddr_in6 *)remote; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #endif default: /* TSNH */ break; } } SCTP_TCB_UNLOCK(stcb); } } else { SCTP_INP_WLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { goto null_return; } head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport, inp->sctp_hashmark)]; LIST_FOREACH(stcb, head, sctp_tcbhash) { if (stcb->rport != rport) { /* remote port does not match */ continue; } SCTP_TCB_LOCK(stcb); if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); continue; } if (local && !sctp_does_stcb_own_this_addr(stcb, local)) { SCTP_TCB_UNLOCK(stcb); continue; } /* now look at the list of remote addresses */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { #ifdef INVARIANTS if (net == (TAILQ_NEXT(net, sctp_next))) { panic("Corrupt net list"); } #endif if (net->ro._l_addr.sa.sa_family != remote->sa_family) { /* not the same family */ continue; } switch (remote->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *) &net->ro._l_addr; rsin = (struct sockaddr_in *)remote; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *) &net->ro._l_addr; rsin6 = (struct sockaddr_in6 *)remote; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #endif default: /* TSNH */ break; } } SCTP_TCB_UNLOCK(stcb); } } null_return: /* clean up for returning null */ if (locked_tcb) { SCTP_TCB_LOCK(locked_tcb); atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); /* not found */ return (NULL); } /* * Find an association for a specific endpoint using the association id given * out in the COMM_UP notification */ struct sctp_tcb * sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock) { /* * Use my the assoc_id to find a endpoint */ struct sctpasochead *head; struct sctp_tcb *stcb; uint32_t id; if (inp == NULL) { SCTP_PRINTF("TSNH ep_associd\n"); return (NULL); } if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_PRINTF("TSNH ep_associd0\n"); return (NULL); } id = (uint32_t)asoc_id; head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)]; if (head == NULL) { /* invalid id TSNH */ SCTP_PRINTF("TSNH ep_associd1\n"); return (NULL); } LIST_FOREACH(stcb, head, sctp_tcbasocidhash) { if (stcb->asoc.assoc_id == id) { if (inp != stcb->sctp_ep) { /* * some other guy has the same id active (id * collision ??). */ SCTP_PRINTF("TSNH ep_associd2\n"); continue; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { continue; } if (want_lock) { SCTP_TCB_LOCK(stcb); } return (stcb); } } return (NULL); } struct sctp_tcb * sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock) { struct sctp_tcb *stcb; SCTP_INP_RLOCK(inp); stcb = sctp_findasoc_ep_asocid_locked(inp, asoc_id, want_lock); SCTP_INP_RUNLOCK(inp); return (stcb); } /* * Endpoint probe expects that the INP_INFO is locked. */ static struct sctp_inpcb * sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, uint16_t lport, uint32_t vrf_id) { struct sctp_inpcb *inp; struct sctp_laddr *laddr; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; struct sockaddr_in6 *intf_addr6; #endif int fnd; #ifdef INET sin = NULL; #endif #ifdef INET6 sin6 = NULL; #endif switch (nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)nam; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; break; #endif default: /* unsupported family */ return (NULL); } if (head == NULL) return (NULL); LIST_FOREACH(inp, head, sctp_hash) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(inp); continue; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) && (inp->sctp_lport == lport)) { /* got it */ switch (nam->sa_family) { #ifdef INET case AF_INET: if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(inp)) { /* * IPv4 on a IPv6 socket with ONLY * IPv6 set */ SCTP_INP_RUNLOCK(inp); continue; } if (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sin->sin_addr) != 0) { SCTP_INP_RUNLOCK(inp); continue; } break; #endif #ifdef INET6 case AF_INET6: /* * A V6 address and the endpoint is NOT * bound V6 */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { SCTP_INP_RUNLOCK(inp); continue; } if (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sin6->sin6_addr) != 0) { SCTP_INP_RUNLOCK(inp); continue; } break; #endif default: break; } /* does a VRF id match? */ fnd = 0; if (inp->def_vrf_id == vrf_id) fnd = 1; SCTP_INP_RUNLOCK(inp); if (!fnd) continue; return (inp); } SCTP_INP_RUNLOCK(inp); } switch (nam->sa_family) { #ifdef INET case AF_INET: if (sin->sin_addr.s_addr == INADDR_ANY) { /* Can't hunt for one that has no address specified */ return (NULL); } break; #endif #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* Can't hunt for one that has no address specified */ return (NULL); } break; #endif default: break; } /* * ok, not bound to all so see if we can find a EP bound to this * address. */ LIST_FOREACH(inp, head, sctp_hash) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(inp); continue; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)) { SCTP_INP_RUNLOCK(inp); continue; } /* * Ok this could be a likely candidate, look at all of its * addresses */ if (inp->sctp_lport != lport) { SCTP_INP_RUNLOCK(inp); continue; } /* does a VRF id match? */ fnd = 0; if (inp->def_vrf_id == vrf_id) fnd = 1; if (!fnd) { SCTP_INP_RUNLOCK(inp); continue; } LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __func__); continue; } SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ", (void *)laddr->ifa); if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n"); continue; } if (laddr->ifa->address.sa.sa_family == nam->sa_family) { /* possible, see if it matches */ switch (nam->sa_family) { #ifdef INET case AF_INET: if (sin->sin_addr.s_addr == laddr->ifa->address.sin.sin_addr.s_addr) { SCTP_INP_RUNLOCK(inp); return (inp); } break; #endif #ifdef INET6 case AF_INET6: intf_addr6 = &laddr->ifa->address.sin6; if (SCTP6_ARE_ADDR_EQUAL(sin6, intf_addr6)) { SCTP_INP_RUNLOCK(inp); return (inp); } break; #endif } } } SCTP_INP_RUNLOCK(inp); } return (NULL); } static struct sctp_inpcb * sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id) { struct sctppcbhead *head; struct sctp_inpcb *t_inp; int fnd; head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))]; LIST_FOREACH(t_inp, head, sctp_hash) { if (t_inp->sctp_lport != lport) { continue; } /* is it in the VRF in question */ fnd = 0; if (t_inp->def_vrf_id == vrf_id) fnd = 1; if (!fnd) continue; /* This one is in use. */ /* check the v6/v4 binding issue */ if ((t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(t_inp)) { if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { /* collision in V6 space */ return (t_inp); } else { /* inp is BOUND_V4 no conflict */ continue; } } else if (t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { /* t_inp is bound v4 and v6, conflict always */ return (t_inp); } else { /* t_inp is bound only V4 */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(inp)) { /* no conflict */ continue; } /* else fall through to conflict */ } return (t_inp); } return (NULL); } int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp) { /* For 1-2-1 with port reuse */ struct sctppcbhead *head; struct sctp_inpcb *tinp, *ninp; if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) { /* only works with port reuse on */ return (-1); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) == 0) { return (0); } SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_WLOCK(); head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport, SCTP_BASE_INFO(hashmark))]; /* Kick out all non-listeners to the TCP hash */ LIST_FOREACH_SAFE(tinp, head, sctp_hash, ninp) { if (tinp->sctp_lport != inp->sctp_lport) { continue; } if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { continue; } if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { continue; } if (tinp->sctp_socket->so_qlimit) { continue; } SCTP_INP_WLOCK(tinp); LIST_REMOVE(tinp, sctp_hash); head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(tinp->sctp_lport, SCTP_BASE_INFO(hashtcpmark))]; tinp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL; LIST_INSERT_HEAD(head, tinp, sctp_hash); SCTP_INP_WUNLOCK(tinp); } SCTP_INP_WLOCK(inp); /* Pull from where he was */ LIST_REMOVE(inp, sctp_hash); inp->sctp_flags &= ~SCTP_PCB_FLAGS_IN_TCPPOOL; head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport, SCTP_BASE_INFO(hashmark))]; LIST_INSERT_HEAD(head, inp, sctp_hash); SCTP_INP_WUNLOCK(inp); SCTP_INP_RLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (0); } struct sctp_inpcb * sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock, uint32_t vrf_id) { /* * First we check the hash table to see if someone has this port * bound with just the port. */ struct sctp_inpcb *inp; struct sctppcbhead *head; int lport; unsigned int i; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif switch (nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)nam; lport = sin->sin_port; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; lport = sin6->sin6_port; break; #endif default: return (NULL); } /* * I could cheat here and just cast to one of the types but we will * do it right. It also provides the check against an Unsupported * type too. */ /* Find the head of the ALLADDR chain */ if (have_lock == 0) { SCTP_INP_INFO_RLOCK(); } head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))]; inp = sctp_endpoint_probe(nam, head, lport, vrf_id); /* * If the TCP model exists it could be that the main listening * endpoint is gone but there still exists a connected socket for * this guy. If so we can return the first one that we find. This * may NOT be the correct one so the caller should be wary on the * returned INP. Currently the only caller that sets find_tcp_pool * is in bindx where we are verifying that a user CAN bind the * address. He either has bound it already, or someone else has, or * its open to bind, so this is good enough. */ if (inp == NULL && find_tcp_pool) { for (i = 0; i < SCTP_BASE_INFO(hashtcpmark) + 1; i++) { head = &SCTP_BASE_INFO(sctp_tcpephash)[i]; inp = sctp_endpoint_probe(nam, head, lport, vrf_id); if (inp) { break; } } } if (inp) { SCTP_INP_INCR_REF(inp); } if (have_lock == 0) { SCTP_INP_INFO_RUNLOCK(); } return (inp); } /* * Find an association for an endpoint with the pointer to whom you want to * send to and the endpoint pointer. The address can be IPv4 or IPv6. We may * need to change the *to to some other struct like a mbuf... */ struct sctp_tcb * sctp_findassociation_addr_sa(struct sockaddr *from, struct sockaddr *to, struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool, uint32_t vrf_id) { struct sctp_inpcb *inp = NULL; struct sctp_tcb *stcb; SCTP_INP_INFO_RLOCK(); if (find_tcp_pool) { if (inp_p != NULL) { stcb = sctp_tcb_special_locate(inp_p, from, to, netp, vrf_id); } else { stcb = sctp_tcb_special_locate(&inp, from, to, netp, vrf_id); } if (stcb != NULL) { SCTP_INP_INFO_RUNLOCK(); return (stcb); } } inp = sctp_pcb_findep(to, 0, 1, vrf_id); if (inp_p != NULL) { *inp_p = inp; } SCTP_INP_INFO_RUNLOCK(); if (inp == NULL) { return (NULL); } /* * ok, we have an endpoint, now lets find the assoc for it (if any) * we now place the source address or from in the to of the find * endpoint call. Since in reality this chain is used from the * inbound packet side. */ if (inp_p != NULL) { stcb = sctp_findassociation_ep_addr(inp_p, from, netp, to, NULL); } else { stcb = sctp_findassociation_ep_addr(&inp, from, netp, to, NULL); } return (stcb); } /* * This routine will grub through the mbuf that is a INIT or INIT-ACK and * find all addresses that the sender has specified in any address list. Each * address will be used to lookup the TCB and see if one exits. */ static struct sctp_tcb * sctp_findassociation_special_addr(struct mbuf *m, int offset, struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, struct sockaddr *dst) { struct sctp_paramhdr *phdr, parm_buf; #if defined(INET) || defined(INET6) struct sctp_tcb *stcb; uint16_t ptype; #endif uint16_t plen; #ifdef INET struct sockaddr_in sin4; #endif #ifdef INET6 struct sockaddr_in6 sin6; #endif #ifdef INET memset(&sin4, 0, sizeof(sin4)); sin4.sin_len = sizeof(sin4); sin4.sin_family = AF_INET; sin4.sin_port = sh->src_port; #endif #ifdef INET6 memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_port = sh->src_port; #endif offset += sizeof(struct sctp_init_chunk); phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); while (phdr != NULL) { /* now we must see if we want the parameter */ #if defined(INET) || defined(INET6) ptype = ntohs(phdr->param_type); #endif plen = ntohs(phdr->param_length); if (plen == 0) { break; } #ifdef INET if (ptype == SCTP_IPV4_ADDRESS && plen == sizeof(struct sctp_ipv4addr_param)) { /* Get the rest of the address */ struct sctp_ipv4addr_param ip4_parm, *p4; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&ip4_parm, min(plen, sizeof(ip4_parm))); if (phdr == NULL) { return (NULL); } p4 = (struct sctp_ipv4addr_param *)phdr; memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr)); /* look it up */ stcb = sctp_findassociation_ep_addr(inp_p, (struct sockaddr *)&sin4, netp, dst, NULL); if (stcb != NULL) { return (stcb); } } #endif #ifdef INET6 if (ptype == SCTP_IPV6_ADDRESS && plen == sizeof(struct sctp_ipv6addr_param)) { /* Get the rest of the address */ struct sctp_ipv6addr_param ip6_parm, *p6; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&ip6_parm, min(plen, sizeof(ip6_parm))); if (phdr == NULL) { return (NULL); } p6 = (struct sctp_ipv6addr_param *)phdr; memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr)); /* look it up */ stcb = sctp_findassociation_ep_addr(inp_p, (struct sockaddr *)&sin6, netp, dst, NULL); if (stcb != NULL) { return (stcb); } } #endif offset += SCTP_SIZE32(plen); phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); } return (NULL); } static struct sctp_tcb * sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint16_t rport, uint16_t lport, int skip_src_check, uint32_t vrf_id, uint32_t remote_tag) { /* * Use my vtag to hash. If we find it we then verify the source addr * is in the assoc. If all goes well we save a bit on rec of a * packet. */ struct sctpasochead *head; struct sctp_nets *net; struct sctp_tcb *stcb; SCTP_INP_INFO_RLOCK(); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag, SCTP_BASE_INFO(hashasocmark))]; LIST_FOREACH(stcb, head, sctp_asocs) { SCTP_INP_RLOCK(stcb->sctp_ep); if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(stcb->sctp_ep); continue; } if (stcb->sctp_ep->def_vrf_id != vrf_id) { SCTP_INP_RUNLOCK(stcb->sctp_ep); continue; } SCTP_TCB_LOCK(stcb); SCTP_INP_RUNLOCK(stcb->sctp_ep); if (stcb->asoc.my_vtag == vtag) { /* candidate */ if (stcb->rport != rport) { SCTP_TCB_UNLOCK(stcb); continue; } if (stcb->sctp_ep->sctp_lport != lport) { SCTP_TCB_UNLOCK(stcb); continue; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); continue; } /* RRS:Need toaddr check here */ if (sctp_does_stcb_own_this_addr(stcb, to) == 0) { /* Endpoint does not own this address */ SCTP_TCB_UNLOCK(stcb); continue; } if (remote_tag) { /* * If we have both vtags that's all we match * on */ if (stcb->asoc.peer_vtag == remote_tag) { /* * If both tags match we consider it * conclusive and check NO * source/destination addresses */ goto conclusive; } } if (skip_src_check) { conclusive: if (from) { *netp = sctp_findnet(stcb, from); } else { *netp = NULL; /* unknown */ } if (inp_p) *inp_p = stcb->sctp_ep; SCTP_INP_INFO_RUNLOCK(); return (stcb); } net = sctp_findnet(stcb, from); if (net) { /* yep its him. */ *netp = net; SCTP_STAT_INCR(sctps_vtagexpress); *inp_p = stcb->sctp_ep; SCTP_INP_INFO_RUNLOCK(); return (stcb); } else { /* * not him, this should only happen in rare * cases so I peg it. */ SCTP_STAT_INCR(sctps_vtagbogus); } } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_INFO_RUNLOCK(); return (NULL); } /* * Find an association with the pointer to the inbound IP packet. This can be * a IPv4 or IPv6 packet. */ struct sctp_tcb * sctp_findassociation_addr(struct mbuf *m, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) { struct sctp_tcb *stcb; struct sctp_inpcb *inp; if (sh->v_tag) { /* we only go down this path if vtag is non-zero */ stcb = sctp_findassoc_by_vtag(src, dst, ntohl(sh->v_tag), inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0); if (stcb) { return (stcb); } } if (inp_p) { stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp, 1, vrf_id); inp = *inp_p; } else { stcb = sctp_findassociation_addr_sa(src, dst, &inp, netp, 1, vrf_id); } SCTPDBG(SCTP_DEBUG_PCB1, "stcb:%p inp:%p\n", (void *)stcb, (void *)inp); if (stcb == NULL && inp) { /* Found a EP but not this address */ if ((ch->chunk_type == SCTP_INITIATION) || (ch->chunk_type == SCTP_INITIATION_ACK)) { /*- * special hook, we do NOT return linp or an * association that is linked to an existing * association that is under the TCP pool (i.e. no * listener exists). The endpoint finding routine * will always find a listener before examining the * TCP pool. */ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) { if (inp_p) { *inp_p = NULL; } return (NULL); } stcb = sctp_findassociation_special_addr(m, offset, sh, &inp, netp, dst); if (inp_p != NULL) { *inp_p = inp; } } } SCTPDBG(SCTP_DEBUG_PCB1, "stcb is %p\n", (void *)stcb); return (stcb); } /* * lookup an association by an ASCONF lookup address. * if the lookup address is 0.0.0.0 or ::0, use the vtag to do the lookup */ struct sctp_tcb * sctp_findassociation_ep_asconf(struct mbuf *m, int offset, struct sockaddr *dst, struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) { struct sctp_tcb *stcb; union sctp_sockstore remote_store; struct sctp_paramhdr parm_buf, *phdr; int ptype; int zero_address = 0; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif memset(&remote_store, 0, sizeof(remote_store)); phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &parm_buf, sizeof(struct sctp_paramhdr)); if (phdr == NULL) { SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n", __func__); return NULL; } ptype = (int)((uint32_t)ntohs(phdr->param_type)); /* get the correlation address */ switch (ptype) { #ifdef INET6 case SCTP_IPV6_ADDRESS: { /* ipv6 address param */ struct sctp_ipv6addr_param *p6, p6_buf; if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv6addr_param)) { return NULL; } p6 = (struct sctp_ipv6addr_param *)sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &p6_buf.ph, sizeof(*p6)); if (p6 == NULL) { SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n", __func__); return (NULL); } sin6 = &remote_store.sin6; sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = sh->src_port; memcpy(&sin6->sin6_addr, &p6->addr, sizeof(struct in6_addr)); if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) zero_address = 1; break; } #endif #ifdef INET case SCTP_IPV4_ADDRESS: { /* ipv4 address param */ struct sctp_ipv4addr_param *p4, p4_buf; if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv4addr_param)) { return NULL; } p4 = (struct sctp_ipv4addr_param *)sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &p4_buf.ph, sizeof(*p4)); if (p4 == NULL) { SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n", __func__); return (NULL); } sin = &remote_store.sin; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_port = sh->src_port; memcpy(&sin->sin_addr, &p4->addr, sizeof(struct in_addr)); if (sin->sin_addr.s_addr == INADDR_ANY) zero_address = 1; break; } #endif default: /* invalid address param type */ return NULL; } if (zero_address) { stcb = sctp_findassoc_by_vtag(NULL, dst, ntohl(sh->v_tag), inp_p, netp, sh->src_port, sh->dest_port, 1, vrf_id, 0); if (stcb != NULL) { SCTP_INP_DECR_REF(*inp_p); } } else { stcb = sctp_findassociation_ep_addr(inp_p, &remote_store.sa, netp, dst, NULL); } return (stcb); } /* * allocate a sctp_inpcb and setup a temporary binding to a port/all * addresses. This way if we don't get a bind we by default pick a ephemeral * port with all addresses bound. */ int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) { /* * we get called when a new endpoint starts up. We need to allocate * the sctp_inpcb structure from the zone and init it. Mark it as * unbound and find a port that we can use as an ephemeral with * INADDR_ANY. If the user binds later no problem we can then add in * the specific addresses. And setup the default parameters for the * EP. */ int i, error; struct sctp_inpcb *inp; struct sctp_pcb *m; struct timeval time; sctp_sharedkey_t *null_key; error = 0; SCTP_INP_INFO_WLOCK(); inp = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_ep), struct sctp_inpcb); if (inp == NULL) { SCTP_PRINTF("Out of SCTP-INPCB structures - no resources\n"); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); return (ENOBUFS); } /* zap it */ bzero(inp, sizeof(*inp)); /* bump generations */ /* setup socket pointers */ inp->sctp_socket = so; inp->ip_inp.inp.inp_socket = so; inp->ip_inp.inp.inp_cred = crhold(so->so_cred); #ifdef INET6 if (INP_SOCKAF(so) == AF_INET6) { if (MODULE_GLOBAL(ip6_auto_flowlabel)) { inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL; } if (MODULE_GLOBAL(ip6_v6only)) { inp->ip_inp.inp.inp_flags |= IN6P_IPV6_V6ONLY; } } #endif inp->sctp_associd_counter = 1; inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT; inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; inp->max_cwnd = 0; inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off); inp->ecn_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_ecn_enable); inp->prsctp_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_pr_enable); inp->auth_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_auth_enable); inp->asconf_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_asconf_enable); inp->reconfig_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_reconfig_enable); inp->nrsack_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_nrsack_enable); inp->pktdrop_supported = (uint8_t)SCTP_BASE_SYSCTL(sctp_pktdrop_enable); inp->idata_supported = 0; inp->fibnum = so->so_fibnum; /* init the small hash table we use to track asocid <-> tcb */ inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark); if (inp->sctp_asocidhash == NULL) { crfree(inp->ip_inp.inp.inp_cred); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); SCTP_INP_INFO_WUNLOCK(); return (ENOBUFS); } -#ifdef IPSEC - error = ipsec_init_policy(so, &inp->ip_inp.inp.inp_sp); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + error = ipsec_init_pcbpolicy(&inp->ip_inp.inp); if (error != 0) { crfree(inp->ip_inp.inp.inp_cred); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); SCTP_INP_INFO_WUNLOCK(); return error; } #endif /* IPSEC */ SCTP_INCR_EP_COUNT(); inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl); SCTP_INP_INFO_WUNLOCK(); so->so_pcb = (caddr_t)inp; if (SCTP_SO_TYPE(so) == SOCK_SEQPACKET) { /* UDP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE | SCTP_PCB_FLAGS_UNBOUND); /* Be sure it is NON-BLOCKING IO for UDP */ /* SCTP_SET_SO_NBIO(so); */ } else if (SCTP_SO_TYPE(so) == SOCK_STREAM) { /* TCP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE | SCTP_PCB_FLAGS_UNBOUND); /* Be sure we have blocking IO by default */ SCTP_CLEAR_SO_NBIO(so); } else { /* * unsupported socket type (RAW, etc)- in case we missed it * in protosw */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP); so->so_pcb = NULL; crfree(inp->ip_inp.inp.inp_cred); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipsec_delete_pcbpolicy(&inp->ip_inp.inp); #endif SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); return (EOPNOTSUPP); } if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_1) { sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_2) { sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } inp->sctp_tcbhash = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_pcbtblsize), &inp->sctp_hashmark); if (inp->sctp_tcbhash == NULL) { SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n"); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); so->so_pcb = NULL; crfree(inp->ip_inp.inp.inp_cred); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipsec_delete_pcbpolicy(&inp->ip_inp.inp); #endif SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); return (ENOBUFS); } inp->def_vrf_id = vrf_id; SCTP_INP_INFO_WLOCK(); SCTP_INP_LOCK_INIT(inp); INP_LOCK_INIT(&inp->ip_inp.inp, "inp", "sctpinp"); SCTP_INP_READ_INIT(inp); SCTP_ASOC_CREATE_LOCK_INIT(inp); /* lock the new ep */ SCTP_INP_WLOCK(inp); /* add it to the info area */ LIST_INSERT_HEAD(&SCTP_BASE_INFO(listhead), inp, sctp_list); SCTP_INP_INFO_WUNLOCK(); TAILQ_INIT(&inp->read_queue); LIST_INIT(&inp->sctp_addr_list); LIST_INIT(&inp->sctp_asoc_list); #ifdef SCTP_TRACK_FREED_ASOCS /* TEMP CODE */ LIST_INIT(&inp->sctp_asoc_free_list); #endif /* Init the timer structure for signature change */ SCTP_OS_TIMER_INIT(&inp->sctp_ep.signature_change.timer); inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NEWCOOKIE; /* now init the actual endpoint default data */ m = &inp->sctp_ep; /* setup the base timeout information */ m->sctp_timeoutticks[SCTP_TIMER_SEND] = SEC_TO_TICKS(SCTP_SEND_SEC); /* needed ? */ m->sctp_timeoutticks[SCTP_TIMER_INIT] = SEC_TO_TICKS(SCTP_INIT_SEC); /* needed ? */ m->sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default)); m->sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default)); m->sctp_timeoutticks[SCTP_TIMER_PMTU] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default)); m->sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default)); m->sctp_timeoutticks[SCTP_TIMER_SIGNATURE] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default)); /* all max/min max are in ms */ m->sctp_maxrto = SCTP_BASE_SYSCTL(sctp_rto_max_default); m->sctp_minrto = SCTP_BASE_SYSCTL(sctp_rto_min_default); m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default); m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default); m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default); m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default); m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default); m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default); m->def_net_pf_threshold = SCTP_BASE_SYSCTL(sctp_path_pf_threshold); m->sctp_sws_sender = SCTP_SWS_SENDER_DEF; m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF; m->max_burst = SCTP_BASE_SYSCTL(sctp_max_burst_default); m->fr_max_burst = SCTP_BASE_SYSCTL(sctp_fr_max_burst_default); m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module); m->sctp_default_ss_module = SCTP_BASE_SYSCTL(sctp_default_ss_module); m->max_open_streams_intome = SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default); /* number of streams to pre-open on a association */ m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default); /* Add adaptation cookie */ m->adaptation_layer_indicator = 0; m->adaptation_layer_indicator_provided = 0; /* seed random number generator */ m->random_counter = 1; m->store_at = SCTP_SIGNATURE_SIZE; SCTP_READ_RANDOM(m->random_numbers, sizeof(m->random_numbers)); sctp_fill_random_store(m); /* Minimum cookie size */ m->size_of_a_cookie = (sizeof(struct sctp_init_msg) * 2) + sizeof(struct sctp_state_cookie); m->size_of_a_cookie += SCTP_SIGNATURE_SIZE; /* Setup the initial secret */ (void)SCTP_GETTIME_TIMEVAL(&time); m->time_of_secret_change = time.tv_sec; for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) { m->secret_key[0][i] = sctp_select_initial_TSN(m); } sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL); /* How long is a cookie good for ? */ m->def_cookie_life = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default)); /* * Initialize authentication parameters */ m->local_hmacs = sctp_default_supported_hmaclist(); m->local_auth_chunks = sctp_alloc_chunklist(); if (inp->asconf_supported) { sctp_auth_add_chunk(SCTP_ASCONF, m->local_auth_chunks); sctp_auth_add_chunk(SCTP_ASCONF_ACK, m->local_auth_chunks); } m->default_dscp = 0; #ifdef INET6 m->default_flowlabel = 0; #endif m->port = 0; /* encapsulation disabled by default */ LIST_INIT(&m->shared_keys); /* add default NULL key as key id 0 */ null_key = sctp_alloc_sharedkey(); sctp_insert_sharedkey(&m->shared_keys, null_key); SCTP_INP_WUNLOCK(inp); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 12); #endif return (error); } void sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp, struct sctp_tcb *stcb) { struct sctp_nets *net; uint16_t lport, rport; struct sctppcbhead *head; struct sctp_laddr *laddr, *oladdr; atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(old_inp); SCTP_INP_WLOCK(new_inp); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); new_inp->sctp_ep.time_of_secret_change = old_inp->sctp_ep.time_of_secret_change; memcpy(new_inp->sctp_ep.secret_key, old_inp->sctp_ep.secret_key, sizeof(old_inp->sctp_ep.secret_key)); new_inp->sctp_ep.current_secret_number = old_inp->sctp_ep.current_secret_number; new_inp->sctp_ep.last_secret_number = old_inp->sctp_ep.last_secret_number; new_inp->sctp_ep.size_of_a_cookie = old_inp->sctp_ep.size_of_a_cookie; /* make it so new data pours into the new socket */ stcb->sctp_socket = new_inp->sctp_socket; stcb->sctp_ep = new_inp; /* Copy the port across */ lport = new_inp->sctp_lport = old_inp->sctp_lport; rport = stcb->rport; /* Pull the tcb from the old association */ LIST_REMOVE(stcb, sctp_tcbhash); LIST_REMOVE(stcb, sctp_tcblist); if (stcb->asoc.in_asocid_hash) { LIST_REMOVE(stcb, sctp_tcbasocidhash); } /* Now insert the new_inp into the TCP connected hash */ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))]; LIST_INSERT_HEAD(head, new_inp, sctp_hash); /* Its safe to access */ new_inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND; /* Now move the tcb into the endpoint list */ LIST_INSERT_HEAD(&new_inp->sctp_asoc_list, stcb, sctp_tcblist); /* * Question, do we even need to worry about the ep-hash since we * only have one connection? Probably not :> so lets get rid of it * and not suck up any kernel memory in that. */ if (stcb->asoc.in_asocid_hash) { struct sctpasochead *lhd; lhd = &new_inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(stcb->asoc.assoc_id, new_inp->hashasocidmark)]; LIST_INSERT_HEAD(lhd, stcb, sctp_tcbasocidhash); } /* Ok. Let's restart timer. */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, new_inp, stcb, net); } SCTP_INP_INFO_WUNLOCK(); if (new_inp->sctp_tcbhash != NULL) { SCTP_HASH_FREE(new_inp->sctp_tcbhash, new_inp->sctp_hashmark); new_inp->sctp_tcbhash = NULL; } if ((new_inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { /* Subset bound, so copy in the laddr list from the old_inp */ LIST_FOREACH(oladdr, &old_inp->sctp_addr_list, sctp_nxt_addr) { laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (laddr == NULL) { /* * Gak, what can we do? This assoc is really * HOSED. We probably should send an abort * here. */ SCTPDBG(SCTP_DEBUG_PCB1, "Association hosed in TCP model, out of laddr memory\n"); continue; } SCTP_INCR_LADDR_COUNT(); bzero(laddr, sizeof(*laddr)); (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time); laddr->ifa = oladdr->ifa; atomic_add_int(&laddr->ifa->refcount, 1); LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr, sctp_nxt_addr); new_inp->laddr_count++; if (oladdr == stcb->asoc.last_used_address) { stcb->asoc.last_used_address = laddr; } } } /* * Now any running timers need to be adjusted since we really don't * care if they are running or not just blast in the new_inp into * all of them. */ stcb->asoc.dack_timer.ep = (void *)new_inp; stcb->asoc.asconf_timer.ep = (void *)new_inp; stcb->asoc.strreset_timer.ep = (void *)new_inp; stcb->asoc.shut_guard_timer.ep = (void *)new_inp; stcb->asoc.autoclose_timer.ep = (void *)new_inp; stcb->asoc.delayed_event_timer.ep = (void *)new_inp; stcb->asoc.delete_prim_timer.ep = (void *)new_inp; /* now what about the nets? */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { net->pmtu_timer.ep = (void *)new_inp; net->hb_timer.ep = (void *)new_inp; net->rxt_timer.ep = (void *)new_inp; } SCTP_INP_WUNLOCK(new_inp); SCTP_INP_WUNLOCK(old_inp); } /* * insert an laddr entry with the given ifa for the desired list */ static int sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act) { struct sctp_laddr *laddr; laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (laddr == NULL) { /* out of memory? */ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } SCTP_INCR_LADDR_COUNT(); bzero(laddr, sizeof(*laddr)); (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time); laddr->ifa = ifa; laddr->action = act; atomic_add_int(&ifa->refcount, 1); /* insert it */ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr); return (0); } /* * Remove an laddr entry from the local address list (on an assoc) */ static void sctp_remove_laddr(struct sctp_laddr *laddr) { /* remove from the list */ LIST_REMOVE(laddr, sctp_nxt_addr); sctp_free_ifa(laddr->ifa); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr); SCTP_DECR_LADDR_COUNT(); } /* sctp_ifap is used to bypass normal local address validation checks */ int sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct sctp_ifa *sctp_ifap, struct thread *p) { /* bind a ep to a socket address */ struct sctppcbhead *head; struct sctp_inpcb *inp, *inp_tmp; struct inpcb *ip_inp; int port_reuse_active = 0; int bindall; uint16_t lport; int error; uint32_t vrf_id; lport = 0; bindall = 1; inp = (struct sctp_inpcb *)so->so_pcb; ip_inp = (struct inpcb *)so->so_pcb; #ifdef SCTP_DEBUG if (addr) { SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port: %d\n", ntohs(((struct sockaddr_in *)addr)->sin_port)); SCTPDBG(SCTP_DEBUG_PCB1, "Addr: "); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr); } #endif if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) { /* already did a bind, subsequent binds NOT allowed ! */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } #ifdef INVARIANTS if (p == NULL) panic("null proc/thread"); #endif if (addr != NULL) { switch (addr->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; /* IPV6_V6ONLY socket? */ if (SCTP_IPV6_V6ONLY(ip_inp)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } if (addr->sa_len != sizeof(*sin)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } sin = (struct sockaddr_in *)addr; lport = sin->sin_port; /* * For LOOPBACK the prison_local_ip4() call * will transmute the ip address to the * proper value. */ if (p && (error = prison_local_ip4(p->td_ucred, &sin->sin_addr)) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } if (sin->sin_addr.s_addr != INADDR_ANY) { bindall = 0; } break; } #endif #ifdef INET6 case AF_INET6: { /* * Only for pure IPv6 Address. (No IPv4 * Mapped!) */ struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (addr->sa_len != sizeof(*sin6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } lport = sin6->sin6_port; /* * For LOOPBACK the prison_local_ip6() call * will transmute the ipv6 address to the * proper value. */ if (p && (error = prison_local_ip6(p->td_ucred, &sin6->sin6_addr, (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { bindall = 0; /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } } /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; break; } #endif default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EAFNOSUPPORT); return (EAFNOSUPPORT); } } SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); /* Setup a vrf_id to be the default for the non-bind-all case. */ vrf_id = inp->def_vrf_id; /* increase our count due to the unlock we do */ SCTP_INP_INCR_REF(inp); if (lport) { /* * Did the caller specify a port? if so we must see if an ep * already has this one bound. */ /* got to be root to get at low ports */ if (ntohs(lport) < IPPORT_RESERVED) { if (p && (error = priv_check(p, PRIV_NETINET_RESERVEDPORT) )) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (error); } } SCTP_INP_WUNLOCK(inp); if (bindall) { vrf_id = inp->def_vrf_id; inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id); if (inp_tmp != NULL) { /* * lock guy returned and lower count note * that we are not bound so inp_tmp should * NEVER be inp. And it is this inp * (inp_tmp) that gets the reference bump, * so we must lower it. */ SCTP_INP_DECR_REF(inp_tmp); /* unlock info */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { /* * Ok, must be one-2-one and * allowing port re-use */ port_reuse_active = 1; goto continue_anyway; } SCTP_INP_DECR_REF(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } } else { inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id); if (inp_tmp != NULL) { /* * lock guy returned and lower count note * that we are not bound so inp_tmp should * NEVER be inp. And it is this inp * (inp_tmp) that gets the reference bump, * so we must lower it. */ SCTP_INP_DECR_REF(inp_tmp); /* unlock info */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { /* * Ok, must be one-2-one and * allowing port re-use */ port_reuse_active = 1; goto continue_anyway; } SCTP_INP_DECR_REF(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } } continue_anyway: SCTP_INP_WLOCK(inp); if (bindall) { /* verify that no lport is not used by a singleton */ if ((port_reuse_active == 0) && (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))) { /* Sorry someone already has this one bound */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { port_reuse_active = 1; } else { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } } } } else { uint16_t first, last, candidate; uint16_t count; int done; if (ip_inp->inp_flags & INP_HIGHPORT) { first = MODULE_GLOBAL(ipport_hifirstauto); last = MODULE_GLOBAL(ipport_hilastauto); } else if (ip_inp->inp_flags & INP_LOWPORT) { if (p && (error = priv_check(p, PRIV_NETINET_RESERVEDPORT) )) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } first = MODULE_GLOBAL(ipport_lowfirstauto); last = MODULE_GLOBAL(ipport_lowlastauto); } else { first = MODULE_GLOBAL(ipport_firstauto); last = MODULE_GLOBAL(ipport_lastauto); } if (first > last) { uint16_t temp; temp = first; first = last; last = temp; } count = last - first + 1; /* number of candidates */ candidate = first + sctp_select_initial_TSN(&inp->sctp_ep) % (count); done = 0; while (!done) { if (sctp_isport_inuse(inp, htons(candidate), inp->def_vrf_id) == NULL) { done = 1; } if (!done) { if (--count == 0) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } if (candidate == last) candidate = first; else candidate = candidate + 1; } } lport = htons(candidate); } SCTP_INP_DECR_REF(inp); if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { /* * this really should not happen. The guy did a non-blocking * bind and then did a close at the same time. */ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } /* ok we look clear to give out this port, so lets setup the binding */ if (bindall) { /* binding to all addresses, so just set in the proper flags */ inp->sctp_flags |= SCTP_PCB_FLAGS_BOUNDALL; /* set the automatic addr changes from kernel flag */ if (SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF); sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); } else { sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF); sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); } if (SCTP_BASE_SYSCTL(sctp_multiple_asconfs) == 0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS); } else { sctp_feature_on(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS); } /* * set the automatic mobility_base from kernel flag (by * micchie) */ if (SCTP_BASE_SYSCTL(sctp_mobility_base) == 0) { sctp_mobility_feature_off(inp, SCTP_MOBILITY_BASE); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } else { sctp_mobility_feature_on(inp, SCTP_MOBILITY_BASE); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } /* * set the automatic mobility_fasthandoff from kernel flag * (by micchie) */ if (SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) == 0) { sctp_mobility_feature_off(inp, SCTP_MOBILITY_FASTHANDOFF); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } else { sctp_mobility_feature_on(inp, SCTP_MOBILITY_FASTHANDOFF); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } } else { /* * bind specific, make sure flags is off and add a new * address structure to the sctp_addr_list inside the ep * structure. * * We will need to allocate one and insert it at the head. * The socketopt call can just insert new addresses in there * as well. It will also have to do the embed scope kame * hack too (before adding). */ struct sctp_ifa *ifa; union sctp_sockstore store; memset(&store, 0, sizeof(store)); switch (addr->sa_family) { #ifdef INET case AF_INET: memcpy(&store.sin, addr, sizeof(struct sockaddr_in)); store.sin.sin_port = 0; break; #endif #ifdef INET6 case AF_INET6: memcpy(&store.sin6, addr, sizeof(struct sockaddr_in6)); store.sin6.sin6_port = 0; break; #endif default: break; } /* * first find the interface with the bound address need to * zero out the port to find the address! yuck! can't do * this earlier since need port for sctp_pcb_findep() */ if (sctp_ifap != NULL) { ifa = sctp_ifap; } else { /* * Note for BSD we hit here always other O/S's will * pass things in via the sctp_ifap argument * (Panda). */ ifa = sctp_find_ifa_by_addr(&store.sa, vrf_id, SCTP_ADDR_NOT_LOCKED); } if (ifa == NULL) { /* Can't find an interface with that address */ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL); return (EADDRNOTAVAIL); } #ifdef INET6 if (addr->sa_family == AF_INET6) { /* GAK, more FIXME IFA lock? */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. */ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } } #endif /* we're not bound all */ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL; /* allow bindx() to send ASCONF's for binding changes */ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF); /* clear automatic addr changes from kernel flag */ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); /* add this address to the endpoint list */ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0); if (error != 0) { SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (error); } inp->laddr_count++; } /* find the bucket */ if (port_reuse_active) { /* Put it into tcp 1-2-1 hash */ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashtcpmark))]; inp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL; } else { head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))]; } /* put it in the bucket */ LIST_INSERT_HEAD(head, inp, sctp_hash); SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n", (void *)head, ntohs(lport), port_reuse_active); /* set in the port */ inp->sctp_lport = lport; /* turn off just the unbound flag */ inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND; SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (0); } static void sctp_iterator_inp_being_freed(struct sctp_inpcb *inp) { struct sctp_iterator *it, *nit; /* * We enter with the only the ITERATOR_LOCK in place and a write * lock on the inp_info stuff. */ it = sctp_it_ctl.cur_it; if (it && (it->vn != curvnet)) { /* Its not looking at our VNET */ return; } if (it && (it->inp == inp)) { /* * This is tricky and we hold the iterator lock, but when it * returns and gets the lock (when we release it) the * iterator will try to operate on inp. We need to stop that * from happening. But of course the iterator has a * reference on the stcb and inp. We can mark it and it will * stop. * * If its a single iterator situation, we set the end * iterator flag. Otherwise we set the iterator to go to the * next inp. * */ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT; } else { sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_INP; } } /* * Now go through and remove any single reference to our inp that * may be still pending on the list */ SCTP_IPI_ITERATOR_WQ_LOCK(); TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) { if (it->vn != curvnet) { continue; } if (it->inp == inp) { /* This one points to me is it inp specific? */ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { /* Remove and free this one */ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (it->function_atend != NULL) { (*it->function_atend) (it->pointer, it->val); } SCTP_FREE(it, SCTP_M_ITER); } else { it->inp = LIST_NEXT(it->inp, sctp_list); if (it->inp) { SCTP_INP_INCR_REF(it->inp); } } /* * When its put in the refcnt is incremented so decr * it */ SCTP_INP_DECR_REF(inp); } } SCTP_IPI_ITERATOR_WQ_UNLOCK(); } /* release sctp_inpcb unbind the port */ void sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) { /* * Here we free a endpoint. We must find it (if it is in the Hash * table) and remove it from there. Then we must also find it in the * overall list and remove it from there. After all removals are * complete then any timer has to be stopped. Then start the actual * freeing. a) Any local lists. b) Any associations. c) The hash of * all associations. d) finally the ep itself. */ struct sctp_tcb *asoc, *nasoc; struct sctp_laddr *laddr, *nladdr; struct inpcb *ip_pcb; struct socket *so; int being_refed = 0; struct sctp_queued_to_read *sq, *nsq; int cnt; sctp_sharedkey_t *shared_key, *nshared_key; #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 0); #endif SCTP_ITERATOR_LOCK(); /* mark any iterators on the list or being processed */ sctp_iterator_inp_being_freed(inp); SCTP_ITERATOR_UNLOCK(); so = inp->sctp_socket; if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { /* been here before.. eeks.. get out of here */ SCTP_PRINTF("This conflict in free SHOULD not be happening! from %d, imm %d\n", from, immediate); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 1); #endif return; } SCTP_ASOC_CREATE_LOCK(inp); SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); if (from == SCTP_CALLED_AFTER_CMPSET_OFCLOSE) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_CLOSE_IP; /* socket is gone, so no more wakeups allowed */ inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE; inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT; inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT; } /* First time through we have the socket lock, after that no more. */ sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL, SCTP_FROM_SCTP_PCB + SCTP_LOC_1); if (inp->control) { sctp_m_freem(inp->control); inp->control = NULL; } if (inp->pkt) { sctp_m_freem(inp->pkt); inp->pkt = NULL; } ip_pcb = &inp->ip_inp.inp; /* we could just cast the main pointer * here but I will be nice :> (i.e. * ip_pcb = ep;) */ if (immediate == SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE) { int cnt_in_sd; cnt_in_sd = 0; LIST_FOREACH_SAFE(asoc, &inp->sctp_asoc_list, sctp_tcblist, nasoc) { SCTP_TCB_LOCK(asoc); if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { /* Skip guys being freed */ cnt_in_sd++; if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) { /* * Special case - we did not start a * kill timer on the asoc due to it * was not closed. So go ahead and * start it now. */ asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL); } SCTP_TCB_UNLOCK(asoc); continue; } if (((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) && (asoc->asoc.total_output_queue_size == 0)) { /* * If we have data in queue, we don't want * to just free since the app may have done, * send()/close or connect/send/close. And * it wants the data to get across first. */ /* Just abandon things in the front states */ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_2) == 0) { cnt_in_sd++; } continue; } /* Disconnect the socket please */ asoc->sctp_socket = NULL; asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET; if ((asoc->asoc.size_on_reasm_queue > 0) || (asoc->asoc.control_pdapi) || (asoc->asoc.size_on_all_streams > 0) || (so && (so->so_rcv.sb_cc > 0))) { /* Left with Data unread */ struct mbuf *op_err; op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3; sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_4) == 0) { cnt_in_sd++; } continue; } else if (TAILQ_EMPTY(&asoc->asoc.send_queue) && TAILQ_EMPTY(&asoc->asoc.sent_queue) && (asoc->asoc.stream_queue_cnt == 0)) { if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) { goto abort_anyway; } if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { struct sctp_nets *netp; /* * there is nothing queued to send, * so I send shutdown */ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_stop_timers_for_shutdown(asoc); if (asoc->asoc.alternate) { netp = asoc->asoc.alternate; } else { netp = asoc->asoc.primary_destination; } sctp_send_shutdown(asoc, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc, asoc->asoc.primary_destination); sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED); } } else { /* mark into shutdown pending */ asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING; sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc, asoc->asoc.primary_destination); if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) { asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT; } if (TAILQ_EMPTY(&asoc->asoc.send_queue) && TAILQ_EMPTY(&asoc->asoc.sent_queue) && (asoc->asoc.state & SCTP_STATE_PARTIAL_MSG_LEFT)) { struct mbuf *op_err; abort_anyway: op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5; sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_6) == 0) { cnt_in_sd++; } continue; } else { sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED); } } cnt_in_sd++; SCTP_TCB_UNLOCK(asoc); } /* now is there some left in our SHUTDOWN state? */ if (cnt_in_sd) { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 2); #endif inp->sctp_socket = NULL; SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return; } } inp->sctp_socket = NULL; if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) != SCTP_PCB_FLAGS_UNBOUND) { /* * ok, this guy has been bound. It's port is somewhere in * the SCTP_BASE_INFO(hash table). Remove it! */ LIST_REMOVE(inp, sctp_hash); inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND; } /* * If there is a timer running to kill us, forget it, since it may * have a contest on the INP lock.. which would cause us to die ... */ cnt = 0; LIST_FOREACH_SAFE(asoc, &inp->sctp_asoc_list, sctp_tcblist, nasoc) { SCTP_TCB_LOCK(asoc); if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) { asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL); } cnt++; SCTP_TCB_UNLOCK(asoc); continue; } /* Free associations that are NOT killing us */ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) && ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) { struct mbuf *op_err; op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, ""); asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7; sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); } else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { cnt++; SCTP_TCB_UNLOCK(asoc); continue; } if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) { cnt++; } } if (cnt) { /* Ok we have someone out there that will kill us */ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 3); #endif SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return; } if (SCTP_INP_LOCK_CONTENDED(inp)) being_refed++; if (SCTP_INP_READ_CONTENDED(inp)) being_refed++; if (SCTP_ASOC_CREATE_LOCK_CONTENDED(inp)) being_refed++; if ((inp->refcount) || (being_refed) || (inp->sctp_flags & SCTP_PCB_FLAGS_CLOSE_IP)) { (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 4); #endif sctp_timer_start(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL); SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return; } inp->sctp_ep.signature_change.type = 0; inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE; /* * Remove it from the list .. last thing we need a lock for. */ LIST_REMOVE(inp, sctp_list); SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); /* * Now we release all locks. Since this INP cannot be found anymore * except possibly by the kill timer that might be running. We call * the drain function here. It should hit the case were it sees the * ACTIVE flag cleared and exit out freeing us to proceed and * destroy everything. */ if (from != SCTP_CALLED_FROM_INPKILL_TIMER) { (void)SCTP_OS_TIMER_STOP_DRAIN(&inp->sctp_ep.signature_change.timer); } else { /* Probably un-needed */ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 5); #endif if ((inp->sctp_asocidhash) != NULL) { SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark); inp->sctp_asocidhash = NULL; } /* sa_ignore FREED_MEMORY */ TAILQ_FOREACH_SAFE(sq, &inp->read_queue, next, nsq) { /* Its only abandoned if it had data left */ if (sq->length) SCTP_STAT_INCR(sctps_left_abandon); TAILQ_REMOVE(&inp->read_queue, sq, next); sctp_free_remote_addr(sq->whoFrom); if (so) so->so_rcv.sb_cc -= sq->length; if (sq->data) { sctp_m_freem(sq->data); sq->data = NULL; } /* * no need to free the net count, since at this point all * assoc's are gone. */ sctp_free_a_readq(NULL, sq); } /* Now the sctp_pcb things */ /* * free each asoc if it is not already closed/free. we can't use the * macro here since le_next will get freed as part of the * sctp_free_assoc() call. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipsec_delete_pcbpolicy(ip_pcb); #endif if (ip_pcb->inp_options) { (void)sctp_m_free(ip_pcb->inp_options); ip_pcb->inp_options = 0; } #ifdef INET6 if (ip_pcb->inp_vflag & INP_IPV6) { struct in6pcb *in6p; in6p = (struct in6pcb *)inp; ip6_freepcbopts(in6p->in6p_outputopts); } #endif /* INET6 */ ip_pcb->inp_vflag = 0; /* free up authentication fields */ if (inp->sctp_ep.local_auth_chunks != NULL) sctp_free_chunklist(inp->sctp_ep.local_auth_chunks); if (inp->sctp_ep.local_hmacs != NULL) sctp_free_hmaclist(inp->sctp_ep.local_hmacs); LIST_FOREACH_SAFE(shared_key, &inp->sctp_ep.shared_keys, next, nshared_key) { LIST_REMOVE(shared_key, next); sctp_free_sharedkey(shared_key); /* sa_ignore FREED_MEMORY */ } /* * if we have an address list the following will free the list of * ifaddr's that are set into this ep. Again macro limitations here, * since the LIST_FOREACH could be a bad idea. */ LIST_FOREACH_SAFE(laddr, &inp->sctp_addr_list, sctp_nxt_addr, nladdr) { sctp_remove_laddr(laddr); } #ifdef SCTP_TRACK_FREED_ASOCS /* TEMP CODE */ LIST_FOREACH_SAFE(asoc, &inp->sctp_asoc_free_list, sctp_tcblist, nasoc) { LIST_REMOVE(asoc, sctp_tcblist); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), asoc); SCTP_DECR_ASOC_COUNT(); } /* *** END TEMP CODE *** */ #endif /* Now lets see about freeing the EP hash table. */ if (inp->sctp_tcbhash != NULL) { SCTP_HASH_FREE(inp->sctp_tcbhash, inp->sctp_hashmark); inp->sctp_tcbhash = NULL; } /* Now we must put the ep memory back into the zone pool */ crfree(inp->ip_inp.inp.inp_cred); INP_LOCK_DESTROY(&inp->ip_inp.inp); SCTP_INP_LOCK_DESTROY(inp); SCTP_INP_READ_DESTROY(inp); SCTP_ASOC_CREATE_LOCK_DESTROY(inp); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); SCTP_DECR_EP_COUNT(); } struct sctp_nets * sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr) { struct sctp_nets *net; /* locate the address */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (sctp_cmpaddr(addr, (struct sockaddr *)&net->ro._l_addr)) return (net); } return (NULL); } int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id) { struct sctp_ifa *sctp_ifa; sctp_ifa = sctp_find_ifa_by_addr(addr, vrf_id, SCTP_ADDR_NOT_LOCKED); if (sctp_ifa) { return (1); } else { return (0); } } /* * add's a remote endpoint address, done with the INIT/INIT-ACK as well as * when a ASCONF arrives that adds it. It will also initialize all the cwnd * stats of stuff. */ int sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, struct sctp_nets **netp, uint16_t port, int set_scope, int from) { /* * The following is redundant to the same lines in the * sctp_aloc_assoc() but is needed since others call the add address * function */ struct sctp_nets *net, *netfirst; int addr_inscope; SCTPDBG(SCTP_DEBUG_PCB1, "Adding an address (from:%d) to the peer: ", from); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, newaddr); netfirst = sctp_findnet(stcb, newaddr); if (netfirst) { /* * Lie and return ok, we don't want to make the association * go away for this behavior. It will happen in the TCP * model in a connected socket. It does not reach the hash * table until after the association is built so it can't be * found. Mark as reachable, since the initial creation will * have been cleared and the NOT_IN_ASSOC flag will have * been added... and we don't want to end up removing it * back out. */ if (netfirst->dest_state & SCTP_ADDR_UNCONFIRMED) { netfirst->dest_state = (SCTP_ADDR_REACHABLE | SCTP_ADDR_UNCONFIRMED); } else { netfirst->dest_state = SCTP_ADDR_REACHABLE; } return (0); } addr_inscope = 1; switch (newaddr->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *)newaddr; if (sin->sin_addr.s_addr == 0) { /* Invalid address */ return (-1); } /* zero out the bzero area */ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); /* assure len is set */ sin->sin_len = sizeof(struct sockaddr_in); if (set_scope) { if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { stcb->asoc.scope.ipv4_local_scope = 1; } } else { /* Validate the address is in scope */ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) && (stcb->asoc.scope.ipv4_local_scope == 0)) { addr_inscope = 0; } } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)newaddr; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* Invalid address */ return (-1); } /* assure len is set */ sin6->sin6_len = sizeof(struct sockaddr_in6); if (set_scope) { if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) { stcb->asoc.scope.loopback_scope = 1; stcb->asoc.scope.local_scope = 0; stcb->asoc.scope.ipv4_local_scope = 1; stcb->asoc.scope.site_scope = 1; } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { /* * If the new destination is a * LINK_LOCAL we must have common * site scope. Don't set the local * scope since we may not share all * links, only loopback can do this. * Links on the local network would * also be on our private network * for v4 too. */ stcb->asoc.scope.ipv4_local_scope = 1; stcb->asoc.scope.site_scope = 1; } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { /* * If the new destination is * SITE_LOCAL then we must have site * scope in common. */ stcb->asoc.scope.site_scope = 1; } } else { /* Validate the address is in scope */ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) && (stcb->asoc.scope.loopback_scope == 0)) { addr_inscope = 0; } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && (stcb->asoc.scope.local_scope == 0)) { addr_inscope = 0; } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && (stcb->asoc.scope.site_scope == 0)) { addr_inscope = 0; } } break; } #endif default: /* not supported family type */ return (-1); } net = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_net), struct sctp_nets); if (net == NULL) { return (-1); } SCTP_INCR_RADDR_COUNT(); bzero(net, sizeof(struct sctp_nets)); (void)SCTP_GETTIME_TIMEVAL(&net->start_time); memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len); switch (newaddr->sa_family) { #ifdef INET case AF_INET: ((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport; break; #endif #ifdef INET6 case AF_INET6: ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport; break; #endif default: break; } net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id); if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) { stcb->asoc.scope.loopback_scope = 1; stcb->asoc.scope.ipv4_local_scope = 1; stcb->asoc.scope.local_scope = 0; stcb->asoc.scope.site_scope = 1; addr_inscope = 1; } net->failure_threshold = stcb->asoc.def_net_failure; net->pf_threshold = stcb->asoc.def_net_pf_threshold; if (addr_inscope == 0) { net->dest_state = (SCTP_ADDR_REACHABLE | SCTP_ADDR_OUT_OF_SCOPE); } else { if (from == SCTP_ADDR_IS_CONFIRMED) /* SCTP_ADDR_IS_CONFIRMED is passed by connect_x */ net->dest_state = SCTP_ADDR_REACHABLE; else net->dest_state = SCTP_ADDR_REACHABLE | SCTP_ADDR_UNCONFIRMED; } /* * We set this to 0, the timer code knows that this means its an * initial value */ net->rto_needed = 1; net->RTO = 0; net->RTO_measured = 0; stcb->asoc.numnets++; net->ref_count = 1; net->cwr_window_tsn = net->last_cwr_tsn = stcb->asoc.sending_seq - 1; net->port = port; net->dscp = stcb->asoc.default_dscp; #ifdef INET6 net->flowlabel = stcb->asoc.default_flowlabel; #endif if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { net->dest_state |= SCTP_ADDR_NOHB; } else { net->dest_state &= ~SCTP_ADDR_NOHB; } if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) { net->dest_state |= SCTP_ADDR_NO_PMTUD; } else { net->dest_state &= ~SCTP_ADDR_NO_PMTUD; } net->heart_beat_delay = stcb->asoc.heart_beat_delay; /* Init the timer structure */ SCTP_OS_TIMER_INIT(&net->rxt_timer.timer); SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer); SCTP_OS_TIMER_INIT(&net->hb_timer.timer); /* Now generate a route for this guy */ #ifdef INET6 /* KAME hack: embed scopeid */ if (newaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; (void)sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)); sin6->sin6_scope_id = 0; } #endif SCTP_RTALLOC((sctp_route_t *)&net->ro, stcb->asoc.vrf_id, stcb->sctp_ep->fibnum); if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) { /* Get source address */ net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep, stcb, (sctp_route_t *)&net->ro, net, 0, stcb->asoc.vrf_id); if (net->ro._s_addr != NULL) { net->src_addr_selected = 1; /* Now get the interface MTU */ if (net->ro._s_addr->ifn_p != NULL) { net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p); } } else { net->src_addr_selected = 0; } if (net->mtu > 0) { uint32_t rmtu; rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt); if (rmtu == 0) { /* * Start things off to match mtu of * interface please. */ SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa, net->ro.ro_rt, net->mtu); } else { /* * we take the route mtu over the interface, * since the route may be leading out the * loopback, or a different interface. */ net->mtu = rmtu; } } } else { net->src_addr_selected = 0; } if (net->mtu == 0) { switch (newaddr->sa_family) { #ifdef INET case AF_INET: net->mtu = SCTP_DEFAULT_MTU; break; #endif #ifdef INET6 case AF_INET6: net->mtu = 1280; break; #endif default: break; } } #if defined(INET) || defined(INET6) if (net->port) { net->mtu -= (uint32_t)sizeof(struct udphdr); } #endif if (from == SCTP_ALLOC_ASOC) { stcb->asoc.smallest_mtu = net->mtu; } if (stcb->asoc.smallest_mtu > net->mtu) { sctp_pathmtu_adjustment(stcb, net->mtu); } #ifdef INET6 if (newaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; (void)sa6_recoverscope(sin6); } #endif /* JRS - Use the congestion control given in the CC module */ if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) (*stcb->asoc.cc_functions.sctp_set_initial_cc_param) (stcb, net); /* * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning * of assoc (2005/06/27, iyengar@cis.udel.edu) */ net->find_pseudo_cumack = 1; net->find_rtx_pseudo_cumack = 1; /* Choose an initial flowid. */ net->flowid = stcb->asoc.my_vtag ^ ntohs(stcb->rport) ^ ntohs(stcb->sctp_ep->sctp_lport); net->flowtype = M_HASHTYPE_OPAQUE_HASH; if (netp) { *netp = net; } netfirst = TAILQ_FIRST(&stcb->asoc.nets); if (net->ro.ro_rt == NULL) { /* Since we have no route put it at the back */ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next); } else if (netfirst == NULL) { /* We are the first one in the pool. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else if (netfirst->ro.ro_rt == NULL) { /* * First one has NO route. Place this one ahead of the first * one. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) { /* * This one has a different interface than the one at the * top of the list. Place it ahead. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else { /* * Ok we have the same interface as the first one. Move * forward until we find either a) one with a NULL route... * insert ahead of that b) one with a different ifp.. insert * after that. c) end of the list.. insert at the tail. */ struct sctp_nets *netlook; do { netlook = TAILQ_NEXT(netfirst, sctp_next); if (netlook == NULL) { /* End of the list */ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next); break; } else if (netlook->ro.ro_rt == NULL) { /* next one has NO route */ TAILQ_INSERT_BEFORE(netfirst, net, sctp_next); break; } else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) { TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook, net, sctp_next); break; } /* Shift forward */ netfirst = netlook; } while (netlook != NULL); } /* got to have a primary set */ if (stcb->asoc.primary_destination == 0) { stcb->asoc.primary_destination = net; } else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) && (net->ro.ro_rt) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) { /* No route to current primary adopt new primary */ stcb->asoc.primary_destination = net; } /* Validate primary is first */ net = TAILQ_FIRST(&stcb->asoc.nets); if ((net != stcb->asoc.primary_destination) && (stcb->asoc.primary_destination)) { /* * first one on the list is NOT the primary sctp_cmpaddr() * is much more efficient if the primary is the first on the * list, make it so. */ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); } return (0); } static uint32_t sctp_aloc_a_assoc_id(struct sctp_inpcb *inp, struct sctp_tcb *stcb) { uint32_t id; struct sctpasochead *head; struct sctp_tcb *lstcb; SCTP_INP_WLOCK(inp); try_again: if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { /* TSNH */ SCTP_INP_WUNLOCK(inp); return (0); } /* * We don't allow assoc id to be one of SCTP_FUTURE_ASSOC, * SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC. */ if (inp->sctp_associd_counter <= SCTP_ALL_ASSOC) { inp->sctp_associd_counter = SCTP_ALL_ASSOC + 1; } id = inp->sctp_associd_counter; inp->sctp_associd_counter++; lstcb = sctp_findasoc_ep_asocid_locked(inp, (sctp_assoc_t)id, 0); if (lstcb) { goto try_again; } head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)]; LIST_INSERT_HEAD(head, stcb, sctp_tcbasocidhash); stcb->asoc.in_asocid_hash = 1; SCTP_INP_WUNLOCK(inp); return id; } /* * allocate an association and add it to the endpoint. The caller must be * careful to add all additional addresses once they are know right away or * else the assoc will be may experience a blackout scenario. */ struct sctp_tcb * sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, int *error, uint32_t override_tag, uint32_t vrf_id, uint16_t o_streams, uint16_t port, struct thread *p ) { /* note the p argument is only valid in unbound sockets */ struct sctp_tcb *stcb; struct sctp_association *asoc; struct sctpasochead *head; uint16_t rport; int err; /* * Assumption made here: Caller has done a * sctp_findassociation_ep_addr(ep, addr's); to make sure the * address does not exist already. */ if (SCTP_BASE_INFO(ipi_count_asoc) >= SCTP_MAX_NUM_OF_ASOC) { /* Hit max assoc, sorry no more */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); *error = ENOBUFS; return (NULL); } if (firstaddr == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTP_INP_RLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) && ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) || (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) { /* * If its in the TCP pool, its NOT allowed to create an * association. The parent listener needs to call * sctp_aloc_assoc.. or the one-2-many socket. If a peeled * off, or connected one does this.. its an error. */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) || (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE)) { if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) || (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED)) { SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } } SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:"); #ifdef SCTP_DEBUG if (firstaddr) { SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr); switch (firstaddr->sa_family) { #ifdef INET case AF_INET: SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n", ntohs(((struct sockaddr_in *)firstaddr)->sin_port)); break; #endif #ifdef INET6 case AF_INET6: SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n", ntohs(((struct sockaddr_in6 *)firstaddr)->sin6_port)); break; #endif default: break; } } else { SCTPDBG(SCTP_DEBUG_PCB3, "None\n"); } #endif /* SCTP_DEBUG */ switch (firstaddr->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *)firstaddr; if ((ntohs(sin->sin_port) == 0) || (sin->sin_addr.s_addr == INADDR_ANY) || (sin->sin_addr.s_addr == INADDR_BROADCAST) || IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* Invalid address */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } rport = sin->sin_port; break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)firstaddr; if ((ntohs(sin6->sin6_port) == 0) || IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* Invalid address */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } rport = sin6->sin6_port; break; } #endif default: /* not supported family type */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTP_INP_RUNLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) { /* * If you have not performed a bind, then we need to do the * ephemeral bind for you. */ if ((err = sctp_inpcb_bind(inp->sctp_socket, (struct sockaddr *)NULL, (struct sctp_ifa *)NULL, p ))) { /* bind error, probably perm */ *error = err; return (NULL); } } stcb = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_asoc), struct sctp_tcb); if (stcb == NULL) { /* out of memory? */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM); *error = ENOMEM; return (NULL); } SCTP_INCR_ASOC_COUNT(); bzero(stcb, sizeof(*stcb)); asoc = &stcb->asoc; asoc->assoc_id = sctp_aloc_a_assoc_id(inp, stcb); SCTP_TCB_LOCK_INIT(stcb); SCTP_TCB_SEND_LOCK_INIT(stcb); stcb->rport = rport; /* setup back pointer's */ stcb->sctp_ep = inp; stcb->sctp_socket = inp->sctp_socket; if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id, o_streams))) { /* failed */ SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_DECR_ASOC_COUNT(); *error = err; return (NULL); } /* and the port */ SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { /* inpcb freed while alloc going on */ SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_DECR_ASOC_COUNT(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTP_TCB_LOCK(stcb); /* now that my_vtag is set, add it to the hash */ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))]; /* put it in the bucket in the vtag hash of assoc's for the system */ LIST_INSERT_HEAD(head, stcb, sctp_asocs); SCTP_INP_INFO_WUNLOCK(); if ((err = sctp_add_remote_addr(stcb, firstaddr, NULL, port, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) { /* failure.. memory error? */ if (asoc->strmout) { SCTP_FREE(asoc->strmout, SCTP_M_STRMO); asoc->strmout = NULL; } if (asoc->mapping_array) { SCTP_FREE(asoc->mapping_array, SCTP_M_MAP); asoc->mapping_array = NULL; } if (asoc->nr_mapping_array) { SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP); asoc->nr_mapping_array = NULL; } SCTP_DECR_ASOC_COUNT(); SCTP_TCB_UNLOCK(stcb); SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_INP_WUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); *error = ENOBUFS; return (NULL); } /* Init all the timers */ SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer); SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer); SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer); SCTP_OS_TIMER_INIT(&asoc->shut_guard_timer.timer); SCTP_OS_TIMER_INIT(&asoc->autoclose_timer.timer); SCTP_OS_TIMER_INIT(&asoc->delayed_event_timer.timer); SCTP_OS_TIMER_INIT(&asoc->delete_prim_timer.timer); LIST_INSERT_HEAD(&inp->sctp_asoc_list, stcb, sctp_tcblist); /* now file the port under the hash as well */ if (inp->sctp_tcbhash != NULL) { head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(stcb->rport, inp->sctp_hashmark)]; LIST_INSERT_HEAD(head, stcb, sctp_tcbhash); } SCTP_INP_WUNLOCK(inp); SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", (void *)stcb); return (stcb); } void sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; asoc = &stcb->asoc; asoc->numnets--; TAILQ_REMOVE(&asoc->nets, net, sctp_next); if (net == asoc->primary_destination) { /* Reset primary */ struct sctp_nets *lnet; lnet = TAILQ_FIRST(&asoc->nets); /* * Mobility adaptation Ideally, if deleted destination is * the primary, it becomes a fast retransmission trigger by * the subsequent SET PRIMARY. (by micchie) */ if (sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) || sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_FASTHANDOFF)) { SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: primary dst is deleting\n"); if (asoc->deleted_primary != NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: deleted primary may be already stored\n"); goto out; } asoc->deleted_primary = net; atomic_add_int(&net->ref_count, 1); memset(&net->lastsa, 0, sizeof(net->lastsa)); memset(&net->lastsv, 0, sizeof(net->lastsv)); sctp_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_PRIM_DELETED); sctp_timer_start(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL); } out: /* Try to find a confirmed primary */ asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, 0); } if (net == asoc->last_data_chunk_from) { /* Reset primary */ asoc->last_data_chunk_from = TAILQ_FIRST(&asoc->nets); } if (net == asoc->last_control_chunk_from) { /* Clear net */ asoc->last_control_chunk_from = NULL; } if (net == stcb->asoc.alternate) { sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } sctp_free_remote_addr(net); } /* * remove a remote endpoint address from an association, it will fail if the * address does not exist. */ int sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr) { /* * Here we need to remove a remote address. This is quite simple, we * first find it in the list of address for the association * (tasoc->asoc.nets) and then if it is there, we do a LIST_REMOVE * on that item. Note we do not allow it to be removed if there are * no other addresses. */ struct sctp_association *asoc; struct sctp_nets *net, *nnet; asoc = &stcb->asoc; /* locate the address */ TAILQ_FOREACH_SAFE(net, &asoc->nets, sctp_next, nnet) { if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) { continue; } if (sctp_cmpaddr((struct sockaddr *)&net->ro._l_addr, remaddr)) { /* we found the guy */ if (asoc->numnets < 2) { /* Must have at LEAST two remote addresses */ return (-1); } else { sctp_remove_net(stcb, net); return (0); } } } /* not found. */ return (-2); } void sctp_delete_from_timewait(uint32_t tag, uint16_t lport, uint16_t rport) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; int found = 0; int i; chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if ((twait_block->vtag_block[i].v_tag == tag) && (twait_block->vtag_block[i].lport == lport) && (twait_block->vtag_block[i].rport == rport)) { twait_block->vtag_block[i].tv_sec_at_expire = 0; twait_block->vtag_block[i].v_tag = 0; twait_block->vtag_block[i].lport = 0; twait_block->vtag_block[i].rport = 0; found = 1; break; } } if (found) break; } } int sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; int found = 0; int i; SCTP_INP_INFO_WLOCK(); chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if ((twait_block->vtag_block[i].v_tag == tag) && (twait_block->vtag_block[i].lport == lport) && (twait_block->vtag_block[i].rport == rport)) { found = 1; break; } } if (found) break; } SCTP_INP_INFO_WUNLOCK(); return (found); } void sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t rport) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; struct timeval now; int set, i; if (time == 0) { /* Its disabled */ return; } (void)SCTP_GETTIME_TIMEVAL(&now); chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; set = 0; LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { /* Block(s) present, lets find space, and expire on the fly */ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if ((twait_block->vtag_block[i].v_tag == 0) && !set) { twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time; twait_block->vtag_block[i].v_tag = tag; twait_block->vtag_block[i].lport = lport; twait_block->vtag_block[i].rport = rport; set = 1; } else if ((twait_block->vtag_block[i].v_tag) && ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) { /* Audit expires this guy */ twait_block->vtag_block[i].tv_sec_at_expire = 0; twait_block->vtag_block[i].v_tag = 0; twait_block->vtag_block[i].lport = 0; twait_block->vtag_block[i].rport = 0; if (set == 0) { /* Reuse it for my new tag */ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time; twait_block->vtag_block[i].v_tag = tag; twait_block->vtag_block[i].lport = lport; twait_block->vtag_block[i].rport = rport; set = 1; } } } if (set) { /* * We only do up to the block where we can place our * tag for audits */ break; } } /* Need to add a new block to chain */ if (!set) { SCTP_MALLOC(twait_block, struct sctp_tagblock *, sizeof(struct sctp_tagblock), SCTP_M_TIMW); if (twait_block == NULL) { #ifdef INVARIANTS panic("Can not alloc tagblock"); #endif return; } memset(twait_block, 0, sizeof(struct sctp_tagblock)); LIST_INSERT_HEAD(chain, twait_block, sctp_nxt_tagblock); twait_block->vtag_block[0].tv_sec_at_expire = now.tv_sec + time; twait_block->vtag_block[0].v_tag = tag; twait_block->vtag_block[0].lport = lport; twait_block->vtag_block[0].rport = rport; } } void sctp_clean_up_stream(struct sctp_tcb *stcb, struct sctp_readhead *rh) { struct sctp_tmit_chunk *chk, *nchk; struct sctp_queued_to_read *ctl, *nctl; TAILQ_FOREACH_SAFE(ctl, rh, next_instrm, nctl) { TAILQ_REMOVE(rh, ctl, next_instrm); ctl->on_strm_q = 0; if (ctl->on_read_q == 0) { sctp_free_remote_addr(ctl->whoFrom); if (ctl->data) { sctp_m_freem(ctl->data); ctl->data = NULL; } } /* Reassembly free? */ TAILQ_FOREACH_SAFE(chk, &ctl->reasm, sctp_next, nchk) { TAILQ_REMOVE(&ctl->reasm, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ } /* * We don't free the address here since all the net's were * freed above. */ if (ctl->on_read_q == 0) { sctp_free_a_readq(stcb, ctl); } } } /*- * Free the association after un-hashing the remote port. This * function ALWAYS returns holding NO LOCK on the stcb. It DOES * expect that the input to this function IS a locked TCB. * It will return 0, if it did NOT destroy the association (instead * it unlocks it. It will return NON-zero if it either destroyed the * association OR the association is already destroyed. */ int sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfree, int from_location) { int i; struct sctp_association *asoc; struct sctp_nets *net, *nnet; struct sctp_laddr *laddr, *naddr; struct sctp_tmit_chunk *chk, *nchk; struct sctp_asconf_addr *aparam, *naparam; struct sctp_asconf_ack *aack, *naack; struct sctp_stream_reset_list *strrst, *nstrrst; struct sctp_queued_to_read *sq, *nsq; struct sctp_stream_queue_pending *sp, *nsp; sctp_sharedkey_t *shared_key, *nshared_key; struct socket *so; /* first, lets purge the entry from the hash table. */ #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 6); #endif if (stcb->asoc.state == 0) { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 7); #endif /* there is no asoc, really TSNH :-0 */ return (1); } if (stcb->asoc.alternate) { sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } /* TEMP CODE */ if (stcb->freed_from_where == 0) { /* Only record the first place free happened from */ stcb->freed_from_where = from_location; } /* TEMP CODE */ asoc = &stcb->asoc; if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) /* nothing around */ so = NULL; else so = inp->sctp_socket; /* * We used timer based freeing if a reader or writer is in the way. * So we first check if we are actually being called from a timer, * if so we abort early if a reader or writer is still in the way. */ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) && (from_inpcbfree == SCTP_NORMAL_PROC)) { /* * is it the timer driving us? if so are the reader/writers * gone? */ if (stcb->asoc.refcnt) { /* nope, reader or writer in the way */ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); /* no asoc destroyed */ SCTP_TCB_UNLOCK(stcb); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 8); #endif return (0); } } /* now clean up any other timers */ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); asoc->dack_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); /*- * For stream reset we don't blast this unless * it is a str-reset timer, it might be the * free-asoc timer which we DON'T want to * disturb. */ if (asoc->strreset_timer.type == SCTP_TIMER_TYPE_STRRESET) asoc->strreset_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer); asoc->asconf_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer); asoc->autoclose_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer); asoc->shut_guard_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer); asoc->delayed_event_timer.self = NULL; /* Mobility adaptation */ (void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer); asoc->delete_prim_timer.self = NULL; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer); net->rxt_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); net->pmtu_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&net->hb_timer.timer); net->hb_timer.self = NULL; } /* Now the read queue needs to be cleaned up (only once) */ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) { stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED; SCTP_INP_READ_LOCK(inp); TAILQ_FOREACH(sq, &inp->read_queue, next) { if (sq->stcb == stcb) { sq->do_not_ref_stcb = 1; sq->sinfo_cumtsn = stcb->asoc.cumulative_tsn; /* * If there is no end, there never will be * now. */ if (sq->end_added == 0) { /* Held for PD-API clear that. */ sq->pdapi_aborted = 1; sq->held_length = 0; if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) { /* * Need to add a PD-API * aborted indication. * Setting the control_pdapi * assures that it will be * added right after this * msg. */ uint32_t strseq; stcb->asoc.control_pdapi = sq; strseq = (sq->sinfo_stream << 16) | (sq->mid & 0x0000ffff); sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION, stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&strseq, SCTP_SO_LOCKED); stcb->asoc.control_pdapi = NULL; } } /* Add an end to wake them */ sq->end_added = 1; } } SCTP_INP_READ_UNLOCK(inp); if (stcb->block_entry) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET); stcb->block_entry->error = ECONNRESET; stcb->block_entry = NULL; } } if ((stcb->asoc.refcnt) || (stcb->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE)) { /* * Someone holds a reference OR the socket is unaccepted * yet. */ if ((stcb->asoc.refcnt) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) { stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); } SCTP_TCB_UNLOCK(stcb); if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) /* nothing around */ so = NULL; if (so) { /* Wake any reader/writers */ sctp_sorwakeup(inp, so); sctp_sowwakeup(inp, so); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 9); #endif /* no asoc destroyed */ return (0); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 10); #endif /* * When I reach here, no others want to kill the assoc yet.. and I * own the lock. Now its possible an abort comes in when I do the * lock exchange below to grab all the locks to do the final take * out. to prevent this we increment the count, which will start a * timer and blow out above thus assuring us that we hold exclusive * killing of the asoc. Note that after getting back the TCB lock we * will go ahead and increment the counter back up and stop any * timer a passing stranger may have started :-S */ if (from_inpcbfree == SCTP_NORMAL_PROC) { atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); SCTP_TCB_LOCK(stcb); } /* Double check the GONE flag */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) /* nothing around */ so = NULL; if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /* * For TCP type we need special handling when we are * connected. We also include the peel'ed off ones to. */ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED; inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED; if (so) { SOCK_LOCK(so); if (so->so_rcv.sb_cc == 0) { so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING | SS_ISCONNECTED); } socantrcvmore_locked(so); socantsendmore(so); sctp_sowwakeup(inp, so); sctp_sorwakeup(inp, so); SCTP_SOWAKEUP(so); } } } /* * Make it invalid too, that way if its about to run it will abort * and return. */ /* re-increment the lock */ if (from_inpcbfree == SCTP_NORMAL_PROC) { atomic_add_int(&stcb->asoc.refcnt, -1); } if (stcb->asoc.refcnt) { stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_INFO_WUNLOCK(); SCTP_INP_WUNLOCK(inp); } SCTP_TCB_UNLOCK(stcb); return (0); } asoc->state = 0; if (inp->sctp_tcbhash) { LIST_REMOVE(stcb, sctp_tcbhash); } if (stcb->asoc.in_asocid_hash) { LIST_REMOVE(stcb, sctp_tcbasocidhash); } /* Now lets remove it from the list of ALL associations in the EP */ LIST_REMOVE(stcb, sctp_tcblist); if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_INCR_REF(inp); SCTP_INP_WUNLOCK(inp); } /* pull from vtag hash */ LIST_REMOVE(stcb, sctp_asocs); sctp_add_vtag_to_timewait(asoc->my_vtag, SCTP_BASE_SYSCTL(sctp_vtag_time_wait), inp->sctp_lport, stcb->rport); /* * Now restop the timers to be sure this is paranoia at is finest! */ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer); TAILQ_FOREACH(net, &asoc->nets, sctp_next) { (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->hb_timer.timer); } asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE; /* * The chunk lists and such SHOULD be empty but we check them just * in case. */ /* anything on the wheel needs to be removed */ for (i = 0; i < asoc->streamoutcnt; i++) { struct sctp_stream_out *outs; outs = &asoc->strmout[i]; /* now clean up any chunks here */ TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) { atomic_subtract_int(&asoc->stream_queue_cnt, 1); TAILQ_REMOVE(&outs->outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 0); sctp_free_spbufspace(stcb, asoc, sp); if (sp->data) { if (so) { /* Still an open socket - report */ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb, 0, (void *)sp, SCTP_SO_LOCKED); } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; sp->tail_mbuf = NULL; sp->length = 0; } } if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED); } } /* sa_ignore FREED_MEMORY */ TAILQ_FOREACH_SAFE(strrst, &asoc->resetHead, next_resp, nstrrst) { TAILQ_REMOVE(&asoc->resetHead, strrst, next_resp); SCTP_FREE(strrst, SCTP_M_STRESET); } TAILQ_FOREACH_SAFE(sq, &asoc->pending_reply_queue, next, nsq) { TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next); if (sq->data) { sctp_m_freem(sq->data); sq->data = NULL; } sctp_free_remote_addr(sq->whoFrom); sq->whoFrom = NULL; sq->stcb = NULL; /* Free the ctl entry */ sctp_free_a_readq(stcb, sq); /* sa_ignore FREED_MEMORY */ } TAILQ_FOREACH_SAFE(chk, &asoc->free_chunks, sctp_next, nchk) { TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); asoc->free_chunk_cnt--; /* sa_ignore FREED_MEMORY */ } /* pending send queue SHOULD be empty */ TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { if (asoc->strmout[chk->rec.data.sid].chunks_on_queues > 0) { asoc->strmout[chk->rec.data.sid].chunks_on_queues--; #ifdef INVARIANTS } else { panic("No chunks on the queues for sid %u.", chk->rec.data.sid); #endif } TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); if (chk->data) { if (so) { /* Still a socket? */ sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb, 0, chk, SCTP_SO_LOCKED); } if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); if (chk->whoTo) { sctp_free_remote_addr(chk->whoTo); chk->whoTo = NULL; } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ } /* sent queue SHOULD be empty */ TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) { if (chk->sent != SCTP_DATAGRAM_NR_ACKED) { if (asoc->strmout[chk->rec.data.sid].chunks_on_queues > 0) { asoc->strmout[chk->rec.data.sid].chunks_on_queues--; #ifdef INVARIANTS } else { panic("No chunks on the queues for sid %u.", chk->rec.data.sid); #endif } } TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); if (chk->data) { if (so) { /* Still a socket? */ sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb, 0, chk, SCTP_SO_LOCKED); } if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ } #ifdef INVARIANTS for (i = 0; i < stcb->asoc.streamoutcnt; i++) { if (stcb->asoc.strmout[i].chunks_on_queues > 0) { panic("%u chunks left for stream %u.", stcb->asoc.strmout[i].chunks_on_queues, i); } } #endif /* control queue MAY not be empty */ TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ } /* ASCONF queue MAY not be empty */ TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) { TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED); sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ } if (asoc->mapping_array) { SCTP_FREE(asoc->mapping_array, SCTP_M_MAP); asoc->mapping_array = NULL; } if (asoc->nr_mapping_array) { SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP); asoc->nr_mapping_array = NULL; } /* the stream outs */ if (asoc->strmout) { SCTP_FREE(asoc->strmout, SCTP_M_STRMO); asoc->strmout = NULL; } asoc->strm_realoutsize = asoc->streamoutcnt = 0; if (asoc->strmin) { for (i = 0; i < asoc->streamincnt; i++) { sctp_clean_up_stream(stcb, &asoc->strmin[i].inqueue); sctp_clean_up_stream(stcb, &asoc->strmin[i].uno_inqueue); } SCTP_FREE(asoc->strmin, SCTP_M_STRMI); asoc->strmin = NULL; } asoc->streamincnt = 0; TAILQ_FOREACH_SAFE(net, &asoc->nets, sctp_next, nnet) { #ifdef INVARIANTS if (SCTP_BASE_INFO(ipi_count_raddr) == 0) { panic("no net's left alloc'ed, or list points to itself"); } #endif TAILQ_REMOVE(&asoc->nets, net, sctp_next); sctp_free_remote_addr(net); } LIST_FOREACH_SAFE(laddr, &asoc->sctp_restricted_addrs, sctp_nxt_addr, naddr) { /* sa_ignore FREED_MEMORY */ sctp_remove_laddr(laddr); } /* pending asconf (address) parameters */ TAILQ_FOREACH_SAFE(aparam, &asoc->asconf_queue, next, naparam) { /* sa_ignore FREED_MEMORY */ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next); SCTP_FREE(aparam, SCTP_M_ASC_ADDR); } TAILQ_FOREACH_SAFE(aack, &asoc->asconf_ack_sent, next, naack) { /* sa_ignore FREED_MEMORY */ TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next); if (aack->data != NULL) { sctp_m_freem(aack->data); } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), aack); } /* clean up auth stuff */ if (asoc->local_hmacs) sctp_free_hmaclist(asoc->local_hmacs); if (asoc->peer_hmacs) sctp_free_hmaclist(asoc->peer_hmacs); if (asoc->local_auth_chunks) sctp_free_chunklist(asoc->local_auth_chunks); if (asoc->peer_auth_chunks) sctp_free_chunklist(asoc->peer_auth_chunks); sctp_free_authinfo(&asoc->authinfo); LIST_FOREACH_SAFE(shared_key, &asoc->shared_keys, next, nshared_key) { LIST_REMOVE(shared_key, next); sctp_free_sharedkey(shared_key); /* sa_ignore FREED_MEMORY */ } /* Insert new items here :> */ /* Get rid of LOCK */ SCTP_TCB_UNLOCK(stcb); SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_INFO_WUNLOCK(); SCTP_INP_RLOCK(inp); } #ifdef SCTP_TRACK_FREED_ASOCS if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* now clean up the tasoc itself */ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_DECR_ASOC_COUNT(); } else { LIST_INSERT_HEAD(&inp->sctp_asoc_free_list, stcb, sctp_tcblist); } #else SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_DECR_ASOC_COUNT(); #endif if (from_inpcbfree == SCTP_NORMAL_PROC) { if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* * If its NOT the inp_free calling us AND sctp_close * as been called, we call back... */ SCTP_INP_RUNLOCK(inp); /* * This will start the kill timer (if we are the * last one) since we hold an increment yet. But * this is the only safe way to do this since * otherwise if the socket closes at the same time * we are here we might collide in the cleanup. */ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE, SCTP_CALLED_DIRECTLY_NOCMPSET); SCTP_INP_DECR_REF(inp); goto out_of; } else { /* The socket is still open. */ SCTP_INP_DECR_REF(inp); } } if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_RUNLOCK(inp); } out_of: /* destroyed the asoc */ #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 11); #endif return (1); } /* * determine if a destination is "reachable" based upon the addresses bound * to the current endpoint (e.g. only v4 or v6 currently bound) */ /* * FIX: if we allow assoc-level bindx(), then this needs to be fixed to use * assoc level v4/v6 flags, as the assoc *may* not have the same address * types bound as its endpoint */ int sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr) { struct sctp_inpcb *inp; int answer; /* * No locks here, the TCB, in all cases is already locked and an * assoc is up. There is either a INP lock by the caller applied (in * asconf case when deleting an address) or NOT in the HB case, * however if HB then the INP increment is up and the INP will not * be removed (on top of the fact that we have a TCB lock). So we * only want to read the sctp_flags, which is either bound-all or * not.. no protection needed since once an assoc is up you can't be * changing your binding. */ inp = stcb->sctp_ep; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* if bound all, destination is not restricted */ /* * RRS: Question during lock work: Is this correct? If you * are bound-all you still might need to obey the V4--V6 * flags??? IMO this bound-all stuff needs to be removed! */ return (1); } /* NOTE: all "scope" checks are done when local addresses are added */ switch (destaddr->sa_family) { #ifdef INET6 case AF_INET6: answer = inp->ip_inp.inp.inp_vflag & INP_IPV6; break; #endif #ifdef INET case AF_INET: answer = inp->ip_inp.inp.inp_vflag & INP_IPV4; break; #endif default: /* invalid family, so it's unreachable */ answer = 0; break; } return (answer); } /* * update the inp_vflags on an endpoint */ static void sctp_update_ep_vflag(struct sctp_inpcb *inp) { struct sctp_laddr *laddr; /* first clear the flag */ inp->ip_inp.inp.inp_vflag = 0; /* set the flag based on addresses on the ep list */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __func__); continue; } if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { continue; } switch (laddr->ifa->address.sa.sa_family) { #ifdef INET6 case AF_INET6: inp->ip_inp.inp.inp_vflag |= INP_IPV6; break; #endif #ifdef INET case AF_INET: inp->ip_inp.inp.inp_vflag |= INP_IPV4; break; #endif default: break; } } } /* * Add the address to the endpoint local address list There is nothing to be * done if we are bound to all addresses */ void sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action) { struct sctp_laddr *laddr; struct sctp_tcb *stcb; int fnd, error = 0; fnd = 0; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* You are already bound to all. You have it already */ return; } #ifdef INET6 if (ifa->address.sa.sa_family == AF_INET6) { if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-useable addr. */ return; } } #endif /* first, is it already present? */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == ifa) { fnd = 1; break; } } if (fnd == 0) { /* Not in the ep list */ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, action); if (error != 0) return; inp->laddr_count++; /* update inp_vflag flags */ switch (ifa->address.sa.sa_family) { #ifdef INET6 case AF_INET6: inp->ip_inp.inp.inp_vflag |= INP_IPV6; break; #endif #ifdef INET case AF_INET: inp->ip_inp.inp.inp_vflag |= INP_IPV4; break; #endif default: break; } LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { sctp_add_local_addr_restricted(stcb, ifa); } } return; } /* * select a new (hopefully reachable) destination net (should only be used * when we deleted an ep addr that is the only usable source address to reach * the destination net) */ static void sctp_select_primary_destination(struct sctp_tcb *stcb) { struct sctp_nets *net; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* for now, we'll just pick the first reachable one we find */ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) continue; if (sctp_destination_is_reachable(stcb, (struct sockaddr *)&net->ro._l_addr)) { /* found a reachable destination */ stcb->asoc.primary_destination = net; } } /* I can't there from here! ...we're gonna die shortly... */ } /* * Delete the address from the endpoint local address list. There is nothing * to be done if we are bound to all addresses */ void sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) { struct sctp_laddr *laddr; int fnd; fnd = 0; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* You are already bound to all. You have it already */ return; } LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == ifa) { fnd = 1; break; } } if (fnd && (inp->laddr_count < 2)) { /* can't delete unless there are at LEAST 2 addresses */ return; } if (fnd) { /* * clean up any use of this address go through our * associations and clear any last_used_address that match * this one for each assoc, see if a new primary_destination * is needed */ struct sctp_tcb *stcb; /* clean up "next_addr_touse" */ if (inp->next_addr_touse == laddr) /* delete this address */ inp->next_addr_touse = NULL; /* clean up "last_used_address" */ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { struct sctp_nets *net; SCTP_TCB_LOCK(stcb); if (stcb->asoc.last_used_address == laddr) /* delete this address */ stcb->asoc.last_used_address = NULL; /* * Now spin through all the nets and purge any ref * to laddr */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->ro._s_addr == laddr->ifa) { /* Yep, purge src address selected */ sctp_rtentry_t *rt; /* delete this address if cached */ rt = net->ro.ro_rt; if (rt != NULL) { RTFREE(rt); net->ro.ro_rt = NULL; } sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; } } SCTP_TCB_UNLOCK(stcb); } /* for each tcb */ /* remove it from the ep list */ sctp_remove_laddr(laddr); inp->laddr_count--; /* update inp_vflag flags */ sctp_update_ep_vflag(inp); } return; } /* * Add the address to the TCB local address restricted list. * This is a "pending" address list (eg. addresses waiting for an * ASCONF-ACK response) and cannot be used as a valid source address. */ void sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) { struct sctp_laddr *laddr; struct sctpladdr *list; /* * Assumes TCB is locked.. and possibly the INP. May need to * confirm/fix that if we need it and is not the case. */ list = &stcb->asoc.sctp_restricted_addrs; #ifdef INET6 if (ifa->address.sa.sa_family == AF_INET6) { if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. */ return; } } #endif /* does the address already exist? */ LIST_FOREACH(laddr, list, sctp_nxt_addr) { if (laddr->ifa == ifa) { return; } } /* add to the list */ (void)sctp_insert_laddr(list, ifa, 0); return; } /* * Remove a local address from the TCB local address restricted list */ void sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) { struct sctp_inpcb *inp; struct sctp_laddr *laddr; /* * This is called by asconf work. It is assumed that a) The TCB is * locked and b) The INP is locked. This is true in as much as I can * trace through the entry asconf code where I did these locks. * Again, the ASCONF code is a bit different in that it does lock * the INP during its work often times. This must be since we don't * want other proc's looking up things while what they are looking * up is changing :-D */ inp = stcb->sctp_ep; /* if subset bound and don't allow ASCONF's, can't delete last */ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) && sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { if (stcb->sctp_ep->laddr_count < 2) { /* can't delete last address */ return; } } LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) { /* remove the address if it exists */ if (laddr->ifa == NULL) continue; if (laddr->ifa == ifa) { sctp_remove_laddr(laddr); return; } } /* address not found! */ return; } /* * Temporarily remove for __APPLE__ until we use the Tiger equivalents */ /* sysctl */ static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC; static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR; #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) struct sctp_mcore_ctrl *sctp_mcore_workers = NULL; int *sctp_cpuarry = NULL; void sctp_queue_to_mcore(struct mbuf *m, int off, int cpu_to_use) { /* Queue a packet to a processor for the specified core */ struct sctp_mcore_queue *qent; struct sctp_mcore_ctrl *wkq; int need_wake = 0; if (sctp_mcore_workers == NULL) { /* Something went way bad during setup */ sctp_input_with_port(m, off, 0); return; } SCTP_MALLOC(qent, struct sctp_mcore_queue *, (sizeof(struct sctp_mcore_queue)), SCTP_M_MCORE); if (qent == NULL) { /* This is trouble */ sctp_input_with_port(m, off, 0); return; } qent->vn = curvnet; qent->m = m; qent->off = off; qent->v6 = 0; wkq = &sctp_mcore_workers[cpu_to_use]; SCTP_MCORE_QLOCK(wkq); TAILQ_INSERT_TAIL(&wkq->que, qent, next); if (wkq->running == 0) { need_wake = 1; } SCTP_MCORE_QUNLOCK(wkq); if (need_wake) { wakeup(&wkq->running); } } static void sctp_mcore_thread(void *arg) { struct sctp_mcore_ctrl *wkq; struct sctp_mcore_queue *qent; wkq = (struct sctp_mcore_ctrl *)arg; struct mbuf *m; int off, v6; /* Wait for first tickle */ SCTP_MCORE_LOCK(wkq); wkq->running = 0; msleep(&wkq->running, &wkq->core_mtx, 0, "wait for pkt", 0); SCTP_MCORE_UNLOCK(wkq); /* Bind to our cpu */ thread_lock(curthread); sched_bind(curthread, wkq->cpuid); thread_unlock(curthread); /* Now lets start working */ SCTP_MCORE_LOCK(wkq); /* Now grab lock and go */ for (;;) { SCTP_MCORE_QLOCK(wkq); skip_sleep: wkq->running = 1; qent = TAILQ_FIRST(&wkq->que); if (qent) { TAILQ_REMOVE(&wkq->que, qent, next); SCTP_MCORE_QUNLOCK(wkq); CURVNET_SET(qent->vn); m = qent->m; off = qent->off; v6 = qent->v6; SCTP_FREE(qent, SCTP_M_MCORE); if (v6 == 0) { sctp_input_with_port(m, off, 0); } else { SCTP_PRINTF("V6 not yet supported\n"); sctp_m_freem(m); } CURVNET_RESTORE(); SCTP_MCORE_QLOCK(wkq); } wkq->running = 0; if (!TAILQ_EMPTY(&wkq->que)) { goto skip_sleep; } SCTP_MCORE_QUNLOCK(wkq); msleep(&wkq->running, &wkq->core_mtx, 0, "wait for pkt", 0); } } static void sctp_startup_mcore_threads(void) { int i, cpu; if (mp_ncpus == 1) return; if (sctp_mcore_workers != NULL) { /* * Already been here in some previous vnet? */ return; } SCTP_MALLOC(sctp_mcore_workers, struct sctp_mcore_ctrl *, ((mp_maxid + 1) * sizeof(struct sctp_mcore_ctrl)), SCTP_M_MCORE); if (sctp_mcore_workers == NULL) { /* TSNH I hope */ return; } memset(sctp_mcore_workers, 0, ((mp_maxid + 1) * sizeof(struct sctp_mcore_ctrl))); /* Init the structures */ for (i = 0; i <= mp_maxid; i++) { TAILQ_INIT(&sctp_mcore_workers[i].que); SCTP_MCORE_LOCK_INIT(&sctp_mcore_workers[i]); SCTP_MCORE_QLOCK_INIT(&sctp_mcore_workers[i]); sctp_mcore_workers[i].cpuid = i; } if (sctp_cpuarry == NULL) { SCTP_MALLOC(sctp_cpuarry, int *, (mp_ncpus * sizeof(int)), SCTP_M_MCORE); i = 0; CPU_FOREACH(cpu) { sctp_cpuarry[i] = cpu; i++; } } /* Now start them all */ CPU_FOREACH(cpu) { (void)kproc_create(sctp_mcore_thread, (void *)&sctp_mcore_workers[cpu], &sctp_mcore_workers[cpu].thread_proc, RFPROC, SCTP_KTHREAD_PAGES, SCTP_MCORE_NAME); } } #endif void sctp_pcb_init() { /* * SCTP initialization for the PCB structures should be called by * the sctp_init() function. */ int i; struct timeval tv; if (SCTP_BASE_VAR(sctp_pcb_initialized) != 0) { /* error I was called twice */ return; } SCTP_BASE_VAR(sctp_pcb_initialized) = 1; #if defined(SCTP_LOCAL_TRACE_BUF) bzero(&SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log)); #endif #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) SCTP_MALLOC(SCTP_BASE_STATS, struct sctpstat *, ((mp_maxid + 1) * sizeof(struct sctpstat)), SCTP_M_MCORE); #endif (void)SCTP_GETTIME_TIMEVAL(&tv); #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) bzero(SCTP_BASE_STATS, (sizeof(struct sctpstat) * (mp_maxid + 1))); SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_sec = (uint32_t)tv.tv_sec; SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_usec = (uint32_t)tv.tv_usec; #else bzero(&SCTP_BASE_STATS, sizeof(struct sctpstat)); SCTP_BASE_STAT(sctps_discontinuitytime).tv_sec = (uint32_t)tv.tv_sec; SCTP_BASE_STAT(sctps_discontinuitytime).tv_usec = (uint32_t)tv.tv_usec; #endif /* init the empty list of (All) Endpoints */ LIST_INIT(&SCTP_BASE_INFO(listhead)); /* init the hash table of endpoints */ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &SCTP_BASE_SYSCTL(sctp_hashtblsize)); TUNABLE_INT_FETCH("net.inet.sctp.pcbhashsize", &SCTP_BASE_SYSCTL(sctp_pcbtblsize)); TUNABLE_INT_FETCH("net.inet.sctp.chunkscale", &SCTP_BASE_SYSCTL(sctp_chunkscale)); SCTP_BASE_INFO(sctp_asochash) = SCTP_HASH_INIT((SCTP_BASE_SYSCTL(sctp_hashtblsize) * 31), &SCTP_BASE_INFO(hashasocmark)); SCTP_BASE_INFO(sctp_ephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize), &SCTP_BASE_INFO(hashmark)); SCTP_BASE_INFO(sctp_tcpephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize), &SCTP_BASE_INFO(hashtcpmark)); SCTP_BASE_INFO(hashtblsize) = SCTP_BASE_SYSCTL(sctp_hashtblsize); SCTP_BASE_INFO(sctp_vrfhash) = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH, &SCTP_BASE_INFO(hashvrfmark)); SCTP_BASE_INFO(vrf_ifn_hash) = SCTP_HASH_INIT(SCTP_VRF_IFN_HASH_SIZE, &SCTP_BASE_INFO(vrf_ifn_hashmark)); /* init the zones */ /* * FIX ME: Should check for NULL returns, but if it does fail we are * doomed to panic anyways... add later maybe. */ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_ep), "sctp_ep", sizeof(struct sctp_inpcb), maxsockets); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asoc), "sctp_asoc", sizeof(struct sctp_tcb), sctp_max_number_of_assoc); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_laddr), "sctp_laddr", sizeof(struct sctp_laddr), (sctp_max_number_of_assoc * sctp_scale_up_for_address)); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_net), "sctp_raddr", sizeof(struct sctp_nets), (sctp_max_number_of_assoc * sctp_scale_up_for_address)); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_chunk), "sctp_chunk", sizeof(struct sctp_tmit_chunk), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_readq), "sctp_readq", sizeof(struct sctp_queued_to_read), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_strmoq), "sctp_stream_msg_out", sizeof(struct sctp_stream_queue_pending), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf), "sctp_asconf", sizeof(struct sctp_asconf), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf_ack), "sctp_asconf_ack", sizeof(struct sctp_asconf_ack), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); /* Master Lock INIT for info structure */ SCTP_INP_INFO_LOCK_INIT(); SCTP_STATLOG_INIT_LOCK(); SCTP_IPI_COUNT_INIT(); SCTP_IPI_ADDR_INIT(); #ifdef SCTP_PACKET_LOGGING SCTP_IP_PKTLOG_INIT(); #endif LIST_INIT(&SCTP_BASE_INFO(addr_wq)); SCTP_WQ_ADDR_INIT(); /* not sure if we need all the counts */ SCTP_BASE_INFO(ipi_count_ep) = 0; /* assoc/tcb zone info */ SCTP_BASE_INFO(ipi_count_asoc) = 0; /* local addrlist zone info */ SCTP_BASE_INFO(ipi_count_laddr) = 0; /* remote addrlist zone info */ SCTP_BASE_INFO(ipi_count_raddr) = 0; /* chunk info */ SCTP_BASE_INFO(ipi_count_chunk) = 0; /* socket queue zone info */ SCTP_BASE_INFO(ipi_count_readq) = 0; /* stream out queue cont */ SCTP_BASE_INFO(ipi_count_strmoq) = 0; SCTP_BASE_INFO(ipi_free_strmoq) = 0; SCTP_BASE_INFO(ipi_free_chunks) = 0; SCTP_OS_TIMER_INIT(&SCTP_BASE_INFO(addr_wq_timer.timer)); /* Init the TIMEWAIT list */ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) { LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]); } sctp_startup_iterator(); #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP) sctp_startup_mcore_threads(); #endif /* * INIT the default VRF which for BSD is the only one, other O/S's * may have more. But initially they must start with one and then * add the VRF's as addresses are added. */ sctp_init_vrf_list(SCTP_DEFAULT_VRF); } /* * Assumes that the SCTP_BASE_INFO() lock is NOT held. */ void sctp_pcb_finish(void) { struct sctp_vrflist *vrf_bucket; struct sctp_vrf *vrf, *nvrf; struct sctp_ifn *ifn, *nifn; struct sctp_ifa *ifa, *nifa; struct sctpvtaghead *chain; struct sctp_tagblock *twait_block, *prev_twait_block; struct sctp_laddr *wi, *nwi; int i; struct sctp_iterator *it, *nit; if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) { SCTP_PRINTF("%s: race condition on teardown.\n", __func__); return; } SCTP_BASE_VAR(sctp_pcb_initialized) = 0; /* * In FreeBSD the iterator thread never exits but we do clean up. * The only way FreeBSD reaches here is if we have VRF's but we * still add the ifdef to make it compile on old versions. */ retry: SCTP_IPI_ITERATOR_WQ_LOCK(); /* * sctp_iterator_worker() might be working on an it entry without * holding the lock. We won't find it on the list either and * continue and free/destroy it. While holding the lock, spin, to * avoid the race condition as sctp_iterator_worker() will have to * wait to re-aquire the lock. */ if (sctp_it_ctl.iterator_running != 0 || sctp_it_ctl.cur_it != NULL) { SCTP_IPI_ITERATOR_WQ_UNLOCK(); SCTP_PRINTF("%s: Iterator running while we held the lock. Retry. " "cur_it=%p\n", __func__, sctp_it_ctl.cur_it); DELAY(10); goto retry; } TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) { if (it->vn != curvnet) { continue; } TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (it->function_atend != NULL) { (*it->function_atend) (it->pointer, it->val); } SCTP_FREE(it, SCTP_M_ITER); } SCTP_IPI_ITERATOR_WQ_UNLOCK(); SCTP_ITERATOR_LOCK(); if ((sctp_it_ctl.cur_it) && (sctp_it_ctl.cur_it->vn == curvnet)) { sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT; } SCTP_ITERATOR_UNLOCK(); SCTP_OS_TIMER_STOP_DRAIN(&SCTP_BASE_INFO(addr_wq_timer.timer)); SCTP_WQ_ADDR_LOCK(); LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) { LIST_REMOVE(wi, sctp_nxt_addr); SCTP_DECR_LADDR_COUNT(); if (wi->action == SCTP_DEL_IP_ADDRESS) { SCTP_FREE(wi->ifa, SCTP_M_IFA); } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi); } SCTP_WQ_ADDR_UNLOCK(); /* * free the vrf/ifn/ifa lists and hashes (be sure address monitor is * destroyed first). */ vrf_bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(SCTP_DEFAULT_VRFID & SCTP_BASE_INFO(hashvrfmark))]; LIST_FOREACH_SAFE(vrf, vrf_bucket, next_vrf, nvrf) { LIST_FOREACH_SAFE(ifn, &vrf->ifnlist, next_ifn, nifn) { LIST_FOREACH_SAFE(ifa, &ifn->ifalist, next_ifa, nifa) { /* free the ifa */ LIST_REMOVE(ifa, next_bucket); LIST_REMOVE(ifa, next_ifa); SCTP_FREE(ifa, SCTP_M_IFA); } /* free the ifn */ LIST_REMOVE(ifn, next_bucket); LIST_REMOVE(ifn, next_ifn); SCTP_FREE(ifn, SCTP_M_IFN); } SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark); /* free the vrf */ LIST_REMOVE(vrf, next_vrf); SCTP_FREE(vrf, SCTP_M_VRF); } /* free the vrf hashes */ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_vrfhash), SCTP_BASE_INFO(hashvrfmark)); SCTP_HASH_FREE(SCTP_BASE_INFO(vrf_ifn_hash), SCTP_BASE_INFO(vrf_ifn_hashmark)); /* * free the TIMEWAIT list elements malloc'd in the function * sctp_add_vtag_to_timewait()... */ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) { chain = &SCTP_BASE_INFO(vtag_timewait)[i]; if (!LIST_EMPTY(chain)) { prev_twait_block = NULL; LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { if (prev_twait_block) { SCTP_FREE(prev_twait_block, SCTP_M_TIMW); } prev_twait_block = twait_block; } SCTP_FREE(prev_twait_block, SCTP_M_TIMW); } } /* free the locks and mutexes */ #ifdef SCTP_PACKET_LOGGING SCTP_IP_PKTLOG_DESTROY(); #endif SCTP_IPI_ADDR_DESTROY(); SCTP_STATLOG_DESTROY(); SCTP_INP_INFO_LOCK_DESTROY(); SCTP_WQ_ADDR_DESTROY(); /* Get rid of other stuff too. */ if (SCTP_BASE_INFO(sctp_asochash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark)); if (SCTP_BASE_INFO(sctp_ephash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark)); if (SCTP_BASE_INFO(sctp_tcpephash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_net)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_chunk)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_readq)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack)); #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) SCTP_FREE(SCTP_BASE_STATS, SCTP_M_MCORE); #endif } int sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, int offset, int limit, struct sockaddr *src, struct sockaddr *dst, struct sockaddr *altsa, uint16_t port) { /* * grub through the INIT pulling addresses and loading them to the * nets structure in the asoc. The from address in the mbuf should * also be loaded (if it is not already). This routine can be called * with either INIT or INIT-ACK's as long as the m points to the IP * packet and the offset points to the beginning of the parameters. */ struct sctp_inpcb *inp; struct sctp_nets *net, *nnet, *net_tmp; struct sctp_paramhdr *phdr, parm_buf; struct sctp_tcb *stcb_tmp; uint16_t ptype, plen; struct sockaddr *sa; uint8_t random_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_random *p_random = NULL; uint16_t random_len = 0; uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_hmac_algo *hmacs = NULL; uint16_t hmacs_len = 0; uint8_t saw_asconf = 0; uint8_t saw_asconf_ack = 0; uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_chunk_list *chunks = NULL; uint16_t num_chunks = 0; sctp_key_t *new_key; uint32_t keylen; int got_random = 0, got_hmacs = 0, got_chklist = 0; uint8_t peer_supports_ecn; uint8_t peer_supports_prsctp; uint8_t peer_supports_auth; uint8_t peer_supports_asconf; uint8_t peer_supports_asconf_ack; uint8_t peer_supports_reconfig; uint8_t peer_supports_nrsack; uint8_t peer_supports_pktdrop; uint8_t peer_supports_idata; #ifdef INET struct sockaddr_in sin; #endif #ifdef INET6 struct sockaddr_in6 sin6; #endif /* First get the destination address setup too. */ #ifdef INET memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = stcb->rport; #endif #ifdef INET6 memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = stcb->rport; #endif if (altsa) { sa = altsa; } else { sa = src; } peer_supports_idata = 0; peer_supports_ecn = 0; peer_supports_prsctp = 0; peer_supports_auth = 0; peer_supports_asconf = 0; peer_supports_reconfig = 0; peer_supports_nrsack = 0; peer_supports_pktdrop = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* mark all addresses that we have currently on the list */ net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC; } /* does the source address already exist? if so skip it */ inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, dst, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) { /* we must add the source address */ /* no scope set here since we have a tcb already. */ switch (sa->sa_family) { #ifdef INET case AF_INET: if (stcb->asoc.scope.ipv4_addr_legal) { if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) { return (-1); } } break; #endif #ifdef INET6 case AF_INET6: if (stcb->asoc.scope.ipv6_addr_legal) { if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) { return (-2); } } break; #endif default: break; } } else { if (net_tmp != NULL && stcb_tmp == stcb) { net_tmp->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC; } else if (stcb_tmp != stcb) { /* It belongs to another association? */ if (stcb_tmp) SCTP_TCB_UNLOCK(stcb_tmp); return (-3); } } if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-4); } /* now we must go through each of the params. */ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); while (phdr) { ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); /* * SCTP_PRINTF("ptype => %0x, plen => %d\n", * (uint32_t)ptype, (int)plen); */ if (offset + plen > limit) { break; } if (plen == 0) { break; } #ifdef INET if (ptype == SCTP_IPV4_ADDRESS) { if (stcb->asoc.scope.ipv4_addr_legal) { struct sctp_ipv4addr_param *p4, p4_buf; /* ok get the v4 address and check/add */ phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf)); if (plen != sizeof(struct sctp_ipv4addr_param) || phdr == NULL) { return (-5); } p4 = (struct sctp_ipv4addr_param *)phdr; sin.sin_addr.s_addr = p4->addr; if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { /* Skip multi-cast addresses */ goto next_param; } if ((sin.sin_addr.s_addr == INADDR_BROADCAST) || (sin.sin_addr.s_addr == INADDR_ANY)) { goto next_param; } sa = (struct sockaddr *)&sin; inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net, dst, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) { /* we must add the source address */ /* * no scope set since we have a tcb * already */ /* * we must validate the state again * here */ add_it_now: if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-7); } if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) { return (-8); } } else if (stcb_tmp == stcb) { if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-10); } if (net != NULL) { /* clear flag */ net->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC; } } else { /* * strange, address is in another * assoc? straighten out locks. */ if (stcb_tmp) { if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; /* * in setup state we * abort this guy */ snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_abort_an_association(stcb_tmp->sctp_ep, stcb_tmp, op_err, SCTP_SO_NOT_LOCKED); goto add_it_now; } SCTP_TCB_UNLOCK(stcb_tmp); } if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-12); } return (-13); } } } else #endif #ifdef INET6 if (ptype == SCTP_IPV6_ADDRESS) { if (stcb->asoc.scope.ipv6_addr_legal) { /* ok get the v6 address and check/add */ struct sctp_ipv6addr_param *p6, p6_buf; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf)); if (plen != sizeof(struct sctp_ipv6addr_param) || phdr == NULL) { return (-14); } p6 = (struct sctp_ipv6addr_param *)phdr; memcpy((caddr_t)&sin6.sin6_addr, p6->addr, sizeof(p6->addr)); if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { /* Skip multi-cast addresses */ goto next_param; } if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) { /* * Link local make no sense without * scope */ goto next_param; } sa = (struct sockaddr *)&sin6; inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net, dst, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if (stcb_tmp == NULL && (inp == stcb->sctp_ep || inp == NULL)) { /* * we must validate the state again * here */ add_it_now6: if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-16); } /* * we must add the address, no scope * set */ if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) { return (-17); } } else if (stcb_tmp == stcb) { /* * we must validate the state again * here */ if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-19); } if (net != NULL) { /* clear flag */ net->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC; } } else { /* * strange, address is in another * assoc? straighten out locks. */ if (stcb_tmp) { if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; /* * in setup state we * abort this guy */ snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_abort_an_association(stcb_tmp->sctp_ep, stcb_tmp, op_err, SCTP_SO_NOT_LOCKED); goto add_it_now6; } SCTP_TCB_UNLOCK(stcb_tmp); } if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-21); } return (-22); } } } else #endif if (ptype == SCTP_ECN_CAPABLE) { peer_supports_ecn = 1; } else if (ptype == SCTP_ULP_ADAPTATION) { if (stcb->asoc.state != SCTP_STATE_OPEN) { struct sctp_adaptation_layer_indication ai, *aip; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&ai, sizeof(ai)); aip = (struct sctp_adaptation_layer_indication *)phdr; if (aip) { stcb->asoc.peers_adaptation = ntohl(aip->indication); stcb->asoc.adaptation_needed = 1; } } } else if (ptype == SCTP_SET_PRIM_ADDR) { struct sctp_asconf_addr_param lstore, *fee; int lptype; struct sockaddr *lsa = NULL; #ifdef INET struct sctp_asconf_addrv4_param *fii; #endif if (stcb->asoc.asconf_supported == 0) { return (-100); } if (plen > sizeof(lstore)) { return (-23); } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&lstore, min(plen, sizeof(lstore))); if (phdr == NULL) { return (-24); } fee = (struct sctp_asconf_addr_param *)phdr; lptype = ntohs(fee->addrp.ph.param_type); switch (lptype) { #ifdef INET case SCTP_IPV4_ADDRESS: if (plen != sizeof(struct sctp_asconf_addrv4_param)) { SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n", (int)sizeof(struct sctp_asconf_addrv4_param), plen); } else { fii = (struct sctp_asconf_addrv4_param *)fee; sin.sin_addr.s_addr = fii->addrp.addr; lsa = (struct sockaddr *)&sin; } break; #endif #ifdef INET6 case SCTP_IPV6_ADDRESS: if (plen != sizeof(struct sctp_asconf_addr_param)) { SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n", (int)sizeof(struct sctp_asconf_addr_param), plen); } else { memcpy(sin6.sin6_addr.s6_addr, fee->addrp.addr, sizeof(fee->addrp.addr)); lsa = (struct sockaddr *)&sin6; } break; #endif default: break; } if (lsa) { (void)sctp_set_primary_addr(stcb, sa, NULL); } } else if (ptype == SCTP_HAS_NAT_SUPPORT) { stcb->asoc.peer_supports_nat = 1; } else if (ptype == SCTP_PRSCTP_SUPPORTED) { /* Peer supports pr-sctp */ peer_supports_prsctp = 1; } else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) { /* A supported extension chunk */ struct sctp_supported_chunk_types_param *pr_supported; uint8_t local_store[SCTP_PARAM_BUFFER_SIZE]; int num_ent, i; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&local_store, min(sizeof(local_store), plen)); if (phdr == NULL) { return (-25); } pr_supported = (struct sctp_supported_chunk_types_param *)phdr; num_ent = plen - sizeof(struct sctp_paramhdr); for (i = 0; i < num_ent; i++) { switch (pr_supported->chunk_types[i]) { case SCTP_ASCONF: peer_supports_asconf = 1; break; case SCTP_ASCONF_ACK: peer_supports_asconf_ack = 1; break; case SCTP_FORWARD_CUM_TSN: peer_supports_prsctp = 1; break; case SCTP_PACKET_DROPPED: peer_supports_pktdrop = 1; break; case SCTP_NR_SELECTIVE_ACK: peer_supports_nrsack = 1; break; case SCTP_STREAM_RESET: peer_supports_reconfig = 1; break; case SCTP_AUTHENTICATION: peer_supports_auth = 1; break; case SCTP_IDATA: peer_supports_idata = 1; break; default: /* one I have not learned yet */ break; } } } else if (ptype == SCTP_RANDOM) { if (plen > sizeof(random_store)) break; if (got_random) { /* already processed a RANDOM */ goto next_param; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)random_store, min(sizeof(random_store), plen)); if (phdr == NULL) return (-26); p_random = (struct sctp_auth_random *)phdr; random_len = plen - sizeof(*p_random); /* enforce the random length */ if (random_len != SCTP_AUTH_RANDOM_SIZE_REQUIRED) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n"); return (-27); } got_random = 1; } else if (ptype == SCTP_HMAC_LIST) { uint16_t num_hmacs; uint16_t i; if (plen > sizeof(hmacs_store)) break; if (got_hmacs) { /* already processed a HMAC list */ goto next_param; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)hmacs_store, min(plen, sizeof(hmacs_store))); if (phdr == NULL) return (-28); hmacs = (struct sctp_auth_hmac_algo *)phdr; hmacs_len = plen - sizeof(*hmacs); num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]); /* validate the hmac list */ if (sctp_verify_hmac_param(hmacs, num_hmacs)) { return (-29); } if (stcb->asoc.peer_hmacs != NULL) sctp_free_hmaclist(stcb->asoc.peer_hmacs); stcb->asoc.peer_hmacs = sctp_alloc_hmaclist(num_hmacs); if (stcb->asoc.peer_hmacs != NULL) { for (i = 0; i < num_hmacs; i++) { (void)sctp_auth_add_hmacid(stcb->asoc.peer_hmacs, ntohs(hmacs->hmac_ids[i])); } } got_hmacs = 1; } else if (ptype == SCTP_CHUNK_LIST) { int i; if (plen > sizeof(chunks_store)) break; if (got_chklist) { /* already processed a Chunks list */ goto next_param; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)chunks_store, min(plen, sizeof(chunks_store))); if (phdr == NULL) return (-30); chunks = (struct sctp_auth_chunk_list *)phdr; num_chunks = plen - sizeof(*chunks); if (stcb->asoc.peer_auth_chunks != NULL) sctp_clear_chunklist(stcb->asoc.peer_auth_chunks); else stcb->asoc.peer_auth_chunks = sctp_alloc_chunklist(); for (i = 0; i < num_chunks; i++) { (void)sctp_auth_add_chunk(chunks->chunk_types[i], stcb->asoc.peer_auth_chunks); /* record asconf/asconf-ack if listed */ if (chunks->chunk_types[i] == SCTP_ASCONF) saw_asconf = 1; if (chunks->chunk_types[i] == SCTP_ASCONF_ACK) saw_asconf_ack = 1; } got_chklist = 1; } else if ((ptype == SCTP_HEARTBEAT_INFO) || (ptype == SCTP_STATE_COOKIE) || (ptype == SCTP_UNRECOG_PARAM) || (ptype == SCTP_COOKIE_PRESERVE) || (ptype == SCTP_SUPPORTED_ADDRTYPE) || (ptype == SCTP_ADD_IP_ADDRESS) || (ptype == SCTP_DEL_IP_ADDRESS) || (ptype == SCTP_ERROR_CAUSE_IND) || (ptype == SCTP_SUCCESS_REPORT)) { /* don't care */ ; } else { if ((ptype & 0x8000) == 0x0000) { /* * must stop processing the rest of the * param's. Any report bits were handled * with the call to * sctp_arethere_unrecognized_parameters() * when the INIT or INIT-ACK was first seen. */ break; } } next_param: offset += SCTP_SIZE32(plen); if (offset >= limit) { break; } phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); } /* Now check to see if we need to purge any addresses */ TAILQ_FOREACH_SAFE(net, &stcb->asoc.nets, sctp_next, nnet) { if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) == SCTP_ADDR_NOT_IN_ASSOC) { /* This address has been removed from the asoc */ /* remove and free it */ stcb->asoc.numnets--; TAILQ_REMOVE(&stcb->asoc.nets, net, sctp_next); sctp_free_remote_addr(net); if (net == stcb->asoc.primary_destination) { stcb->asoc.primary_destination = NULL; sctp_select_primary_destination(stcb); } } } if ((stcb->asoc.ecn_supported == 1) && (peer_supports_ecn == 0)) { stcb->asoc.ecn_supported = 0; } if ((stcb->asoc.prsctp_supported == 1) && (peer_supports_prsctp == 0)) { stcb->asoc.prsctp_supported = 0; } if ((stcb->asoc.auth_supported == 1) && ((peer_supports_auth == 0) || (got_random == 0) || (got_hmacs == 0))) { stcb->asoc.auth_supported = 0; } if ((stcb->asoc.asconf_supported == 1) && ((peer_supports_asconf == 0) || (peer_supports_asconf_ack == 0) || (stcb->asoc.auth_supported == 0) || (saw_asconf == 0) || (saw_asconf_ack == 0))) { stcb->asoc.asconf_supported = 0; } if ((stcb->asoc.reconfig_supported == 1) && (peer_supports_reconfig == 0)) { stcb->asoc.reconfig_supported = 0; } if ((stcb->asoc.idata_supported == 1) && (peer_supports_idata == 0)) { stcb->asoc.idata_supported = 0; } if ((stcb->asoc.nrsack_supported == 1) && (peer_supports_nrsack == 0)) { stcb->asoc.nrsack_supported = 0; } if ((stcb->asoc.pktdrop_supported == 1) && (peer_supports_pktdrop == 0)) { stcb->asoc.pktdrop_supported = 0; } /* validate authentication required parameters */ if ((peer_supports_auth == 0) && (got_chklist == 1)) { /* peer does not support auth but sent a chunks list? */ return (-31); } if ((peer_supports_asconf == 1) && (peer_supports_auth == 0)) { /* peer supports asconf but not auth? */ return (-32); } else if ((peer_supports_asconf == 1) && (peer_supports_auth == 1) && ((saw_asconf == 0) || (saw_asconf_ack == 0))) { return (-33); } /* concatenate the full random key */ keylen = sizeof(*p_random) + random_len + sizeof(*hmacs) + hmacs_len; if (chunks != NULL) { keylen += sizeof(*chunks) + num_chunks; } new_key = sctp_alloc_key(keylen); if (new_key != NULL) { /* copy in the RANDOM */ if (p_random != NULL) { keylen = sizeof(*p_random) + random_len; bcopy(p_random, new_key->key, keylen); } /* append in the AUTH chunks */ if (chunks != NULL) { bcopy(chunks, new_key->key + keylen, sizeof(*chunks) + num_chunks); keylen += sizeof(*chunks) + num_chunks; } /* append in the HMACs */ if (hmacs != NULL) { bcopy(hmacs, new_key->key + keylen, sizeof(*hmacs) + hmacs_len); } } else { /* failed to get memory for the key */ return (-34); } if (stcb->asoc.authinfo.peer_random != NULL) sctp_free_key(stcb->asoc.authinfo.peer_random); stcb->asoc.authinfo.peer_random = new_key; sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid); sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid); return (0); } int sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa, struct sctp_nets *net) { /* make sure the requested primary address exists in the assoc */ if (net == NULL && sa) net = sctp_findnet(stcb, sa); if (net == NULL) { /* didn't find the requested primary address! */ return (-1); } else { /* set the primary address */ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { /* Must be confirmed, so queue to set */ net->dest_state |= SCTP_ADDR_REQ_PRIMARY; return (0); } stcb->asoc.primary_destination = net; if (!(net->dest_state & SCTP_ADDR_PF) && (stcb->asoc.alternate)) { sctp_free_remote_addr(stcb->asoc.alternate); stcb->asoc.alternate = NULL; } net = TAILQ_FIRST(&stcb->asoc.nets); if (net != stcb->asoc.primary_destination) { /* * first one on the list is NOT the primary * sctp_cmpaddr() is much more efficient if the * primary is the first on the list, make it so. */ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); } return (0); } } int sctp_is_vtag_good(uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *now) { /* * This function serves two purposes. It will see if a TAG can be * re-used and return 1 for yes it is ok and 0 for don't use that * tag. A secondary function it will do is purge out old tags that * can be removed. */ struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; struct sctpasochead *head; struct sctp_tcb *stcb; int i; SCTP_INP_INFO_RLOCK(); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag, SCTP_BASE_INFO(hashasocmark))]; LIST_FOREACH(stcb, head, sctp_asocs) { /* * We choose not to lock anything here. TCB's can't be * removed since we have the read lock, so they can't be * freed on us, same thing for the INP. I may be wrong with * this assumption, but we will go with it for now :-) */ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { continue; } if (stcb->asoc.my_vtag == tag) { /* candidate */ if (stcb->rport != rport) { continue; } if (stcb->sctp_ep->sctp_lport != lport) { continue; } /* Its a used tag set */ SCTP_INP_INFO_RUNLOCK(); return (0); } } chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; /* Now what about timed wait ? */ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { /* * Block(s) are present, lets see if we have this tag in the * list */ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if (twait_block->vtag_block[i].v_tag == 0) { /* not used */ continue; } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire < now->tv_sec) { /* Audit expires this guy */ twait_block->vtag_block[i].tv_sec_at_expire = 0; twait_block->vtag_block[i].v_tag = 0; twait_block->vtag_block[i].lport = 0; twait_block->vtag_block[i].rport = 0; } else if ((twait_block->vtag_block[i].v_tag == tag) && (twait_block->vtag_block[i].lport == lport) && (twait_block->vtag_block[i].rport == rport)) { /* Bad tag, sorry :< */ SCTP_INP_INFO_RUNLOCK(); return (0); } } } SCTP_INP_INFO_RUNLOCK(); return (1); } static void sctp_drain_mbufs(struct sctp_tcb *stcb) { /* * We must hunt this association for MBUF's past the cumack (i.e. * out of order data that we can renege on). */ struct sctp_association *asoc; struct sctp_tmit_chunk *chk, *nchk; uint32_t cumulative_tsn_p1; struct sctp_queued_to_read *ctl, *nctl; int cnt, strmat; uint32_t gap, i; int fnd = 0; /* We look for anything larger than the cum-ack + 1 */ asoc = &stcb->asoc; if (asoc->cumulative_tsn == asoc->highest_tsn_inside_map) { /* none we can reneg on. */ return; } SCTP_STAT_INCR(sctps_protocol_drains_done); cumulative_tsn_p1 = asoc->cumulative_tsn + 1; cnt = 0; /* Ok that was fun, now we will drain all the inbound streams? */ for (strmat = 0; strmat < asoc->streamincnt; strmat++) { TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[strmat].inqueue, next_instrm, nctl) { #ifdef INVARIANTS if (ctl->on_strm_q != SCTP_ON_ORDERED) { panic("Huh control: %p on_q: %d -- not ordered?", ctl, ctl->on_strm_q); } #endif if (SCTP_TSN_GT(ctl->sinfo_tsn, cumulative_tsn_p1)) { /* Yep it is above cum-ack */ cnt++; SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn); asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length); sctp_ucount_decr(asoc->cnt_on_all_streams); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); if (ctl->on_read_q) { TAILQ_REMOVE(&stcb->sctp_ep->read_queue, ctl, next); ctl->on_read_q = 0; } TAILQ_REMOVE(&asoc->strmin[strmat].inqueue, ctl, next_instrm); ctl->on_strm_q = 0; if (ctl->data) { sctp_m_freem(ctl->data); ctl->data = NULL; } sctp_free_remote_addr(ctl->whoFrom); /* Now its reasm? */ TAILQ_FOREACH_SAFE(chk, &ctl->reasm, sctp_next, nchk) { cnt++; SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.tsn, asoc->mapping_array_base_tsn); asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size); sctp_ucount_decr(asoc->cnt_on_reasm_queue); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); TAILQ_REMOVE(&ctl->reasm, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } sctp_free_a_readq(stcb, ctl); } } TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[strmat].uno_inqueue, next_instrm, nctl) { #ifdef INVARIANTS if (ctl->on_strm_q != SCTP_ON_UNORDERED) { panic("Huh control: %p on_q: %d -- not unordered?", ctl, ctl->on_strm_q); } #endif if (SCTP_TSN_GT(ctl->sinfo_tsn, cumulative_tsn_p1)) { /* Yep it is above cum-ack */ cnt++; SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn); asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length); sctp_ucount_decr(asoc->cnt_on_all_streams); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); if (ctl->on_read_q) { TAILQ_REMOVE(&stcb->sctp_ep->read_queue, ctl, next); ctl->on_read_q = 0; } TAILQ_REMOVE(&asoc->strmin[strmat].uno_inqueue, ctl, next_instrm); ctl->on_strm_q = 0; if (ctl->data) { sctp_m_freem(ctl->data); ctl->data = NULL; } sctp_free_remote_addr(ctl->whoFrom); /* Now its reasm? */ TAILQ_FOREACH_SAFE(chk, &ctl->reasm, sctp_next, nchk) { cnt++; SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.tsn, asoc->mapping_array_base_tsn); asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size); sctp_ucount_decr(asoc->cnt_on_reasm_queue); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); TAILQ_REMOVE(&ctl->reasm, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } sctp_free_a_readq(stcb, ctl); } } } if (cnt) { /* We must back down to see what the new highest is */ for (i = asoc->highest_tsn_inside_map; SCTP_TSN_GE(i, asoc->mapping_array_base_tsn); i--) { SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn); if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) { asoc->highest_tsn_inside_map = i; fnd = 1; break; } } if (!fnd) { asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1; } /* * Question, should we go through the delivery queue? The * only reason things are on here is the app not reading OR * a p-d-api up. An attacker COULD send enough in to * initiate the PD-API and then send a bunch of stuff to * other streams... these would wind up on the delivery * queue.. and then we would not get to them. But in order * to do this I then have to back-track and un-deliver * sequence numbers in streams.. el-yucko. I think for now * we will NOT look at the delivery queue and leave it to be * something to consider later. An alternative would be to * abort the P-D-API with a notification and then deliver * the data.... Or another method might be to keep track of * how many times the situation occurs and if we see a * possible attack underway just abort the association. */ #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB1, "Freed %d chunks from reneg harvest\n", cnt); #endif /* * Now do we need to find a new * asoc->highest_tsn_inside_map? */ asoc->last_revoke_count = cnt; (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); /* sa_ignore NO_NULL_CHK */ sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED); sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED); } /* * Another issue, in un-setting the TSN's in the mapping array we * DID NOT adjust the highest_tsn marker. This will cause one of * two things to occur. It may cause us to do extra work in checking * for our mapping array movement. More importantly it may cause us * to SACK every datagram. This may not be a bad thing though since * we will recover once we get our cum-ack above and all this stuff * we dumped recovered. */ } void sctp_drain() { /* * We must walk the PCB lists for ALL associations here. The system * is LOW on MBUF's and needs help. This is where reneging will * occur. We really hope this does NOT happen! */ VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct sctp_inpcb *inp; struct sctp_tcb *stcb; SCTP_STAT_INCR(sctps_protocol_drain_calls); if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) { #ifdef VIMAGE continue; #else return; #endif } SCTP_INP_INFO_RLOCK(); LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) { /* For each endpoint */ SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { /* For each association */ SCTP_TCB_LOCK(stcb); sctp_drain_mbufs(stcb); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } SCTP_INP_INFO_RUNLOCK(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * start a new iterator * iterates through all endpoints and associations based on the pcb_state * flags and asoc_state. "af" (mandatory) is executed for all matching * assocs and "ef" (optional) is executed when the iterator completes. * "inpf" (optional) is executed for each new endpoint as it is being * iterated through. inpe (optional) is called when the inp completes * its way through all the stcbs. */ int sctp_initiate_iterator(inp_func inpf, asoc_func af, inp_func inpe, uint32_t pcb_state, uint32_t pcb_features, uint32_t asoc_state, void *argp, uint32_t argi, end_func ef, struct sctp_inpcb *s_inp, uint8_t chunk_output_off) { struct sctp_iterator *it = NULL; if (af == NULL) { return (-1); } if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) { SCTP_PRINTF("%s: abort on initialize being %d\n", __func__, SCTP_BASE_VAR(sctp_pcb_initialized)); return (-1); } SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator), SCTP_M_ITER); if (it == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM); return (ENOMEM); } memset(it, 0, sizeof(*it)); it->function_assoc = af; it->function_inp = inpf; if (inpf) it->done_current_ep = 0; else it->done_current_ep = 1; it->function_atend = ef; it->pointer = argp; it->val = argi; it->pcb_flags = pcb_state; it->pcb_features = pcb_features; it->asoc_state = asoc_state; it->function_inp_end = inpe; it->no_chunk_output = chunk_output_off; it->vn = curvnet; if (s_inp) { /* Assume lock is held here */ it->inp = s_inp; SCTP_INP_INCR_REF(it->inp); it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP; } else { SCTP_INP_INFO_RLOCK(); it->inp = LIST_FIRST(&SCTP_BASE_INFO(listhead)); if (it->inp) { SCTP_INP_INCR_REF(it->inp); } SCTP_INP_INFO_RUNLOCK(); it->iterator_flags = SCTP_ITERATOR_DO_ALL_INP; } SCTP_IPI_ITERATOR_WQ_LOCK(); if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) { SCTP_IPI_ITERATOR_WQ_UNLOCK(); SCTP_PRINTF("%s: rollback on initialize being %d it=%p\n", __func__, SCTP_BASE_VAR(sctp_pcb_initialized), it); SCTP_FREE(it, SCTP_M_ITER); return (-1); } TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (sctp_it_ctl.iterator_running == 0) { sctp_wakeup_iterator(); } SCTP_IPI_ITERATOR_WQ_UNLOCK(); /* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */ return (0); } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index e3b087dfcb5d..607756134a52 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,3921 +1,3895 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * Copyright (c) 2007-2008,2010 * Swinburne University of Technology, Melbourne, Australia. * Copyright (c) 2009-2010 Lawrence Stewart * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed at the Centre for Advanced Internet * Architectures, Swinburne University of Technology, by Lawrence Stewart, * James Healy and David Hayes, made possible in part by a grant from the Cisco * University Research Program Fund at Community Foundation Silicon Valley. * * Portions of this software were developed at the Centre for Advanced * Internet Architectures, Swinburne University of Technology, Melbourne, * Australia by David Hayes under sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #ifdef TCP_HHOOK #include #endif #include #include #include /* for proc0 declaration */ #include #include #include #include #include #include #include #include #include /* before tcp_seq.h, for tcp_random18() */ #include #include #include #include #include #define TCPSTATES /* for logging */ #include #include #include #include #include #include /* required for icmp_var.h */ #include /* for ICMP_BANDLIM */ #include #include #include #include #include #include #include #include #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #include #include #include #ifdef TCPPCAP #include #endif #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ #ifdef TCP_OFFLOAD #include #endif -#ifdef IPSEC -#include -#include -#endif /*IPSEC*/ +#include #include #include const int tcprexmtthresh = 3; int tcp_log_in_vain = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, &tcp_log_in_vain, 0, "Log all incoming TCP segments to closed ports"); VNET_DEFINE(int, blackhole) = 0; #define V_blackhole VNET(blackhole) SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(blackhole), 0, "Do not send RST on segments to closed ports"); VNET_DEFINE(int, tcp_delack_enabled) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_delack_enabled), 0, "Delay ACK to try and piggyback it onto a data packet"); VNET_DEFINE(int, drop_synfin) = 0; #define V_drop_synfin VNET(drop_synfin) SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(drop_synfin), 0, "Drop TCP packets with SYN+FIN set"); VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc6675_pipe), 0, "Use calculated pipe/in-flight bytes per RFC 6675"); VNET_DEFINE(int, tcp_do_rfc3042) = 1; #define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042) SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3042), 0, "Enable RFC 3042 (Limited Transmit)"); VNET_DEFINE(int, tcp_do_rfc3390) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3390), 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); VNET_DEFINE(int, tcp_initcwnd_segments) = 10; SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_initcwnd_segments), 0, "Slow-start flight size (initial congestion window) in number of segments"); VNET_DEFINE(int, tcp_do_rfc3465) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3465), 0, "Enable RFC 3465 (Appropriate Byte Counting)"); VNET_DEFINE(int, tcp_abc_l_var) = 2; SYSCTL_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_abc_l_var), 2, "Cap the max cwnd increment during slow-start to this number of segments"); static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN"); VNET_DEFINE(int, tcp_do_ecn) = 2; SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0, "TCP ECN support"); VNET_DEFINE(int, tcp_ecn_maxretries) = 1; SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0, "Max retries before giving up on ECN"); VNET_DEFINE(int, tcp_insecure_syn) = 0; #define V_tcp_insecure_syn VNET(tcp_insecure_syn) SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_syn), 0, "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets"); VNET_DEFINE(int, tcp_insecure_rst) = 0; #define V_tcp_insecure_rst VNET(tcp_insecure_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_rst), 0, "Follow RFC793 instead of RFC5961 criteria for accepting RST packets"); VNET_DEFINE(int, tcp_recvspace) = 1024*64; #define V_tcp_recvspace VNET(tcp_recvspace) SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size"); VNET_DEFINE(int, tcp_do_autorcvbuf) = 1; #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024; #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_inc), 0, "Incrementor step size of automatic receive buffer"); VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); VNET_DEFINE(struct inpcbhead, tcb); #define tcb6 tcb /* for KAME src sync over BSD*'s */ VNET_DEFINE(struct inpcbinfo, tcbinfo); /* * TCP statistics are stored in an array of counter(9)s, which size matches * size of struct tcpstat. TCP running connection count is a regular array. */ VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat, tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); VNET_DEFINE(counter_u64_t, tcps_states[TCP_NSTATES]); SYSCTL_COUNTER_U64_ARRAY(_net_inet_tcp, TCPCTL_STATES, states, CTLFLAG_RD | CTLFLAG_VNET, &VNET_NAME(tcps_states)[0], TCP_NSTATES, "TCP connection counts by TCP state"); static void tcp_vnet_init(const void *unused) { COUNTER_ARRAY_ALLOC(V_tcps_states, TCP_NSTATES, M_WAITOK); VNET_PCPUSTAT_ALLOC(tcpstat, M_WAITOK); } VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, tcp_vnet_init, NULL); #ifdef VIMAGE static void tcp_vnet_uninit(const void *unused) { COUNTER_ARRAY_FREE(V_tcps_states, TCP_NSTATES); VNET_PCPUSTAT_FREE(tcpstat); } VNET_SYSUNINIT(tcp_vnet_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, tcp_vnet_uninit, NULL); #endif /* VIMAGE */ /* * Kernel module interface for updating tcpstat. The argument is an index * into tcpstat treated as an array. */ void kmod_tcpstat_inc(int statnum) { counter_u64_add(VNET(tcpstat)[statnum], 1); } #ifdef TCP_HHOOK /* * Wrapper for the TCP established input helper hook. */ void hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) { struct tcp_hhook_data hhook_data; if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) { hhook_data.tp = tp; hhook_data.th = th; hhook_data.to = to; hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data, tp->osd); } } #endif /* * CC wrapper hook functions */ void cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs, uint16_t type) { INP_WLOCK_ASSERT(tp->t_inpcb); tp->ccv->nsegs = nsegs; tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); if (tp->snd_cwnd <= tp->snd_wnd) tp->ccv->flags |= CCF_CWND_LIMITED; else tp->ccv->flags &= ~CCF_CWND_LIMITED; if (type == CC_ACK) { if (tp->snd_cwnd > tp->snd_ssthresh) { tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, nsegs * V_tcp_abc_l_var * tcp_maxseg(tp)); if (tp->t_bytes_acked >= tp->snd_cwnd) { tp->t_bytes_acked -= tp->snd_cwnd; tp->ccv->flags |= CCF_ABC_SENTAWND; } } else { tp->ccv->flags &= ~CCF_ABC_SENTAWND; tp->t_bytes_acked = 0; } } if (CC_ALGO(tp)->ack_received != NULL) { /* XXXLAS: Find a way to live without this */ tp->ccv->curack = th->th_ack; CC_ALGO(tp)->ack_received(tp->ccv, type); } } void cc_conn_init(struct tcpcb *tp) { struct hc_metrics_lite metrics; struct inpcb *inp = tp->t_inpcb; u_int maxseg; int rtt; INP_WLOCK_ASSERT(tp->t_inpcb); tcp_hc_get(&inp->inp_inc, &metrics); maxseg = tcp_maxseg(tp); if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { tp->t_srtt = rtt; tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; TCPSTAT_INC(tcps_usedrtt); if (metrics.rmx_rttvar) { tp->t_rttvar = metrics.rmx_rttvar; TCPSTAT_INC(tcps_usedrttvar); } else { /* default variation is +- 1 rtt */ tp->t_rttvar = tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; } TCPT_RANGESET(tp->t_rxtcur, ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, tp->t_rttmin, TCPTV_REXMTMAX); } if (metrics.rmx_ssthresh) { /* * There's some sort of gateway or interface * buffer limit on the path. Use this to set * the slow start threshold, but set the * threshold to no less than 2*mss. */ tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh); TCPSTAT_INC(tcps_usedssthresh); } /* * Set the initial slow-start flight size. * * RFC5681 Section 3.1 specifies the default conservative values. * RFC3390 specifies slightly more aggressive values. * RFC6928 increases it to ten segments. * Support for user specified value for initial flight size. * * If a SYN or SYN/ACK was lost and retransmitted, we have to * reduce the initial CWND to one segment as congestion is likely * requiring us to be cautious. */ if (tp->snd_cwnd == 1) tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ else if (V_tcp_initcwnd_segments) tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg, max(2 * maxseg, V_tcp_initcwnd_segments * 1460)); else if (V_tcp_do_rfc3390) tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380)); else { /* Per RFC5681 Section 3.1 */ if (maxseg > 2190) tp->snd_cwnd = 2 * maxseg; else if (maxseg > 1095) tp->snd_cwnd = 3 * maxseg; else tp->snd_cwnd = 4 * maxseg; } if (CC_ALGO(tp)->conn_init != NULL) CC_ALGO(tp)->conn_init(tp->ccv); } void inline cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) { u_int maxseg; INP_WLOCK_ASSERT(tp->t_inpcb); switch(type) { case CC_NDUPACK: if (!IN_FASTRECOVERY(tp->t_flags)) { tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_ECN: if (!IN_CONGRECOVERY(tp->t_flags)) { TCPSTAT_INC(tcps_ecn_rcwnd); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_RTO: maxseg = tcp_maxseg(tp); tp->t_dupacks = 0; tp->t_bytes_acked = 0; EXIT_RECOVERY(tp->t_flags); if (CC_ALGO(tp)->cong_signal == NULL) { /* * RFC5681 Section 3.1 * ssthresh = max (FlightSize / 2, 2*SMSS) eq (4) */ tp->snd_ssthresh = max((tp->snd_max - tp->snd_una) / 2 / maxseg, 2) * maxseg; tp->snd_cwnd = maxseg; } break; case CC_RTO_ERR: TCPSTAT_INC(tcps_sndrexmitbad); /* RTO was unnecessary, so reset everything. */ tp->snd_cwnd = tp->snd_cwnd_prev; tp->snd_ssthresh = tp->snd_ssthresh_prev; tp->snd_recover = tp->snd_recover_prev; if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp->t_flags); if (tp->t_flags & TF_WASCRECOVERY) ENTER_CONGRECOVERY(tp->t_flags); tp->snd_nxt = tp->snd_max; tp->t_flags &= ~TF_PREVVALID; tp->t_badrxtwin = 0; break; } if (CC_ALGO(tp)->cong_signal != NULL) { if (th != NULL) tp->ccv->curack = th->th_ack; CC_ALGO(tp)->cong_signal(tp->ccv, type); } } void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th) { INP_WLOCK_ASSERT(tp->t_inpcb); /* XXXLAS: KASSERT that we're in recovery? */ if (CC_ALGO(tp)->post_recovery != NULL) { tp->ccv->curack = th->th_ack; CC_ALGO(tp)->post_recovery(tp->ccv); } /* XXXLAS: EXIT_RECOVERY ? */ tp->t_bytes_acked = 0; } -#ifdef TCP_SIGNATURE -static inline int -tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) -{ - int ret; - - tcp_fields_to_net(th); - ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag); - tcp_fields_to_host(th); - return (ret); -} -#endif - /* * Indicate whether this ack should be delayed. We can delay the ack if * following conditions are met: * - There is no delayed ack timer in progress. * - Our last ack wasn't a 0-sized window. We never want to delay * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. We make sure by checking that the * segment size is not larger than the MSS. */ #define DELAY_ACK(tp, tlen) \ ((!tcp_timer_active(tp, TT_DELACK) && \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ (tlen <= tp->t_maxseg) && \ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) static void inline cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) { INP_WLOCK_ASSERT(tp->t_inpcb); if (CC_ALGO(tp)->ecnpkt_handler != NULL) { switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->ccv->flags |= CCF_IPHDR_CE; break; case IPTOS_ECN_ECT0: tp->ccv->flags &= ~CCF_IPHDR_CE; break; case IPTOS_ECN_ECT1: tp->ccv->flags &= ~CCF_IPHDR_CE; break; } if (th->th_flags & TH_CWR) tp->ccv->flags |= CCF_TCPHDR_CWR; else tp->ccv->flags &= ~CCF_TCPHDR_CWR; if (tp->t_flags & TF_DELACK) tp->ccv->flags |= CCF_DELACK; else tp->ccv->flags &= ~CCF_DELACK; CC_ALGO(tp)->ecnpkt_handler(tp->ccv); if (tp->ccv->flags & CCF_ACKNOW) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } } /* * TCP input handling is split into multiple parts: * tcp6_input is a thin wrapper around tcp_input for the extended * ip6_protox[] call format in ip6_input * tcp_input handles primary segment validation, inpcb lookup and * SYN processing on listen sockets * tcp_do_segment processes the ACK and text of the segment for * establishing, established and closing connections */ #ifdef INET6 int tcp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct in6_ifaddr *ia6; struct ip6_hdr *ip6; IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE); /* * draft-itojun-ipv6-tcp-to-anycast * better place to put this in? */ ip6 = mtod(m, struct ip6_hdr *); ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { struct ip6_hdr *ip6; ifa_free(&ia6->ia_ifa); ip6 = mtod(m, struct ip6_hdr *); icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); return (IPPROTO_DONE); } if (ia6) ifa_free(&ia6->ia_ifa); return (tcp_input(mp, offp, proto)); } #endif /* INET6 */ int tcp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct tcphdr *th = NULL; struct ip *ip = NULL; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; struct socket *so = NULL; u_char *optp = NULL; int off0; int optlen = 0; #ifdef INET int len; #endif int tlen = 0, off; int drop_hdrlen; int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ -#ifdef TCP_SIGNATURE - uint8_t sig_checked = 0; -#endif uint8_t iptos; struct m_tag *fwd_tag = NULL; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6; #else const void *ip6 = NULL; #endif /* INET6 */ struct tcpopt to; /* options in this segment */ char *s = NULL; /* address and port logging */ int ti_locked; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif off0 = *offp; m = *mp; *mp = NULL; to.to_flags = 0; TCPSTAT_INC(tcps_rcvtotal); #ifdef INET6 if (isipv6) { /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */ if (m->m_len < (sizeof(*ip6) + sizeof(*th))) { m = m_pullup(m, sizeof(*ip6) + sizeof(*th)); if (m == NULL) { TCPSTAT_INC(tcps_rcvshort); return (IPPROTO_DONE); } } ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; else th->th_sum = in6_cksum_pseudo(ip6, tlen, IPPROTO_TCP, m->m_pkthdr.csum_data); th->th_sum ^= 0xffff; } else th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); if (th->th_sum) { TCPSTAT_INC(tcps_rcvbadsum); goto drop; } /* * Be proactive about unspecified IPv6 address in source. * As we use all-zero to indicate unbounded/unconnected pcb, * unspecified IPv6 address can be used to confuse us. * * Note that packets with unspecified IPv6 destination is * already dropped in ip6_input. */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { /* XXX stat */ goto drop; } iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { /* * Get IP and TCP header together in first mbuf. * Note: IP leaves IP header in first mbuf. */ if (off0 > sizeof (struct ip)) { ip_stripoptions(m); off0 = sizeof(struct ip); } if (m->m_len < sizeof (struct tcpiphdr)) { if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == NULL) { TCPSTAT_INC(tcps_rcvshort); return (IPPROTO_DONE); } } ip = mtod(m, struct ip *); th = (struct tcphdr *)((caddr_t)ip + off0); tlen = ntohs(ip->ip_len) - off0; iptos = ip->ip_tos; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; else th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + tlen + IPPROTO_TCP)); th->th_sum ^= 0xffff; } else { struct ipovly *ipov = (struct ipovly *)ip; /* * Checksum extended TCP header and data. */ len = off0 + tlen; bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); ipov->ih_len = htons(tlen); th->th_sum = in_cksum(m, len); /* Reset length for SDT probes. */ ip->ip_len = htons(len); /* Reset TOS bits */ ip->ip_tos = iptos; /* Re-initialization for later version check */ ip->ip_v = IPVERSION; } if (th->th_sum) { TCPSTAT_INC(tcps_rcvbadsum); goto drop; } } #endif /* INET */ /* * Check that TCP offset makes sense, * pull out TCP options and adjust length. XXX */ off = th->th_off << 2; if (off < sizeof (struct tcphdr) || off > tlen) { TCPSTAT_INC(tcps_rcvbadoff); goto drop; } tlen -= off; /* tlen is used instead of ti->ti_len */ if (off > sizeof (struct tcphdr)) { #ifdef INET6 if (isipv6) { IP6_EXTHDR_CHECK(m, off0, off, IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { if (m->m_len < sizeof(struct ip) + off) { if ((m = m_pullup(m, sizeof (struct ip) + off)) == NULL) { TCPSTAT_INC(tcps_rcvshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); th = (struct tcphdr *)((caddr_t)ip + off0); } } #endif optlen = off - sizeof (struct tcphdr); optp = (u_char *)(th + 1); } thflags = th->th_flags; /* * Convert TCP protocol specific fields to host format. */ tcp_fields_to_host(th); /* * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options. */ drop_hdrlen = off0 + off; /* * Locate pcb for segment; if we're likely to add or remove a * connection then first acquire pcbinfo lock. There are three cases * where we might discover later we need a write lock despite the * flags: ACKs moving a connection out of the syncache, ACKs for a * connection in TIMEWAIT and SYNs not targeting a listening socket. */ if ((thflags & (TH_FIN | TH_RST)) != 0) { INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; } else ti_locked = TI_UNLOCKED; /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ( #ifdef INET6 (isipv6 && (m->m_flags & M_IP6_NEXTHOP)) #ifdef INET || (!isipv6 && (m->m_flags & M_IP_NEXTHOP)) #endif #endif #if defined(INET) && !defined(INET6) (m->m_flags & M_IP_NEXTHOP) #endif ) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); findpcb: #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif #ifdef INET6 if (isipv6 && fwd_tag != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) : th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } } else if (isipv6) { inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET if (fwd_tag != NULL) { struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag+1); /* * Transparently forwarded. Pretend to be the destination. * already got one like this? */ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in_pcblookup(&V_tcbinfo, ip->ip_src, th->th_sport, next_hop->sin_addr, next_hop->sin_port ? ntohs(next_hop->sin_port) : th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } } else inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); #endif /* INET */ /* * If the INPCB does not exist then all data in the incoming * segment is discarded and an appropriate RST is sent back. * XXX MRT Send RST using which routing table? */ if (inp == NULL) { /* * Log communication attempts to ports that are not * in use. */ if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) || tcp_log_in_vain == 2) { if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6))) log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); } /* * When blackholing do not respond with a RST but * completely ignore the segment and drop it. */ if ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole == 2) goto dropunlock; rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } INP_WLOCK_ASSERT(inp); /* * While waiting for inp lock during the lookup, another thread * can have dropped the inpcb, in which case we need to loop back * and try to find a new inpcb to deliver to. */ if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); inp = NULL; goto findpcb; } if ((inp->inp_flowtype == M_HASHTYPE_NONE) && (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) && ((inp->inp_socket == NULL) || (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) { inp->inp_flowid = m->m_pkthdr.flowid; inp->inp_flowtype = M_HASHTYPE_GET(m); } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) #ifdef INET6 - if (isipv6 && ipsec6_in_reject(m, inp)) { + if (isipv6 && IPSEC_ENABLED(ipv6) && + IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) { goto dropunlock; - } else + } +#ifdef INET + else +#endif #endif /* INET6 */ - if (ipsec4_in_reject(m, inp) != 0) { +#ifdef INET + if (IPSEC_ENABLED(ipv4) && + IPSEC_CHECK_POLICY(ipv4, m, inp) != 0) { goto dropunlock; } +#endif /* INET */ #endif /* IPSEC */ /* * Check the minimum TTL for socket. */ if (inp->inp_ip_minttl != 0) { #ifdef INET6 if (isipv6) { if (inp->inp_ip_minttl > ip6->ip6_hlim) goto dropunlock; } else #endif if (inp->inp_ip_minttl > ip->ip_ttl) goto dropunlock; } /* * A previous connection in TIMEWAIT state is supposed to catch stray * or duplicate segments arriving late. If this segment was a * legitimate new connection attempt, the old INPCB gets removed and * we can try again to find a listening socket. * * At this point, due to earlier optimism, we may hold only an inpcb * lock, and not the inpcbinfo write lock. If so, we need to try to * acquire it, or if that fails, acquire a reference on the inpcb, * drop all locks, acquire a global write lock, and then re-acquire * the inpcb lock. We may at that point discover that another thread * has tried to free the inpcb, in which case we need to loop back * and try to find a new inpcb to deliver to. * * XXXRW: It may be time to rethink timewait locking. */ relocked: if (inp->inp_flags & INP_TIMEWAIT) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } else if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); inp = NULL; goto findpcb; } } else ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); /* * NB: tcp_twcheck unlocks the INP and frees the mbuf. */ if (tcp_twcheck(inp, &to, th, m, tlen)) goto findpcb; INP_INFO_RUNLOCK(&V_tcbinfo); return (IPPROTO_DONE); } /* * The TCPCB may no longer exist if the connection is winding * down or it is in the CLOSED state. Either way we drop the * segment and send an appropriate response. */ tp = intotcpcb(inp); if (tp == NULL || tp->t_state == TCPS_CLOSED) { rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_input(tp, m); m = NULL; /* consumed by the TOE driver */ goto dropunlock; } #endif /* * We've identified a valid inpcb, but it could be that we need an * inpcbinfo write lock but don't hold it. In this case, attempt to * acquire using the same strategy as the TIMEWAIT case above. If we * relock, we have to jump back to 'relocked' as the connection might * now be in TIMEWAIT. */ #ifdef INVARIANTS if ((thflags & (TH_FIN | TH_RST)) != 0) INP_INFO_RLOCK_ASSERT(&V_tcbinfo); #endif if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && !IS_FASTOPEN(tp->t_flags)))) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } else if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); inp = NULL; goto findpcb; } goto relocked; } else ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } #ifdef MAC INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) goto dropunlock; #endif so = inp->inp_socket; KASSERT(so != NULL, ("%s: so == NULL", __func__)); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) { ostate = tp->t_state; #ifdef INET6 if (isipv6) { bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6)); } else #endif bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip)); tcp_savetcp = *th; } #endif /* TCPDEBUG */ /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection * attempt or the completion of a previous one. */ KASSERT(tp->t_state == TCPS_LISTEN || !(so->so_options & SO_ACCEPTCONN), ("%s: so accepting but tp %p not listening", __func__, tp)); if (tp->t_state == TCPS_LISTEN && (so->so_options & SO_ACCEPTCONN)) { struct in_conninfo inc; bzero(&inc, sizeof(inc)); #ifdef INET6 if (isipv6) { inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = ip6->ip6_src; inc.inc6_laddr = ip6->ip6_dst; } else #endif { inc.inc_faddr = ip->ip_src; inc.inc_laddr = ip->ip_dst; } inc.inc_fport = th->th_sport; inc.inc_lport = th->th_dport; inc.inc_fibnum = so->so_fibnum; /* * Check for an existing connection attempt in syncache if * the flag is only ACK. A successful lookup creates a new * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected * timestamp. */ tcp_dooptions(&to, optp, optlen, 0); /* * NB: syncache_expand() doesn't unlock * inp and tcpinfo locks. */ - if (!syncache_expand(&inc, &to, th, &so, m)) { + rstreason = syncache_expand(&inc, &to, th, &so, m); + if (rstreason < 0) { + /* + * A failing TCP MD5 signature comparison + * must result in the segment being dropped + * and must not produce any response back + * to the sender. + */ + goto dropunlock; + } else if (rstreason == 0) { /* * No syncache entry or ACK was not * for our SYN/ACK. Send a RST. * NB: syncache did its own logging * of the failure cause. */ rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } #ifdef TCP_RFC7413 tfo_socket_result: #endif if (so == NULL) { /* * We completed the 3-way handshake * but could not allocate a socket * either due to memory shortage, * listen queue length limits or * global socket limits. Send RST * or wait and have the remote end * retransmit the ACK for another * try. */ if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Socket allocation failed due to " "limits or memory shortage, %s\n", s, __func__, V_tcp_sc_rst_sock_fail ? "sending RST" : "try again"); if (V_tcp_sc_rst_sock_fail) { rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } else goto dropunlock; } /* * Socket is created in state SYN_RECEIVED. * Unlock the listen socket, lock the newly * created socket and update the tp variable. */ INP_WUNLOCK(inp); /* listen socket */ inp = sotoinpcb(so); /* * New connection inpcb is already locked by * syncache_expand(). */ INP_WLOCK_ASSERT(inp); tp = intotcpcb(inp); KASSERT(tp->t_state == TCPS_SYN_RECEIVED, ("%s: ", __func__)); -#ifdef TCP_SIGNATURE - if (sig_checked == 0) { - tcp_dooptions(&to, optp, optlen, - (thflags & TH_SYN) ? TO_SYN : 0); - if (!tcp_signature_verify_input(m, off0, tlen, - optlen, &to, th, tp->t_flags)) { - - /* - * In SYN_SENT state if it receives an - * RST, it is allowed for further - * processing. - */ - if ((thflags & TH_RST) == 0 || - (tp->t_state == TCPS_SYN_SENT) == 0) - goto dropunlock; - } - sig_checked = 1; - } -#endif - /* * Process the segment and the data it * contains. tcp_do_segment() consumes * the mbuf chain and unlocks the inpcb. */ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } /* * Segment flag validation for new connection attempts: * * Our (SYN|ACK) response was rejected. * Check with syncache and remove entry to prevent * retransmits. * * NB: syncache_chkrst does its own logging of failure * causes. */ if (thflags & TH_RST) { syncache_chkrst(&inc, th); goto dropunlock; } /* * We can't do anything without SYN. */ if ((thflags & TH_SYN) == 0) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN is missing, segment ignored\n", s, __func__); TCPSTAT_INC(tcps_badsyn); goto dropunlock; } /* * (SYN|ACK) is bogus on a listen socket. */ if (thflags & TH_ACK) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN|ACK invalid, segment rejected\n", s, __func__); syncache_badack(&inc); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } /* * If the drop_synfin option is enabled, drop all * segments with both the SYN and FIN bits set. * This prevents e.g. nmap from identifying the * TCP/IP stack. * XXX: Poor reasoning. nmap has other methods * and is constantly refining its stack detection * strategies. * XXX: This is a violation of the TCP specification * and was used by RFC1644. */ if ((thflags & TH_FIN) && V_drop_synfin) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN|FIN segment ignored (based on " "sysctl setting)\n", s, __func__); TCPSTAT_INC(tcps_badsyn); goto dropunlock; } /* * Segment's flags are (SYN) or (SYN|FIN). * * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored * as they do not affect the state of the TCP FSM. * The data pointed to by TH_URG and th_urp is ignored. */ KASSERT((thflags & (TH_RST|TH_ACK)) == 0, ("%s: Listen socket: TH_RST or TH_ACK set", __func__)); KASSERT(thflags & (TH_SYN), ("%s: Listen socket: TH_SYN not set", __func__)); #ifdef INET6 /* * If deprecated address is forbidden, * we do not accept SYN to deprecated interface * address to prevent any new inbound connection from * getting established. * When we do not accept SYN, we send a TCP RST, * with deprecated source address (instead of dropping * it). We compromise it as it is much better for peer * to send a RST, and RST will be the final packet * for the exchange. * * If we do not forbid deprecated addresses, we accept * the SYN packet. RFC2462 does not suggest dropping * SYN in this case. * If we decipher RFC2462 5.5.4, it says like this: * 1. use of deprecated addr with existing * communication is okay - "SHOULD continue to be * used" * 2. use of it with new communication: * (2a) "SHOULD NOT be used if alternate address * with sufficient scope is available" * (2b) nothing mentioned otherwise. * Here we fall into (2b) case as we have no choice in * our source address selection - we must obey the peer. * * The wording in RFC2462 is confusing, and there are * multiple description text for deprecated address * handling - worse, they are not exactly the same. * I believe 5.5.4 is the best one, so we follow 5.5.4. */ if (isipv6 && !V_ip6_use_deprecated) { struct in6_ifaddr *ia6; ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 != NULL && (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { ifa_free(&ia6->ia_ifa); if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt to deprecated " "IPv6 address rejected\n", s, __func__); rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } if (ia6) ifa_free(&ia6->ia_ifa); } #endif /* INET6 */ /* * Basic sanity checks on incoming SYN requests: * Don't respond if the destination is a link layer * broadcast according to RFC1122 4.2.3.10, p. 104. * If it is from this socket it must be forged. * Don't respond if the source or destination is a * global or subnet broad- or multicast address. * Note that it is quite possible to receive unicast * link-layer packets with a broadcast IP address. Use * in_broadcast() to find them. */ if (m->m_flags & (M_BCAST|M_MCAST)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from broad- or multicast " "link layer address ignored\n", s, __func__); goto dropunlock; } #ifdef INET6 if (isipv6) { if (th->th_dport == th->th_sport && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt to/from self " "ignored\n", s, __func__); goto dropunlock; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to multicast " "address ignored\n", s, __func__); goto dropunlock; } } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { if (th->th_dport == th->th_sport && ip->ip_dst.s_addr == ip->ip_src.s_addr) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to self " "ignored\n", s, __func__); goto dropunlock; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to broad- " "or multicast address ignored\n", s, __func__); goto dropunlock; } } #endif /* * SYN appears to be valid. Create compressed TCP state * for syncache. */ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); tcp_dooptions(&to, optp, optlen, TO_SYN); #ifdef TCP_RFC7413 if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL)) goto tfo_socket_result; #else syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); #endif /* * Entry added to syncache and mbuf consumed. * Only the listen socket is unlocked by syncache_add(). */ if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } else if (tp->t_state == TCPS_LISTEN) { /* * When a listen socket is torn down the SO_ACCEPTCONN * flag is removed first while connections are drained * from the accept queue in a unlock/lock cycle of the * ACCEPT_LOCK, opening a race condition allowing a SYN * attempt go through unhandled. */ goto dropunlock; } - -#ifdef TCP_SIGNATURE - if (sig_checked == 0) { - tcp_dooptions(&to, optp, optlen, - (thflags & TH_SYN) ? TO_SYN : 0); - if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to, - th, tp->t_flags)) { - - /* - * In SYN_SENT state if it receives an RST, it is - * allowed for further processing. - */ - if ((thflags & TH_RST) == 0 || - (tp->t_state == TCPS_SYN_SENT) == 0) - goto dropunlock; +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (tp->t_flags & TF_SIGNATURE) { + tcp_dooptions(&to, optp, optlen, thflags); + if ((to.to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + goto dropunlock; } - sig_checked = 1; + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to.to_signature) != 0) + goto dropunlock; } #endif - TCP_PROBE5(receive, NULL, tp, m, tp, th); /* * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later * state. tcp_do_segment() always consumes the mbuf chain, unlocks * the inpcb, and unlocks pcbinfo. */ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); dropwithreset: TCP_PROBE5(receive, NULL, tp, m, tp, th); if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(inp); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); m = NULL; /* mbuf chain got consumed. */ goto drop; dropunlock: if (m != NULL) TCP_PROBE5(receive, NULL, tp, m, tp, th); if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif if (inp != NULL) INP_WUNLOCK(inp); drop: INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); if (s != NULL) free(s, M_TCPLOG); if (m != NULL) m_freem(m); return (IPPROTO_DONE); } void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, int ti_locked) { int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; int rstreason, todrop, win; uint32_t tiwin; uint16_t nsegs; char *s; struct in_conninfo *inc; struct mbuf *mfree; struct tcpopt to; #ifdef TCP_RFC7413 int tfo_syn; #endif #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif thflags = th->th_flags; inc = &tp->t_inpcb->inp_inc; tp->sackhint.last_sack_ack = 0; sack_changed = 0; nsegs = max(1, m->m_pkthdr.lro_nsegs); /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. Otherwise, we * allow the tcbinfo to be in either alocked or unlocked, as the * caller may have unnecessarily acquired a write lock due to a race. */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) INP_INFO_RLOCK_ASSERT(&V_tcbinfo); else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); #ifdef TCPPCAP /* Save segment, if requested. */ tcp_pcap_add(th, m, &(tp->t_inpkts)); #endif /* * Segment received on connection. * Reset idle time and keep-alive timer. * XXX: This should be done after segment * validation to ignore broken/spoofed segs. */ tp->t_rcvtime = ticks; /* * Scale up the window into a 32-bit value. * For the SYN_SENT state the scale is zero. */ tiwin = th->th_win << tp->snd_scale; /* * TCP ECN processing. */ if (tp->t_flags & TF_ECN_PERMIT) { if (thflags & TH_CWR) tp->t_flags &= ~TF_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: TCPSTAT_INC(tcps_ecn_ect1); break; } /* Process a packet differently from RFC3168. */ cc_ecnpkt_handler(tp, th, iptos); /* Congestion experienced. */ if (thflags & TH_ECE) { cc_cong_signal(tp, th, CC_ECN); } } /* * Parse options on any incoming segment. */ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if ((tp->t_flags & TF_SIGNATURE) != 0 && + (to.to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_sigopt); + /* XXX: should drop? */ + } +#endif /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. Normalize * timestamp if syncookies were used when this connection * was established. */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) to.to_tsecr = 0; } /* * If timestamps were negotiated during SYN/ACK they should * appear on every segment during this session and vice versa. */ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp missing, " "no action\n", s, __func__); free(s, M_TCPLOG); } } if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp not expected, " "no action\n", s, __func__); free(s, M_TCPLOG); } } /* * Process options only when we get SYN/ACK back. The SYN case * for incoming connections is handled in tcp_syncache. * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. * XXX this is traditional behavior, may need to be cleaned up. */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; } /* * Initial send window. It will be updated with * the next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_ts_getticks(); } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; } /* * Header prediction: check for the two common cases * of a uni-directional data xfer. If the packet has * no control flags, is in-sequence, the window didn't * change and we're not retransmitting, it's a * candidate. If the length is zero and the ack moved * forward, we're the sender side of the xfer. Just * free the data acked & wake any higher level process * that was blocked waiting for space. If the length * is non-zero and the ack didn't move, we're the * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. * Make sure that the hidden state-flags are also off. * Since we check for TCPS_ESTABLISHED first, it can only * be TH_NEEDSYN. */ if (tp->t_state == TCPS_ESTABLISHED && th->th_seq == tp->rcv_nxt && (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && tp->snd_nxt == tp->snd_max && tiwin && tiwin == tp->snd_wnd && ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && LIST_EMPTY(&tp->t_segq) && ((to.to_flags & TOF_TS) == 0 || TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to.to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to.to_tsval; } if (tlen == 0) { if (SEQ_GT(th->th_ack, tp->snd_una) && SEQ_LEQ(th->th_ack, tp->snd_max) && !IN_RECOVERY(tp->t_flags) && (to.to_flags & TOF_SACK) == 0 && TAILQ_EMPTY(&tp->snd_holes)) { /* * This is a pure ack for outstanding data. */ if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; TCPSTAT_INC(tcps_predack); /* * "bad retransmit" recovery. */ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) { cc_cong_signal(tp, th, CC_RTO_ERR); } /* * Recalculate the transmit timer / rtt. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { uint32_t t; t = tcp_ts_getticks() - to.to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } acked = BYTES_THIS_ACK(tp, th); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, &to); #endif TCPSTAT_ADD(tcps_rcvackpack, nsegs); TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); if (SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* * Let the congestion control algorithm update * congestion control related information. This * typically means increasing the congestion * window. */ cc_ack_received(tp, th, nsegs, CC_ACK); tp->snd_una = th->th_ack; /* * Pull snd_wl2 up to prevent seq wrap relative * to th_ack. */ tp->snd_wl2 = th->th_ack; tp->t_dupacks = 0; m_freem(m); /* * If all outstanding data are acked, stop * retransmit timer, otherwise restart timer * using current (possibly backed-off) value. * If process is waiting for space, * wakeup/selwakeup/signal. If data * are ready to send, let tcp_output * decide between more output or persist. */ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); if (tp->snd_una == tp->snd_max) tcp_timer_activate(tp, TT_REXMT, 0); else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); sowwakeup(so); if (sbavail(&so->so_snd)) (void) tp->t_fb->tfb_tcp_output(tp); goto check_delack; } } else if (th->th_ack == tp->snd_una && tlen <= sbspace(&so->so_rcv)) { int newsize = 0; /* automatic sockbuf scaling */ /* * This is a pure, in-sequence data packet with * nothing on the reassembly queue and we have enough * buffer space to take it. */ if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; /* * Pull snd_wl1 up to prevent seq wrap relative to * th_seq. */ tp->snd_wl1 = th->th_seq; /* * Pull rcv_up up to prevent seq wrap relative to * rcv_nxt. */ tp->rcv_up = tp->rcv_nxt; TCPSTAT_ADD(tcps_rcvpack, nsegs); TCPSTAT_ADD(tcps_rcvbyte, tlen); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); /* * Automatic sizing of receive socket buffer. Often the send * buffer size is not optimally adjusted to the actual network * conditions at hand (delay bandwidth product). Setting the * buffer size too small limits throughput on links with high * bandwidth and high delay (eg. trans-continental/oceanic links). * * On the receive side the socket buffer memory is only rarely * used to any significant extent. This allows us to be much * more aggressive in scaling the receive socket buffer. For * the case that the buffer space is actually used to a large * extent and we run out of kernel memory we can simply drop * the new segments; TCP on the sender will just retransmit it * later. Setting the buffer size too big may only consume too * much kernel memory if the application doesn't read() from * the socket or packet loss or reordering makes use of the * reassembly queue. * * The criteria to step up the receive buffer one notch are: * 1. Application has not set receive buffer size with * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. * 2. the number of bytes received during the time it takes * one timestamp to be reflected back to us (the RTT); * 3. received bytes per RTT is within seven eighth of the * current socket buffer size; * 4. receive buffer size has not hit maximal automatic size; * * This algorithm does one step per RTT at most and only if * we receive a bulk stream w/o packet losses or reorderings. * Shrinking the buffer during idle times is not necessary as * it doesn't consume any memory when idle. * * TODO: Only step up if the application is actually serving * the buffer to better manage the socket buffer resources. */ if (V_tcp_do_autorcvbuf && (to.to_flags & TOF_TS) && to.to_tsecr && (so->so_rcv.sb_flags & SB_AUTOSIZE)) { if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && to.to_tsecr - tp->rfbuf_ts < hz) { if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) && so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { newsize = min(so->so_rcv.sb_hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); } /* Start over with next RTT. */ tp->rfbuf_ts = 0; tp->rfbuf_cnt = 0; } else tp->rfbuf_cnt += tlen; /* add up */ } /* Add data to socket buffer. */ SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { /* * Set new socket buffer size. * Give up when limit is reached. */ if (newsize) if (!sbreserve_locked(&so->so_rcv, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); } /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); if (DELAY_ACK(tp, tlen)) { tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; tp->t_fb->tfb_tcp_output(tp); } goto check_delack; } } /* * Calculate amount of space in receive window, * and then do TCP input processing. * Receive window is amount of space in rcv queue, * but not less than advertised window. */ win = sbspace(&so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); /* Reset receive buffer auto scaling when not in bulk receive mode. */ tp->rfbuf_ts = 0; tp->rfbuf_cnt = 0; switch (tp->t_state) { /* * If the state is SYN_RECEIVED: * if seg contains an ACK, but not for our SYN/ACK, send a RST. */ case TCPS_SYN_RECEIVED: if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } #ifdef TCP_RFC7413 if (IS_FASTOPEN(tp->t_flags)) { /* * When a TFO connection is in SYN_RECEIVED, the * only valid packets are the initial SYN, a * retransmit/copy of the initial SYN (possibly with * a subset of the original data), a valid ACK, a * FIN, or a RST. */ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ if ((tcp_timer_active(tp, TT_DELACK) || tcp_timer_active(tp, TT_REXMT))) goto drop; } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) { goto drop; } } #endif break; /* * If the state is SYN_SENT: * if seg contains an ACK, but not for our SYN, drop the input. * if seg contains a RST, then drop the connection. * if seg does not contain SYN, then drop it. * Otherwise this is an acceptable SYN segment * initialize tp->rcv_nxt and tp->irs * if seg contains ack then advance tp->snd_una * if seg contains an ECE and ECN support is enabled, the stream * is ECN capable. * if SYN has been acked change to ESTABLISHED else SYN_RCVD state * arrange for segment to be acked (eventually) * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, m, tp, th); tp = tcp_drop(tp, ECONNREFUSED); } if (thflags & TH_RST) goto drop; if (!(thflags & TH_SYN)) goto drop; tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; } tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ /* * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ if (DELAY_ACK(tp, tlen) && tlen != 0) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); else tp->t_flags |= TF_ACKNOW; if ((thflags & TH_ECE) && V_tcp_do_ecn) { tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } /* * Received in SYN_SENT[*] state. * Transitions: * SYN_SENT --> ESTABLISHED * SYN_SENT* --> FIN_WAIT_1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(connect__established, NULL, tp, m, tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } } else { /* * Received initial SYN in SYN-SENT[*] state => * simultaneous open. * If it succeeds, connection is * half-synchronized. * Otherwise, do 3-way handshake: * SYN-SENT -> SYN-RECEIVED * SYN-SENT* -> SYN-RECEIVED* */ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_timer_activate(tp, TT_REXMT, 0); tcp_state_change(tp, TCPS_SYN_RECEIVED); } KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " "ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* * Advance th->th_seq to correspond to first data byte. * If data, trim to stay within window, * dropping FIN if necessary. */ th->th_seq++; if (tlen > tp->rcv_wnd) { todrop = tlen - tp->rcv_wnd; m_adj(m, -todrop); tlen = tp->rcv_wnd; thflags &= ~TH_FIN; TCPSTAT_INC(tcps_rcvpackafterwin); TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); } tp->snd_wl1 = th->th_seq - 1; tp->rcv_up = th->th_seq; /* * Client side of transaction: already sent SYN and data. * If the remote host used T/TCP to validate the SYN, * our data will be ACK'd; if so, enter normal data segment * processing in the middle of step 5, ack processing. * Otherwise, goto step 6. */ if (thflags & TH_ACK) goto process_ACK; goto step6; /* * If the state is LAST_ACK or CLOSING or TIME_WAIT: * do normal processing. * * NB: Leftover from RFC1644 T/TCP. Cases to be reused later. */ case TCPS_LAST_ACK: case TCPS_CLOSING: break; /* continue normal processing */ } /* * States other than LISTEN or SYN_SENT. * First check the RST flag and sequence number since reset segments * are exempt from the timestamp and connection count tests. This * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix * below which allowed reset segments in half the sequence space * to fall though and be processed (which gives forged reset * segments with a random sequence number a 50 percent chance of * killing a connection). * Then check timestamp, if present. * Then check the connection count, if present. * Then check that at least some bytes of segment are within * receive window. If segment begins before rcv_nxt, * drop leading data (and SYN); if nothing left, just ack. */ if (thflags & TH_RST) { /* * RFC5961 Section 3.2 * * - RST drops connection only if SEG.SEQ == RCV.NXT. * - If RST is in window, we send challenge ACK. * * Note: to take into account delayed ACKs, we should * test against last_ack_sent instead of rcv_nxt. * Note 2: we handle special case of closed window, not * covered by the RFC. */ if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED, ("%s: TH_RST ti_locked %d, th %p tp %p", __func__, ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); if (V_tcp_insecure_rst || tp->last_ack_sent == th->th_seq) { TCPSTAT_INC(tcps_drops); /* Drop the connection. */ switch (tp->t_state) { case TCPS_SYN_RECEIVED: so->so_error = ECONNREFUSED; goto close; case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: so->so_error = ECONNRESET; close: /* FALLTHROUGH */ default: tp = tcp_close(tp); } } else { TCPSTAT_INC(tcps_badrst); /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; } } goto drop; } /* * RFC5961 Section 4.2 * Send challenge ACK for any SYN in synchronized state. */ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && tp->t_state != TCPS_SYN_RECEIVED) { KASSERT(ti_locked == TI_RLOCKED, ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { tp = tcp_drop(tp, ECONNRESET); rstreason = BANDLIM_UNLIMITED; } else { /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; } goto drop; } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { /* Check to see if ts_recent is over 24 days old. */ if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. If this segment updates * ts_recent, the age will be reset later and ts_recent * will get a valid value. If it does not, setting * ts_recent to zero will at least satisfy the * requirement that zero be placed in the timestamp * echo reply when ts_recent isn't valid. The * age isn't reset until we get a valid ts_recent * because we don't want out-of-order segments to be * dropped when ts_recent is old. */ tp->ts_recent = 0; } else { TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, tlen); TCPSTAT_INC(tcps_pawsdrop); if (tlen) goto dropafterack; goto drop; } } /* * In the SYN-RECEIVED state, validate that the packet belongs to * this connection before trimming the data to fit the receive * window. Check the sequence number versus IRS since we know * the sequence numbers haven't wrapped. This is a partial fix * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } todrop = tp->rcv_nxt - th->th_seq; if (todrop > 0) { if (thflags & TH_SYN) { thflags &= ~TH_SYN; th->th_seq++; if (th->th_urp > 1) th->th_urp--; else thflags &= ~TH_URG; todrop--; } /* * Following if statement from Stevens, vol. 2, p. 960. */ if (todrop > tlen || (todrop == tlen && (thflags & TH_FIN) == 0)) { /* * Any valid FIN must be to the left of the window. * At this point the FIN must be a duplicate or out * of sequence; drop it. */ thflags &= ~TH_FIN; /* * Send an ACK to resynchronize and drop any data. * But keep on processing for RST or ACK. */ tp->t_flags |= TF_ACKNOW; todrop = tlen; TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, todrop); } else { TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; if (th->th_urp > todrop) th->th_urp -= todrop; else { thflags &= ~TH_URG; th->th_urp = 0; } } /* * If new data are received on a connection after the * user processes are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data " "after socket was closed, " "sending RST and removing tcpcb\n", s, __func__, tcpstates[tp->t_state], tlen); free(s, M_TCPLOG); } tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } /* * If segment ends after window, drop trailing data * (and PUSH and FIN); if nothing left, just ACK. */ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); if (todrop > 0) { TCPSTAT_INC(tcps_rcvpackafterwin); if (todrop >= tlen) { TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from * incoming segments. Continue processing, but * remember to ack. Otherwise, drop segment * and ack. */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_rcvwinprobe); } else goto dropafterack; } else TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); m_adj(m, -todrop); tlen -= todrop; thflags &= ~(TH_PUSH|TH_FIN); } /* * If last ACK falls within this segment's sequence numbers, * record its timestamp. * NOTE: * 1) That the test incorporates suggestions from the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). * 2) That updating only on newer timestamps interferes with * our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. * 3) That we modify the segment boundary check to be * Last.ACK.Sent <= SEG.SEQ + SEG.Len * instead of RFC1323's * Last.ACK.Sent < SEG.SEQ + SEG.Len, * This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated * Vol. 2 p.869. In such cases, we can still calculate the * RTT correctly when RCV.NXT == Last.ACK.Sent. */ if ((to.to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN|TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to.to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN * flag is on (half-synchronized state), then queue data for * later processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_state == TCPS_SYN_RECEIVED || (tp->t_flags & TF_NEEDSYN)) { #ifdef TCP_RFC7413 if (tp->t_state == TCPS_SYN_RECEIVED && IS_FASTOPEN(tp->t_flags)) { tp->snd_wnd = tiwin; cc_conn_init(tp); } #endif goto step6; } else if (tp->t_flags & TF_ACKNOW) goto dropafterack; else goto drop; } /* * Ack processing. */ switch (tp->t_state) { /* * In SYN_RECEIVED state, the ack ACKs our SYN, so enter * ESTABLISHED state and continue processing. * The ACK was checked above. */ case TCPS_SYN_RECEIVED: TCPSTAT_INC(tcps_connects); soisconnected(so); /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_wnd = tiwin; } /* * Make transitions: * SYN-RECEIVED -> ESTABLISHED * SYN-RECEIVED* -> FIN-WAIT-1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, m, tp, th); #ifdef TCP_RFC7413 if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; /* * Account for the ACK of our SYN prior to * regular ACK processing below. */ tp->snd_una++; } /* * TFO connections call cc_conn_init() during SYN * processing. Calling it again here for such * connections is not harmless as it would undo the * snd_cwnd reduction that occurs when a TFO SYN|ACK * is retransmitted. */ if (!IS_FASTOPEN(tp->t_flags)) #endif cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user. */ if (tlen == 0 && (thflags & TH_FIN) == 0) (void) tcp_reass(tp, (struct tcphdr *)0, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; /* FALLTHROUGH */ /* * In ESTABLISHED state: drop duplicate ACKs; ACK out of range * ACKs. If the ack is in the range * tp->snd_una < th->th_ack <= tp->snd_max * then advance tp->snd_una to th->th_ack and drop * data from the retransmission queue. If this ACK reflects * more up to date window information we update our window information. */ case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: if (SEQ_GT(th->th_ack, tp->snd_max)) { TCPSTAT_INC(tcps_rcvacktoomuch); goto dropafterack; } if ((tp->t_flags & TF_SACK_PERMIT) && ((to.to_flags & TOF_SACK) || !TAILQ_EMPTY(&tp->snd_holes))) sack_changed = tcp_sack_doack(tp, &to, th->th_ack); else /* * Reset the value so that previous (valid) value * from the last ack with SACK doesn't get used. */ tp->sackhint.sacked_bytes = 0; #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, &to); #endif if (SEQ_LEQ(th->th_ack, tp->snd_una)) { u_int maxseg; maxseg = tcp_maxseg(tp); if (tlen == 0 && (tiwin == tp->snd_wnd || (tp->t_flags & TF_SACK_PERMIT))) { /* * If this is the first time we've seen a * FIN from the remote, this is not a * duplicate and it needs to be processed * normally. This happens during a * simultaneous close. */ if ((thflags & TH_FIN) && (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { tp->t_dupacks = 0; break; } TCPSTAT_INC(tcps_rcvdupack); /* * If we have outstanding data (other than * a window probe), this is a completely * duplicate ack (ie, window info didn't * change and FIN isn't set), * the ack is the biggest we've * seen and we've seen exactly our rexmt * threshold of them, assume a packet * has been dropped and retransmit it. * Kludge snd_nxt & the congestion * window so we send only this one * packet. * * We know we're losing at the current * window size so do congestion avoidance * (set ssthresh to half the current window * and pull our congestion window back to * the new ssthresh). * * Dup acks mean that packets have left the * network (they're now cached at the receiver) * so bump cwnd by the amount in the receiver * to keep a constant cwnd packets in the * network. * * When using TCP ECN, notify the peer that * we reduced the cwnd. */ /* * Following 2 kinds of acks should not affect * dupack counting: * 1) Old acks * 2) Acks with SACK but without any new SACK * information in them. These could result from * any anomaly in the network like a switch * duplicating packets or a possible DoS attack. */ if (th->th_ack != tp->snd_una || ((tp->t_flags & TF_SACK_PERMIT) && !sack_changed)) break; else if (!tcp_timer_active(tp, TT_REXMT)) tp->t_dupacks = 0; else if (++tp->t_dupacks > tcprexmtthresh || IN_FASTRECOVERY(tp->t_flags)) { cc_ack_received(tp, th, nsegs, CC_DUPACK); if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags)) { int awnd; /* * Compute the amount of data in flight first. * We can inject new data into the pipe iff * we have less than 1/2 the original window's * worth of data in flight. */ if (V_tcp_do_rfc6675_pipe) awnd = tcp_compute_pipe(tp); else awnd = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += maxseg; /* * RFC5681 Section 3.2 talks about cwnd * inflation on additional dupacks and * deflation on recovering from loss. * * We keep cwnd into check so that * we don't have to 'deflate' it when we * get out of recovery. */ if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; } } else tp->snd_cwnd += maxseg; (void) tp->t_fb->tfb_tcp_output(tp); goto drop; } else if (tp->t_dupacks == tcprexmtthresh) { tcp_seq onxt = tp->snd_nxt; /* * If we're doing sack, check to * see if we're already in sack * recovery. If we're not doing sack, * check to see if we're in newreno * recovery. */ if (tp->t_flags & TF_SACK_PERMIT) { if (IN_FASTRECOVERY(tp->t_flags)) { tp->t_dupacks = 0; break; } } else { if (SEQ_LEQ(th->th_ack, tp->snd_recover)) { tp->t_dupacks = 0; break; } } /* Congestion signal before ack. */ cc_cong_signal(tp, th, CC_NDUPACK); cc_ack_received(tp, th, nsegs, CC_DUPACK); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; if (tp->t_flags & TF_SACK_PERMIT) { TCPSTAT_INC( tcps_sack_recovery_episode); tp->sack_newdata = tp->snd_nxt; if (CC_ALGO(tp)->cong_signal == NULL) tp->snd_cwnd = maxseg; (void) tp->t_fb->tfb_tcp_output(tp); goto drop; } tp->snd_nxt = th->th_ack; if (CC_ALGO(tp)->cong_signal == NULL) tp->snd_cwnd = maxseg; (void) tp->t_fb->tfb_tcp_output(tp); KASSERT(tp->snd_limited <= 2, ("%s: tp->snd_limited too big", __func__)); if (CC_ALGO(tp)->cong_signal == NULL) tp->snd_cwnd = tp->snd_ssthresh + maxseg * (tp->t_dupacks - tp->snd_limited); if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; goto drop; } else if (V_tcp_do_rfc3042) { /* * Process first and second duplicate * ACKs. Each indicates a segment * leaving the network, creating room * for more. Make sure we can send a * packet on reception of each duplicate * ACK by increasing snd_cwnd by one * segment. Restore the original * snd_cwnd after packet transmission. */ cc_ack_received(tp, th, nsegs, CC_DUPACK); uint32_t oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; u_int sent; int avail; KASSERT(tp->t_dupacks == 1 || tp->t_dupacks == 2, ("%s: dupacks not 1 or 2", __func__)); if (tp->t_dupacks == 1) tp->snd_limited = 0; tp->snd_cwnd = (tp->snd_nxt - tp->snd_una) + (tp->t_dupacks - tp->snd_limited) * maxseg; /* * Only call tcp_output when there * is new data available to be sent. * Otherwise we would send pure ACKs. */ SOCKBUF_LOCK(&so->so_snd); avail = sbavail(&so->so_snd) - (tp->snd_nxt - tp->snd_una); SOCKBUF_UNLOCK(&so->so_snd); if (avail > 0) (void) tp->t_fb->tfb_tcp_output(tp); sent = tp->snd_max - oldsndmax; if (sent > maxseg) { KASSERT((tp->t_dupacks == 2 && tp->snd_limited == 0) || (sent == maxseg + 1 && tp->t_flags & TF_SENTFIN), ("%s: sent too much", __func__)); tp->snd_limited = 2; } else if (sent > 0) ++tp->snd_limited; tp->snd_cwnd = oldcwnd; goto drop; } } break; } else { /* * This ack is advancing the left edge, reset the * counter. */ tp->t_dupacks = 0; /* * If this ack also has new SACK info, increment the * counter as per rfc6675. */ if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed) tp->t_dupacks++; } KASSERT(SEQ_GT(th->th_ack, tp->snd_una), ("%s: th_ack <= snd_una", __func__)); /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. */ if (IN_FASTRECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover)) { if (tp->t_flags & TF_SACK_PERMIT) tcp_sack_partialack(tp, th); else tcp_newreno_partial_ack(tp, th); } else cc_post_recovery(tp, th); } /* * If we reach this point, ACK is not a duplicate, * i.e., it ACKs something we sent. */ if (tp->t_flags & TF_NEEDSYN) { /* * T/TCP: Connection was half-synchronized, and our * SYN has been ACK'd (so connection is now fully * synchronized). Go to non-starred state, * increment snd_una for ACK of SYN, and check if * we can do window scaling. */ tp->t_flags &= ~TF_NEEDSYN; tp->snd_una++; /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; /* Send window already scaled. */ } } process_ACK: INP_WLOCK_ASSERT(tp->t_inpcb); acked = BYTES_THIS_ACK(tp, th); KASSERT(acked >= 0, ("%s: acked unexepectedly negative " "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__, tp->snd_una, th->th_ack, tp, m)); TCPSTAT_ADD(tcps_rcvackpack, nsegs); TCPSTAT_ADD(tcps_rcvackbyte, acked); /* * If we just performed our first retransmit, and the ACK * arrives within our recovery window, then it was a mistake * to do the retransmit in the first place. Recover our * original cwnd and ssthresh, and proceed to transmit where * we left off. */ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) cc_cong_signal(tp, th, CC_RTO_ERR); /* * If we have a timestamp reply, update smoothed * round trip time. If no timestamp is present but * transmit timer is running and timed sequence * number was acked, update smoothed round trip time. * Since we now have an rtt measurement, cancel the * timer backoff (cf., Phil Karn's retransmit alg.). * Recompute the initial retransmit timer. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { uint32_t t; t = tcp_ts_getticks() - to.to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } /* * If all outstanding data is acked, stop retransmit * timer and remember to restart (more output or persist). * If there is more data to be acked, restart retransmit * timer, using current (possibly backed-off) value. */ if (th->th_ack == tp->snd_max) { tcp_timer_activate(tp, TT_REXMT, 0); needoutput = 1; } else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); /* * If no data (only SYN) was ACK'd, * skip rest of ACK processing. */ if (acked == 0) goto step6; /* * Let the congestion control algorithm update congestion * control related information. This typically means increasing * the congestion window. */ cc_ack_received(tp, th, nsegs, CC_ACK); SOCKBUF_LOCK(&so->so_snd); if (acked > sbavail(&so->so_snd)) { if (tp->snd_wnd >= sbavail(&so->so_snd)) tp->snd_wnd -= sbavail(&so->so_snd); else tp->snd_wnd = 0; mfree = sbcut_locked(&so->so_snd, (int)sbavail(&so->so_snd)); ourfinisacked = 1; } else { mfree = sbcut_locked(&so->so_snd, acked); if (tp->snd_wnd >= (uint32_t) acked) tp->snd_wnd -= acked; else tp->snd_wnd = 0; ourfinisacked = 0; } /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); m_freem(mfree); /* Detect una wraparound. */ if (!IN_RECOVERY(tp->t_flags) && SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* XXXLAS: Can this be moved up into cc_post_recovery? */ if (IN_RECOVERY(tp->t_flags) && SEQ_GEQ(th->th_ack, tp->snd_recover)) { EXIT_RECOVERY(tp->t_flags); } tp->snd_una = th->th_ack; if (tp->t_flags & TF_SACK_PERMIT) { if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; } if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; switch (tp->t_state) { /* * In FIN_WAIT_1 STATE in addition to the processing * for the ESTABLISHED state if our FIN is now acknowledged * then enter FIN_WAIT_2. */ case TCPS_FIN_WAIT_1: if (ourfinisacked) { /* * If we can't receive any more * data, then closing user can proceed. * Starting the timer is contrary to the * specification, but if we don't get a FIN * we'll hang forever. * * XXXjl: * we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } break; /* * In CLOSING STATE in addition to the processing for * the ESTABLISHED state if the ACK acknowledges our FIN * then enter the TIME-WAIT state, otherwise ignore * the segment. */ case TCPS_CLOSING: if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); m_freem(m); return; } break; /* * In LAST_ACK, we may still be waiting for data to drain * and/or to be acked, as well as for the ack of our FIN. * If our FIN is now acknowledged, delete the TCB, * enter the closed state and return. */ case TCPS_LAST_ACK: if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); goto drop; } break; } } step6: INP_WLOCK_ASSERT(tp->t_inpcb); /* * Update window information. * Don't look at window if no ACK: TAC's send garbage on first SYN. */ if ((thflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ if (tlen == 0 && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; needoutput = 1; } /* * Process segments with URG. */ if ((thflags & TH_URG) && th->th_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * This is a kludge, but if we receive and accept * random urgent pointers, we'll crash in * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. */ SOCKBUF_LOCK(&so->so_rcv); if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ goto dodata; /* XXX */ } /* * If this segment advances the known urgent pointer, * then mark the data stream. This should not happen * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since * a FIN has been received from the remote side. * In these states we ignore the URG. * * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section as the original * spec states (in one of two places). */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; so->so_oobmark = sbavail(&so->so_rcv) + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SBS_RCVATMARK; sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } SOCKBUF_UNLOCK(&so->so_rcv); /* * Remove out of band data so doesn't get presented to user. * This can happen independent of advancing the URG pointer, * but if two URG's are pending at once, some out-of-band * data may creep in... ick. */ if (th->th_urp <= (uint32_t)tlen && !(so->so_options & SO_OOBINLINE)) { /* hdr drop is delayed */ tcp_pulloutofband(so, th, m, drop_hdrlen); } } else { /* * If no out of band data is expected, * pull receive urgent pointer along * with the receive window. */ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ INP_WLOCK_ASSERT(tp->t_inpcb); /* * Process the segment text, merging it into the TCP sequencing queue, * and arranging for acknowledgment of receipt if necessary. * This process logically involves adjusting tp->rcv_wnd as data * is presented to the user (this happens in tcp_usrreq.c, * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. */ #ifdef TCP_RFC7413 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && IS_FASTOPEN(tp->t_flags)); #else #define tfo_syn (false) #endif if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly queue * with control block tp. Set thflags to whether reassembly now * includes a segment with FIN. This handles the common case * inline (segment is the next to be received on an established * connection, and the queue is empty), avoiding linkage into * and removal from the queue and repetition of various * conversions. * Set DELACK for segments received in order, but ack * immediately when segments are out of order (so * fast retransmit can work). */ if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq) && (TCPS_HAVEESTABLISHED(tp->t_state) || tfo_syn)) { if (DELAY_ACK(tp, tlen) || tfo_syn) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt += tlen; thflags = th->th_flags & TH_FIN; TCPSTAT_INC(tcps_rcvpack); TCPSTAT_ADD(tcps_rcvbyte, tlen); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else sbappendstream_locked(&so->so_rcv, m, 0); /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); } else { /* * XXX: Due to the header drop above "th" is * theoretically invalid by now. Fortunately * m_adj() doesn't actually frees any mbufs * when trimming from the head. */ thflags = tcp_reass(tp, th, &tlen, m); tp->t_flags |= TF_ACKNOW; } if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) tcp_update_sack_list(tp, save_start, save_start + tlen); #if 0 /* * Note the amount of data that peer has sent into * our window, in order to estimate the sender's * buffer size. * XXX: Unused. */ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); else len = so->so_rcv.sb_hiwat; #endif } else { m_freem(m); thflags &= ~TH_FIN; } /* * If FIN is received ACK the FIN and let the user know * that the connection is closing. */ if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); /* * If connection is half-synchronized * (ie NEEDSYN flag on) then delay ACK, * so it may be piggybacked when SYN is sent. * Otherwise, since we received a FIN then no * more input can be expected, send ACK now. */ if (tp->t_flags & TF_NEEDSYN) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt++; } switch (tp->t_state) { /* * In SYN_RECEIVED and ESTABLISHED STATES * enter the CLOSE_WAIT state. */ case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_CLOSE_WAIT); break; /* * If still in FIN_WAIT_1 STATE FIN has not been acked so * enter the CLOSING state. */ case TCPS_FIN_WAIT_1: tcp_state_change(tp, TCPS_CLOSING); break; /* * In FIN_WAIT_2 state enter the TIME_WAIT state, * starting the time-wait timer, turning off the other * standard timers. */ case TCPS_FIN_WAIT_2: INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " "TCP_FIN_WAIT_2 ti_locked: %d", __func__, ti_locked)); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); return; } } if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); /* * Return any desired output. */ if (needoutput || (tp->t_flags & TF_ACKNOW)) (void) tp->t_fb->tfb_tcp_output(tp); check_delack: KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } INP_WUNLOCK(tp->t_inpcb); return; dropafterack: /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. * * We can now skip the test for the RST flag since all * paths to this code happen after packets containing * RST have been dropped. * * In the SYN-RECEIVED state, don't send an ACK unless the * segment we received passes the SYN-RECEIVED ACK test. * If it fails send a RST. This breaks the loop in the * "LAND" DoS attack, and also prevents an ACK storm * between two listening ports that have been sent forged * SYN segments, each with the source address of the other. */ if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); m_freem(m); return; dropwithreset: if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); return; drop: if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); #endif /* * Drop space held by incoming segment and return. */ #ifdef TCPDEBUG if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); m_freem(m); #ifndef TCP_RFC7413 #undef tfo_syn #endif } /* * Issue RST and make ACK acceptable to originator of segment. * The mbuf must still include the original packet header. * tp may be NULL. */ void tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int tlen, int rstreason) { #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif if (tp != NULL) { INP_WLOCK_ASSERT(tp->t_inpcb); } /* Don't bother if destination was broadcast/multicast. */ if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST)) goto drop; #ifdef INET6 if (mtod(m, struct ip *)->ip_v == 6) { ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) goto drop; /* IPv6 anycast check is done at tcp6_input() */ } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip = mtod(m, struct ip *); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) goto drop; } #endif /* Perform bandwidth limiting. */ if (badport_bandlim(rstreason) < 0) goto drop; /* tcp_respond consumes the mbuf chain. */ if (th->th_flags & TH_ACK) { tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack, TH_RST); } else { if (th->th_flags & TH_SYN) tlen++; if (th->th_flags & TH_FIN) tlen++; tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK); } return; drop: m_freem(m); } /* * Parse TCP options and place in tcpopt. */ void tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags) { int opt, optlen; to->to_flags = 0; for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { if (cnt < 2) break; optlen = cp[1]; if (optlen < 2 || optlen > cnt) break; } switch (opt) { case TCPOPT_MAXSEG: if (optlen != TCPOLEN_MAXSEG) continue; if (!(flags & TO_SYN)) continue; to->to_flags |= TOF_MSS; bcopy((char *)cp + 2, (char *)&to->to_mss, sizeof(to->to_mss)); to->to_mss = ntohs(to->to_mss); break; case TCPOPT_WINDOW: if (optlen != TCPOLEN_WINDOW) continue; if (!(flags & TO_SYN)) continue; to->to_flags |= TOF_SCALE; to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT); break; case TCPOPT_TIMESTAMP: if (optlen != TCPOLEN_TIMESTAMP) continue; to->to_flags |= TOF_TS; bcopy((char *)cp + 2, (char *)&to->to_tsval, sizeof(to->to_tsval)); to->to_tsval = ntohl(to->to_tsval); bcopy((char *)cp + 6, (char *)&to->to_tsecr, sizeof(to->to_tsecr)); to->to_tsecr = ntohl(to->to_tsecr); break; -#ifdef TCP_SIGNATURE - /* - * XXX In order to reply to a host which has set the - * TCP_SIGNATURE option in its initial SYN, we have to - * record the fact that the option was observed here - * for the syncache code to perform the correct response. - */ case TCPOPT_SIGNATURE: + /* + * In order to reply to a host which has set the + * TCP_SIGNATURE option in its initial SYN, we have + * to record the fact that the option was observed + * here for the syncache code to perform the correct + * response. + */ if (optlen != TCPOLEN_SIGNATURE) continue; to->to_flags |= TOF_SIGNATURE; to->to_signature = cp + 2; break; -#endif case TCPOPT_SACK_PERMITTED: if (optlen != TCPOLEN_SACK_PERMITTED) continue; if (!(flags & TO_SYN)) continue; if (!V_tcp_do_sack) continue; to->to_flags |= TOF_SACKPERM; break; case TCPOPT_SACK: if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) continue; if (flags & TO_SYN) continue; to->to_flags |= TOF_SACK; to->to_nsacks = (optlen - 2) / TCPOLEN_SACK; to->to_sacks = cp + 2; TCPSTAT_INC(tcps_sack_rcv_blocks); break; #ifdef TCP_RFC7413 case TCPOPT_FAST_OPEN: if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) && (optlen < TCPOLEN_FAST_OPEN_MIN) && (optlen > TCPOLEN_FAST_OPEN_MAX)) continue; if (!(flags & TO_SYN)) continue; if (!V_tcp_fastopen_enabled) continue; to->to_flags |= TOF_FASTOPEN; to->to_tfo_len = optlen - 2; to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL; break; #endif default: continue; } } } /* * Pull out of band byte out of a segment so * it doesn't appear in the user's data queue. * It is still reflected in the segment length for * sequencing purposes. */ void tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m, int off) { int cnt = off + th->th_urp - 1; while (cnt >= 0) { if (m->m_len > cnt) { char *cp = mtod(m, caddr_t) + cnt; struct tcpcb *tp = sototcpcb(so); INP_WLOCK_ASSERT(tp->t_inpcb); tp->t_iobc = *cp; tp->t_oobflags |= TCPOOB_HAVEDATA; bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); m->m_len--; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len--; return; } cnt -= m->m_len; m = m->m_next; if (m == NULL) break; } panic("tcp_pulloutofband"); } /* * Collect new round-trip time estimate * and update averages and current timeout. */ void tcp_xmit_timer(struct tcpcb *tp, int rtt) { int delta; INP_WLOCK_ASSERT(tp->t_inpcb); TCPSTAT_INC(tcps_rttupdated); tp->t_rttupdated++; if (tp->t_srtt != 0) { /* * srtt is stored as fixed point with 5 bits after the * binary point (i.e., scaled by 8). The following magic * is equivalent to the smoothing algorithm in rfc793 with * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed * point). Adjust rtt to origin 0. */ delta = ((rtt - 1) << TCP_DELTA_SHIFT) - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)); if ((tp->t_srtt += delta) <= 0) tp->t_srtt = 1; /* * We accumulate a smoothed rtt variance (actually, a * smoothed mean difference), then set the retransmit * timer to smoothed rtt + 4 times the smoothed variance. * rttvar is stored as fixed point with 4 bits after the * binary point (scaled by 16). The following is * equivalent to rfc793 smoothing with an alpha of .75 * (rttvar = rttvar*3/4 + |delta| / 4). This replaces * rfc793's wired-in beta. */ if (delta < 0) delta = -delta; delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT); if ((tp->t_rttvar += delta) <= 0) tp->t_rttvar = 1; if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar) tp->t_rttbest = tp->t_srtt + tp->t_rttvar; } else { /* * No rtt measurement yet - use the unsmoothed rtt. * Set the variance to half the rtt (so our first * retransmit happens at 3*rtt). */ tp->t_srtt = rtt << TCP_RTT_SHIFT; tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); tp->t_rttbest = tp->t_srtt + tp->t_rttvar; } tp->t_rtttime = 0; tp->t_rxtshift = 0; /* * the retransmit should happen at rtt + 4 * rttvar. * Because of the way we do the smoothing, srtt and rttvar * will each average +1/2 tick of bias. When we compute * the retransmit timer, we want 1/2 tick of rounding and * 1 extra tick because of +-1/2 tick uncertainty in the * firing of the timer. The bias will give us exactly the * 1.5 tick we need. But, because the bias is * statistical, we have to test that we don't drop below * the minimum feasible timer (which is 2 ticks). */ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX); /* * We received an ack for a packet that wasn't retransmitted; * it is probably safe to discard any error indications we've * received recently. This isn't quite right, but close enough * for now (a route might have failed after we sent a segment, * and the return path might not be symmetrical). */ tp->t_softerror = 0; } /* * Determine a reasonable value for maxseg size. * If the route is known, check route for mtu. * If none, use an mss that can be handled on the outgoing interface * without forcing IP to fragment. If no route is found, route has no mtu, * or the destination isn't local, use a default, hopefully conservative * size (usually 512 or the default IP max size, but no more than the mtu * of the interface), as we can't discover anything about intervening * gateways or networks. We also initialize the congestion/slow start * window to be a single segment if the destination isn't local. * While looking at the routing entry, we also initialize other path-dependent * parameters from pre-set or cached values in the routing entry. * * NOTE that resulting t_maxseg doesn't include space for TCP options or * IP options, e.g. IPSEC data, since length of this data may vary, and * thus it is calculated for every segment separately in tcp_output(). * * NOTE that this routine is only called when we process an incoming * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS * settings are handled in tcp_mssopt(). */ void tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap) { int mss = 0; uint32_t maxmtu = 0; struct inpcb *inp = tp->t_inpcb; struct hc_metrics_lite metrics; #ifdef INET6 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; size_t min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : sizeof (struct tcpiphdr); #else const size_t min_protoh = sizeof(struct tcpiphdr); #endif INP_WLOCK_ASSERT(tp->t_inpcb); if (mtuoffer != -1) { KASSERT(offer == -1, ("%s: conflict", __func__)); offer = mtuoffer - min_protoh; } /* Initialize. */ #ifdef INET6 if (isipv6) { maxmtu = tcp_maxmtu6(&inp->inp_inc, cap); tp->t_maxseg = V_tcp_v6mssdflt; } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { maxmtu = tcp_maxmtu(&inp->inp_inc, cap); tp->t_maxseg = V_tcp_mssdflt; } #endif /* * No route to sender, stay with default mss and return. */ if (maxmtu == 0) { /* * In case we return early we need to initialize metrics * to a defined state as tcp_hc_get() would do for us * if there was no cache hit. */ if (metricptr != NULL) bzero(metricptr, sizeof(struct hc_metrics_lite)); return; } /* What have we got? */ switch (offer) { case 0: /* * Offer == 0 means that there was no MSS on the SYN * segment, in this case we use tcp_mssdflt as * already assigned to t_maxseg above. */ offer = tp->t_maxseg; break; case -1: /* * Offer == -1 means that we didn't receive SYN yet. */ /* FALLTHROUGH */ default: /* * Prevent DoS attack with too small MSS. Round up * to at least minmss. */ offer = max(offer, V_tcp_minmss); } /* * rmx information is now retrieved from tcp_hostcache. */ tcp_hc_get(&inp->inp_inc, &metrics); if (metricptr != NULL) bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite)); /* * If there's a discovered mtu in tcp hostcache, use it. * Else, use the link mtu. */ if (metrics.rmx_mtu) mss = min(metrics.rmx_mtu, maxmtu) - min_protoh; else { #ifdef INET6 if (isipv6) { mss = maxmtu - min_protoh; if (!V_path_mtu_discovery && !in6_localaddr(&inp->in6p_faddr)) mss = min(mss, V_tcp_v6mssdflt); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { mss = maxmtu - min_protoh; if (!V_path_mtu_discovery && !in_localaddr(inp->inp_faddr)) mss = min(mss, V_tcp_mssdflt); } #endif /* * XXX - The above conditional (mss = maxmtu - min_protoh) * probably violates the TCP spec. * The problem is that, since we don't know the * other end's MSS, we are supposed to use a conservative * default. But, if we do that, then MTU discovery will * never actually take place, because the conservative * default is much less than the MTUs typically seen * on the Internet today. For the moment, we'll sweep * this under the carpet. * * The conservative default might not actually be a problem * if the only case this occurs is when sending an initial * SYN with options and data to a host we've never talked * to before. Then, they will reply with an MSS value which * will get recorded and the new parameters should get * recomputed. For Further Study. */ } mss = min(mss, offer); /* * Sanity check: make sure that maxseg will be large * enough to allow some data on segments even if the * all the option space is used (40bytes). Otherwise * funny things may happen in tcp_output. * * XXXGL: shouldn't we reserve space for IP/IPv6 options? */ mss = max(mss, 64); tp->t_maxseg = mss; } void tcp_mss(struct tcpcb *tp, int offer) { int mss; uint32_t bufsize; struct inpcb *inp; struct socket *so; struct hc_metrics_lite metrics; struct tcp_ifcap cap; KASSERT(tp != NULL, ("%s: tp == NULL", __func__)); bzero(&cap, sizeof(cap)); tcp_mss_update(tp, offer, -1, &metrics, &cap); mss = tp->t_maxseg; inp = tp->t_inpcb; /* * If there's a pipesize, change the socket buffer to that size, * don't change if sb_hiwat is different than default (then it * has been changed on purpose with setsockopt). * Make the socket buffers an integral number of mss units; * if the mss is larger than the socket buffer, decrease the mss. */ so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe) bufsize = metrics.rmx_sendpipe; else bufsize = so->so_snd.sb_hiwat; if (bufsize < mss) mss = bufsize; else { bufsize = roundup(bufsize, mss); if (bufsize > sb_max) bufsize = sb_max; if (bufsize > so->so_snd.sb_hiwat) (void)sbreserve_locked(&so->so_snd, bufsize, so, NULL); } SOCKBUF_UNLOCK(&so->so_snd); /* * Sanity check: make sure that maxseg will be large * enough to allow some data on segments even if the * all the option space is used (40bytes). Otherwise * funny things may happen in tcp_output. * * XXXGL: shouldn't we reserve space for IP/IPv6 options? */ tp->t_maxseg = max(mss, 64); SOCKBUF_LOCK(&so->so_rcv); if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe) bufsize = metrics.rmx_recvpipe; else bufsize = so->so_rcv.sb_hiwat; if (bufsize > mss) { bufsize = roundup(bufsize, mss); if (bufsize > sb_max) bufsize = sb_max; if (bufsize > so->so_rcv.sb_hiwat) (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL); } SOCKBUF_UNLOCK(&so->so_rcv); /* Check the interface for TSO capabilities. */ if (cap.ifcap & CSUM_TSO) { tp->t_flags |= TF_TSO; tp->t_tsomax = cap.tsomax; tp->t_tsomaxsegcount = cap.tsomaxsegcount; tp->t_tsomaxsegsize = cap.tsomaxsegsize; } } /* * Determine the MSS option to send on an outgoing SYN. */ int tcp_mssopt(struct in_conninfo *inc) { int mss = 0; uint32_t thcmtu = 0; uint32_t maxmtu = 0; size_t min_protoh; KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer")); #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { mss = V_tcp_v6mssdflt; maxmtu = tcp_maxmtu6(inc, NULL); min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { mss = V_tcp_mssdflt; maxmtu = tcp_maxmtu(inc, NULL); min_protoh = sizeof(struct tcpiphdr); } #endif #if defined(INET6) || defined(INET) thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ #endif if (maxmtu && thcmtu) mss = min(maxmtu, thcmtu) - min_protoh; else if (maxmtu || thcmtu) mss = max(maxmtu, thcmtu) - min_protoh; return (mss); } /* * On a partial ack arrives, force the retransmission of the * next unacknowledged segment. Do not clear tp->t_dupacks. * By setting snd_nxt to ti_ack, this forces retransmission timer to * be started again. */ void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) { tcp_seq onxt = tp->snd_nxt; uint32_t ocwnd = tp->snd_cwnd; u_int maxseg = tcp_maxseg(tp); INP_WLOCK_ASSERT(tp->t_inpcb); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; tp->snd_nxt = th->th_ack; /* * Set snd_cwnd to one segment beyond acknowledged offset. * (tp->snd_una has not yet been updated when this function is called.) */ tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); tp->snd_cwnd = ocwnd; if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; /* * Partial window deflation. Relies on fact that tp->snd_una * not updated yet. */ if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); else tp->snd_cwnd = 0; tp->snd_cwnd += maxseg; } int tcp_compute_pipe(struct tcpcb *tp) { return (tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes); } diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 8406f286315b..bba043e058da 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,1826 +1,1851 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #ifdef TCP_HHOOK #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #define TCPOUTFLAGS #include #include #include #include #include #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef TCP_OFFLOAD #include #endif -#ifdef IPSEC -#include -#endif /*IPSEC*/ +#include #include #include VNET_DEFINE(int, path_mtu_discovery) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(path_mtu_discovery), 1, "Enable Path MTU Discovery"); VNET_DEFINE(int, tcp_do_tso) = 1; #define V_tcp_do_tso VNET(tcp_do_tso) SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_tso), 0, "Enable TCP Segmentation Offload"); VNET_DEFINE(int, tcp_sendspace) = 1024*32; #define V_tcp_sendspace VNET(tcp_sendspace) SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size"); VNET_DEFINE(int, tcp_do_autosndbuf) = 1; #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autosndbuf), 0, "Enable automatic send buffer sizing"); VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024; #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_inc), 0, "Incrementor step size of automatic send buffer"); VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024; #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); /* * Make sure that either retransmit or persist timer is set for SYN, FIN and * non-ACK. */ #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \ KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\ tcp_timer_active((tp), TT_REXMT) || \ tcp_timer_active((tp), TT_PERSIST), \ ("neither rexmt nor persist timer is set")) #ifdef TCP_HHOOK static void inline hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t len, int tso); #endif static void inline cc_after_idle(struct tcpcb *tp); #ifdef TCP_HHOOK /* * Wrapper for the TCP established output helper hook. */ static void inline hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t len, int tso) { struct tcp_hhook_data hhook_data; if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0) { hhook_data.tp = tp; hhook_data.th = th; hhook_data.to = to; hhook_data.len = len; hhook_data.tso = tso; hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data, tp->osd); } } #endif /* * CC wrapper hook functions */ static void inline cc_after_idle(struct tcpcb *tp) { INP_WLOCK_ASSERT(tp->t_inpcb); if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(tp->ccv); } /* * Tcp output routine: figure out what should be sent and send it. */ int tcp_output(struct tcpcb *tp) { struct socket *so = tp->t_inpcb->inp_socket; int32_t len; uint32_t recwin, sendwin; int off, flags, error = 0; /* Keep compiler happy */ struct mbuf *m; struct ip *ip = NULL; struct ipovly *ipov = NULL; struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; #endif int idle, sendalot; int sack_rxmit, sack_bytes_rxmt; struct sackhole *p; int tso, mtu; struct tcpopt to; #if 0 int maxburst = TCP_MAXBURST; #endif #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6; isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; #endif INP_WLOCK_ASSERT(tp->t_inpcb); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) return (tcp_offload_output(tp)); #endif #ifdef TCP_RFC7413 /* * For TFO connections in SYN_RECEIVED, only allow the initial * SYN|ACK and those sent by the retransmit timer. */ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ return (0); #endif /* * Determine length of data that should be transmitted, * and flags that will be used. * If there is some data or critical controls (SYN, RST) * to send, then transmit; otherwise, investigate further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) cc_after_idle(tp); tp->t_flags &= ~TF_LASTIDLE; if (idle) { if (tp->t_flags & TF_MORETOCOME) { tp->t_flags |= TF_LASTIDLE; idle = 0; } } again: /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. There may be SACK information that allows us to avoid * resending already delivered data. Adjust snd_nxt accordingly. */ if ((tp->t_flags & TF_SACK_PERMIT) && SEQ_LT(tp->snd_nxt, tp->snd_max)) tcp_sack_adjust(tp); sendalot = 0; tso = 0; mtu = 0; off = tp->snd_nxt - tp->snd_una; sendwin = min(tp->snd_wnd, tp->snd_cwnd); flags = tcp_outflags[tp->t_state]; /* * Send any SACK-generated retransmissions. If we're explicitly trying * to send out new data (when sendalot is 1), bypass this function. * If we retransmit in fast recovery mode, decrement snd_cwnd, since * we're replacing a (future) new transmission with a retransmission * now, and we previously incremented snd_cwnd in tcp_input(). */ /* * Still in sack recovery , reset rxmit flag to zero. */ sack_rxmit = 0; sack_bytes_rxmt = 0; len = 0; p = NULL; if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { uint32_t cwin; cwin = imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0); /* Do not retransmit SACK segments beyond snd_recover */ if (SEQ_GT(p->end, tp->snd_recover)) { /* * (At least) part of sack hole extends beyond * snd_recover. Check to see if we can rexmit data * for this hole. */ if (SEQ_GEQ(p->rxmit, tp->snd_recover)) { /* * Can't rexmit any more data for this hole. * That data will be rexmitted in the next * sack recovery episode, when snd_recover * moves past p->rxmit. */ p = NULL; goto after_sack_rexmit; } else /* Can rexmit part of the current hole */ len = ((int32_t)ulmin(cwin, tp->snd_recover - p->rxmit)); } else len = ((int32_t)ulmin(cwin, p->end - p->rxmit)); off = p->rxmit - tp->snd_una; KASSERT(off >= 0,("%s: sack block to the left of una : %d", __func__, off)); if (len > 0) { sack_rxmit = 1; sendalot = 1; TCPSTAT_INC(tcps_sack_rexmits); TCPSTAT_ADD(tcps_sack_rexmit_bytes, min(len, tp->t_maxseg)); } } after_sack_rexmit: /* * Get standard flags, and add SYN or FIN if requested by 'hidden' * state flags. */ if (tp->t_flags & TF_NEEDFIN) flags |= TH_FIN; if (tp->t_flags & TF_NEEDSYN) flags |= TH_SYN; SOCKBUF_LOCK(&so->so_snd); /* * If in persist timeout with window of 0, send 1 byte. * Otherwise, if window is small but nonzero * and timer expired, we will send what we can * and go to transmit state. */ if (tp->t_flags & TF_FORCEDATA) { if (sendwin == 0) { /* * If we still have some data to send, then * clear the FIN bit. Usually this would * happen below when it realizes that we * aren't sending all the data. However, * if we have exactly 1 byte of unsent data, * then it won't clear the FIN bit below, * and if we are in persist state, we wind * up sending the packet without recording * that we sent the FIN bit. * * We can't just blindly clear the FIN bit, * because if we don't have any more data * to send then the probe will be the FIN * itself. */ if (off < sbused(&so->so_snd)) flags &= ~TH_FIN; sendwin = 1; } else { tcp_timer_activate(tp, TT_PERSIST, 0); tp->t_rxtshift = 0; } } /* * If snd_nxt == snd_max and we have transmitted a FIN, the * offset will be > 0 even if so_snd.sb_cc is 0, resulting in * a negative length. This can also occur when TCP opens up * its congestion window while receiving additional duplicate * acks after fast-retransmit because TCP will reset snd_nxt * to snd_max after the fast-retransmit. * * In the normal retransmit-FIN-only case, however, snd_nxt will * be set to snd_una, the offset will be 0, and the length may * wind up 0. * * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ if (sack_rxmit == 0) { if (sack_bytes_rxmt == 0) len = ((int32_t)ulmin(sbavail(&so->so_snd), sendwin) - off); else { int32_t cwin; /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the * data possible in the scoreboard. */ len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) - off); /* * Don't remove this (len > 0) check ! * We explicitly check for len > 0 here (although it * isn't really necessary), to work around a gcc * optimization issue - to force gcc to compute * len above. Without this check, the computation * of len is bungled by the optimizer. */ if (len > 0) { cwin = tp->snd_cwnd - (tp->snd_nxt - tp->sack_newdata) - sack_bytes_rxmt; if (cwin < 0) cwin = 0; len = imin(len, cwin); } } } /* * Lop off SYN bit if it has already been sent. However, if this * is SYN-SENT state and if segment contains data and if we don't * know that foreign host supports TAO, suppress sending segment. */ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { if (tp->t_state != TCPS_SYN_RECEIVED) flags &= ~TH_SYN; #ifdef TCP_RFC7413 /* * When sending additional segments following a TFO SYN|ACK, * do not include the SYN bit. */ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED)) flags &= ~TH_SYN; #endif off--, len++; } /* * Be careful not to send data and/or FIN on SYN segments. * This measure is needed to prevent interoperability problems * with not fully conformant TCP implementations. */ if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) { len = 0; flags &= ~TH_FIN; } #ifdef TCP_RFC7413 /* * When retransmitting SYN|ACK on a passively-created TFO socket, * don't include data, as the presence of data may have caused the * original SYN|ACK to have been dropped by a middlebox. */ if (IS_FASTOPEN(tp->t_flags) && (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) || (flags & TH_RST))) len = 0; #endif if (len <= 0) { /* * If FIN has been sent but not acked, * but we haven't been called to retransmit, * len will be < 0. Otherwise, window shrank * after we sent into it. If window shrank to 0, * cancel pending retransmit, pull snd_nxt back * to (closed) window, and set the persist timer * if it isn't already going. If the window didn't * close completely, just wait for an ACK. * * We also do a general check here to ensure that * we will set the persist timer when we have data * to send, but a 0-byte window. This makes sure * the persist timer is set even if the packet * hits one of the "goto send" lines below. */ len = 0; if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) && (off < (int) sbavail(&so->so_snd))) { tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rxtshift = 0; tp->snd_nxt = tp->snd_una; if (!tcp_timer_active(tp, TT_PERSIST)) tcp_setpersist(tp); } } /* len will be >= 0 after this point. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); /* * Automatic sizing of send socket buffer. Often the send buffer * size is not optimally adjusted to the actual network conditions * at hand (delay bandwidth product). Setting the buffer size too * small limits throughput on links with high bandwidth and high * delay (eg. trans-continental/oceanic links). Setting the * buffer size too big consumes too much real kernel memory, * especially with many connections on busy servers. * * The criteria to step up the send buffer one notch are: * 1. receive window of remote host is larger than send buffer * (with a fudge factor of 5/4th); * 2. send buffer is filled to 7/8th with data (so we actually * have data to make use of it); * 3. send buffer fill has not hit maximal automatic size; * 4. our send window (slow start and cogestion controlled) is * larger than sent but unacknowledged data in send buffer. * * The remote host receive window scaling factor may limit the * growing of the send buffer before it reaches its allowed * maximum. * * It scales directly with slow start or congestion window * and does at most one step per received ACK. This fast * scaling has the drawback of growing the send buffer beyond * what is strictly necessary to make full use of a given * delay*bandwidth product. However testing has shown this not * to be much of an problem. At worst we are trading wasting * of available bandwidth (the non-use of it) for wasting some * socket buffer memory. * * TODO: Shrink send buffer during idle periods together * with congestion window. Requires another timer. Has to * wait for upcoming tcp timer rewrite. * * XXXGL: should there be used sbused() or sbavail()? */ if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) { if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) && sbused(&so->so_snd) < V_tcp_autosndbuf_max && sendwin >= (sbused(&so->so_snd) - (tp->snd_nxt - tp->snd_una))) { if (!sbreserve_locked(&so->so_snd, min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max), so, curthread)) so->so_snd.sb_flags &= ~SB_AUTOSIZE; } } /* * Decide if we can use TCP Segmentation Offloading (if supported by * hardware). * * TSO may only be used if we are in a pure bulk sending state. The * presence of TCP-MD5, SACK retransmits, SACK advertizements and * IP options prevent using TSO. With TSO the TCP header is the same * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per generated * segment or packet. * * IPv4 handling has a clear separation of ip options and ip header * flags while IPv6 combines both in in6p_outputopts. ip6_optlen() does * the right thing below to provide length of just ip options and thus * checking for ipoptlen is enough to decide if ip options are present. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Pre-calculate here as we save another lookup into the darknesses * of IPsec that way and can actually decide if TSO is ok. */ - ipsec_optlen = ipsec_hdrsiz_tcp(tp); +#ifdef INET6 + if (isipv6 && IPSEC_ENABLED(ipv6)) + ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->t_inpcb); +#ifdef INET + else #endif - +#endif /* INET6 */ +#ifdef INET + if (IPSEC_ENABLED(ipv4)) + ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->t_inpcb); +#endif /* INET */ +#endif /* IPSEC */ #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); else #endif if (tp->t_inpcb->inp_options) ipoptlen = tp->t_inpcb->inp_options->m_len - offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && ipoptlen == 0) tso = 1; if (sack_rxmit) { if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } else { if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } recwin = lmin(lmax(sbspace(&so->so_rcv), 0), (long)TCP_MAXWIN << tp->rcv_scale); /* * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * * - We have a full segment (or more with TSO) * - This is the last buffer in a write()/send() and we are * either idle or running NODELAY * - we've timed out (e.g. persist timer) * - we have more then 1/2 the maximum send window's worth of * data (receiver may be limited the window size) * - we need to retransmit */ if (len) { if (len >= tp->t_maxseg) goto send; /* * NOTE! on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause * us to flush a buffer queued with moretocome. XXX * * note: the len + off check is almost certainly unnecessary. */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && (uint32_t)len + (uint32_t)off >= sbavail(&so->so_snd) && (tp->t_flags & TF_NOPUSH) == 0) { goto send; } if (tp->t_flags & TF_FORCEDATA) /* typ. timeout case */ goto send; if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */ goto send; if (sack_rxmit) goto send; } /* * Sending of standalone window updates. * * Window updates are important when we close our window due to a * full socket buffer and are opening it again after the application * reads data from it. Once the window has opened again and the * remote end starts to send again the ACK clock takes over and * provides the most current window information. * * We must avoid the silly window syndrome whereas every read * from the receive buffer, no matter how small, causes a window * update to be sent. We also should avoid sending a flurry of * window updates when the socket buffer had queued a lot of data * and the application is doing small reads. * * Prevent a flurry of pointless window updates by only sending * an update when we can increase the advertized window by more * than 1/4th of the socket buffer capacity. When the buffer is * getting full or is very small be more aggressive and send an * update whenever we can increase by two mss sized segments. * In all other situations the ACK's to new incoming data will * carry further window increases. * * Don't send an independent window update if a delayed * ACK is pending (it will get piggy-backed on it) or the * remote side already has done a half-close and won't send * more data. Skip this if the connection is in T/TCP * half-open state. */ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) && !(tp->t_flags & TF_DELACK) && !TCPS_HAVERCVDFIN(tp->t_state)) { /* * "adv" is the amount we could increase the window, * taking into account that we are limited by * TCP_MAXWIN << tp->rcv_scale. */ int32_t adv; int oldwin; adv = recwin; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); adv -= oldwin; } else oldwin = 0; /* * If the new window size ends up being the same as or less * than the old size when it is scaled, then don't force * a window update. */ if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale) goto dontupdate; if (adv >= (int32_t)(2 * tp->t_maxseg) && (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) || recwin <= (so->so_rcv.sb_hiwat / 8) || so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg)) goto send; } dontupdate: /* * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW * is also a catch-all for the retransmit timer timeout case. */ if (tp->t_flags & TF_ACKNOW) goto send; if ((flags & TH_RST) || ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) goto send; if (SEQ_GT(tp->snd_up, tp->snd_una)) goto send; /* * If our state indicates that FIN should be sent * and we have not yet done so, then we need to send. */ if (flags & TH_FIN && ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) goto send; /* * In SACK, it is possible for tcp_output to fail to send a segment * after the retransmission timer has been turned off. Make sure * that the retransmission timer is set. */ if ((tp->t_flags & TF_SACK_PERMIT) && SEQ_GT(tp->snd_max, tp->snd_una) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); goto just_return; } /* * TCP window updates are not reliable, rather a polling protocol * using ``persist'' packets is used to insure receipt of window * updates. The three ``states'' for the output side are: * idle not doing retransmits or persists * persisting to move a small or zero window * (re)transmitting and thereby not persisting * * tcp_timer_active(tp, TT_PERSIST) * is true when we are in persist state. * (tp->t_flags & TF_FORCEDATA) * is set when we are called to send a persist packet. * tcp_timer_active(tp, TT_REXMT) * is set when we are retransmitting * The output side is idle when both timers are zero. * * If send window is too small, there is data to transmit, and no * retransmit or persist is pending, then go to persist state. * If nothing happens soon, send when timer expires: * if window is nonzero, transmit what we can, * otherwise force out a byte. */ if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { tp->t_rxtshift = 0; tcp_setpersist(tp); } /* * No reason to send a segment, just return. */ just_return: SOCKBUF_UNLOCK(&so->so_snd); return (0); send: SOCKBUF_LOCK_ASSERT(&so->so_snd); if (len > 0) { if (len >= tp->t_maxseg) tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT; else tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT; } /* * Before ESTABLISHED, force sending of initial options * unless TCP set not to do any options. * NOTE: we assume that the IP/TCP header plus TCP options * always fit in a single mbuf, leaving room for a maximum * link header, i.e. * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES */ optlen = 0; #ifdef INET6 if (isipv6) hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr); else #endif hdrlen = sizeof (struct tcpiphdr); /* * Compute options for segment. * We only have to care about SYN and established connection * segments. Options for SYN-ACK segments are handled in TCP * syncache. */ to.to_flags = 0; if ((tp->t_flags & TF_NOOPT) == 0) { /* Maximum segment size. */ if (flags & TH_SYN) { tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc); to.to_flags |= TOF_MSS; #ifdef TCP_RFC7413 /* * Only include the TFO option on the first * transmission of the SYN|ACK on a * passively-created TFO socket, as the presence of * the TFO option may have caused the original * SYN|ACK to have been dropped by a middlebox. */ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift == 0)) { to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; to.to_flags |= TOF_FASTOPEN; } #endif } /* Window scaling. */ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { to.to_wscale = tp->request_r_scale; to.to_flags |= TOF_SCALE; } /* Timestamps. */ if ((tp->t_flags & TF_RCVD_TSTMP) || ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE)) tp->rfbuf_ts = tcp_ts_getticks(); } /* Selective ACK's. */ if (tp->t_flags & TF_SACK_PERMIT) { if (flags & TH_SYN) to.to_flags |= TOF_SACKPERM; else if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { to.to_flags |= TOF_SACK; to.to_nsacks = tp->rcv_numsacks; to.to_sacks = (u_char *)tp->sackblks; } } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ + /* + * Check that TCP_MD5SIG is enabled in tcpcb to + * account the size needed to set this TCP option. + */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); } /* * Adjust data length if insertion of options will * bump the packet length beyond the t_maxseg length. * Clear the FIN bit because we cut off the tail of * the segment. */ if (len + optlen + ipoptlen > tp->t_maxseg) { flags &= ~TH_FIN; if (tso) { u_int if_hw_tsomax; u_int if_hw_tsomaxsegcount; u_int if_hw_tsomaxsegsize; struct mbuf *mb; u_int moff; int max_len; /* extract TSO information */ if_hw_tsomax = tp->t_tsomax; if_hw_tsomaxsegcount = tp->t_tsomaxsegcount; if_hw_tsomaxsegsize = tp->t_tsomaxsegsize; /* * Limit a TSO burst to prevent it from * overflowing or exceeding the maximum length * allowed by the network interface: */ KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); /* * Check if we should limit by maximum payload * length: */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ max_len = (if_hw_tsomax - hdrlen - max_linkhdr); if (max_len <= 0) { len = 0; } else if (len > max_len) { sendalot = 1; len = max_len; } } /* * Check if we should limit by maximum segment * size and count: */ if (if_hw_tsomaxsegcount != 0 && if_hw_tsomaxsegsize != 0) { /* * Subtract one segment for the LINK * and TCP/IP headers mbuf that will * be prepended to this mbuf chain * after the code in this section * limits the number of mbufs in the * chain to if_hw_tsomaxsegcount. */ if_hw_tsomaxsegcount -= 1; max_len = 0; mb = sbsndmbuf(&so->so_snd, off, &moff); while (mb != NULL && max_len < len) { u_int mlen; u_int frags; /* * Get length of mbuf fragment * and how many hardware frags, * rounded up, it would use: */ mlen = (mb->m_len - moff); frags = howmany(mlen, if_hw_tsomaxsegsize); /* Handle special case: Zero Length Mbuf */ if (frags == 0) frags = 1; /* * Check if the fragment limit * will be reached or exceeded: */ if (frags >= if_hw_tsomaxsegcount) { max_len += min(mlen, if_hw_tsomaxsegcount * if_hw_tsomaxsegsize); break; } max_len += mlen; if_hw_tsomaxsegcount -= frags; moff = 0; mb = mb->m_next; } if (max_len <= 0) { len = 0; } else if (len > max_len) { sendalot = 1; len = max_len; } } /* * Prevent the last segment from being * fractional unless the send sockbuf can be * emptied: */ max_len = (tp->t_maxseg - optlen); if (((uint32_t)off + (uint32_t)len) < sbavail(&so->so_snd)) { moff = len % max_len; if (moff != 0) { len -= moff; sendalot = 1; } } /* * In case there are too many small fragments * don't use TSO: */ if (len <= max_len) { len = max_len; sendalot = 1; tso = 0; } /* * Send the FIN in a separate segment * after the bulk sending is done. * We don't trust the TSO implementations * to clear the FIN flag on all but the * last segment. */ if (tp->t_flags & TF_NEEDFIN) sendalot = 1; } else { len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; } } else tso = 0; KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET, ("%s: len > IP_MAXPACKET", __func__)); /*#ifdef DIAGNOSTIC*/ #ifdef INET6 if (max_linkhdr + hdrlen > MCLBYTES) #else if (max_linkhdr + hdrlen > MHLEN) #endif panic("tcphdr too big"); /*#endif*/ /* * This KASSERT is here to catch edge cases at a well defined place. * Before, those had triggered (random) panic conditions further down. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); /* * Grab a header mbuf, attaching a copy of data to * be transmitted, and initialize the header from * the template for sends on this connection. */ if (len) { struct mbuf *mb; u_int moff; if ((tp->t_flags & TF_FORCEDATA) && len == 1) TCPSTAT_INC(tcps_sndprobe); else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { tp->t_sndrexmitpack++; TCPSTAT_INC(tcps_sndrexmitpack); TCPSTAT_ADD(tcps_sndrexmitbyte, len); } else { TCPSTAT_INC(tcps_sndpack); TCPSTAT_ADD(tcps_sndbyte, len); } #ifdef INET6 if (MHLEN < hdrlen + max_linkhdr) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else #endif m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOBUFS; sack_rxmit = 0; goto out; } m->m_data += max_linkhdr; m->m_len = hdrlen; /* * Start the m_copy functions from the closest mbuf * to the offset in the socket buffer chain. */ mb = sbsndptr(&so->so_snd, off, len, &moff); if (len <= MHLEN - hdrlen - max_linkhdr) { m_copydata(mb, moff, len, mtod(m, caddr_t) + hdrlen); m->m_len += len; } else { m->m_next = m_copym(mb, moff, len, M_NOWAIT); if (m->m_next == NULL) { SOCKBUF_UNLOCK(&so->so_snd); (void) m_free(m); error = ENOBUFS; sack_rxmit = 0; goto out; } } /* * If we're sending everything we've got, set PUSH. * (This will keep happy those implementations which only * give data to the user when a buffer fills or * a PUSH comes in.) */ if (((uint32_t)off + (uint32_t)len == sbused(&so->so_snd)) && !(flags & TH_SYN)) flags |= TH_PUSH; SOCKBUF_UNLOCK(&so->so_snd); } else { SOCKBUF_UNLOCK(&so->so_snd); if (tp->t_flags & TF_ACKNOW) TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN|TH_FIN|TH_RST)) TCPSTAT_INC(tcps_sndctrl); else if (SEQ_GT(tp->snd_up, tp->snd_una)) TCPSTAT_INC(tcps_sndurg); else TCPSTAT_INC(tcps_sndwinup); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; sack_rxmit = 0; goto out; } #ifdef INET6 if (isipv6 && (MHLEN < hdrlen + max_linkhdr) && MHLEN >= hdrlen) { M_ALIGN(m, hdrlen); } else #endif m->m_data += max_linkhdr; m->m_len = hdrlen; } SOCKBUF_UNLOCK_ASSERT(&so->so_snd); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(tp->t_inpcb, m); #endif #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); tcpip_fillheaders(tp->t_inpcb, ip6, th); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); ipov = (struct ipovly *)ip; th = (struct tcphdr *)(ip + 1); tcpip_fillheaders(tp->t_inpcb, ip, th); } /* * Fill in fields, remembering maximum advertised * window for use in delaying messages about window sizes. * If resending a FIN, be sure not to use a new sequence number. */ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && tp->snd_nxt == tp->snd_max) tp->snd_nxt--; /* * If we are starting a connection, send ECN setup * SYN packet. If we are on a retransmit, we may * resend those bits a number of times as per * RFC 3168. */ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { if (tp->t_rxtshift >= 1) { if (tp->t_rxtshift <= V_tcp_ecn_maxretries) flags |= TH_ECE|TH_CWR; } else flags |= TH_ECE|TH_CWR; } if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_ECN_PERMIT)) { /* * If the peer has ECN, mark data packets with * ECN capable transmission (ECT). * Ignore pure ack packets, retransmissions and window probes. */ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && !((tp->t_flags & TF_FORCEDATA) && len == 1)) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); else #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); } /* * Reply with proper ECN notifications. */ if (tp->t_flags & TF_ECN_SND_CWR) { flags |= TH_CWR; tp->t_flags &= ~TF_ECN_SND_CWR; } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } /* * If we are doing retransmissions, then snd_nxt will * not reflect the first unsent octet. For ACK only * packets, we do not want the sequence number of the * retransmitted packet, we want the sequence number * of the next unsent octet. So, if there is no data * (and no SYN or FIN), use snd_max instead of snd_nxt * when filling in ti_seq. But if we are in persist * state, snd_max might reflect one byte beyond the * right edge of the window, so use snd_nxt in that * case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) */ if (sack_rxmit == 0) { if (len || (flags & (TH_SYN|TH_FIN)) || tcp_timer_active(tp, TT_PERSIST)) th->th_seq = htonl(tp->snd_nxt); else th->th_seq = htonl(tp->snd_max); } else { th->th_seq = htonl(p->rxmit); p->rxmit += len; tp->sackhint.sack_bytes_rexmit += len; } th->th_ack = htonl(tp->rcv_nxt); if (optlen) { bcopy(opt, th + 1, optlen); th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; } th->th_flags = flags; /* * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. */ if (recwin < (so->so_rcv.sb_hiwat / 4) && recwin < tp->t_maxseg) recwin = 0; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (tp->rcv_adv - tp->rcv_nxt)) recwin = (tp->rcv_adv - tp->rcv_nxt); /* * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. The * case is handled in syncache. */ if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(&so->so_rcv), TCP_MAXWIN))); else th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); /* * Adjust the RXWIN0SENT flag - indicate that we have advertised * a 0 window. This may cause the remote transmitter to stall. This * flag tells soreceive() to disable delayed acknowledgements when * draining the buffer. This can occur if the receiver is attempting * to read more data than can be buffered prior to transmitting on * the connection. */ if (th->th_win == 0) { tp->t_sndzerowin++; tp->t_flags |= TF_RXWIN0SENT; } else tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; } else /* * If no urgent pointer to send, then we pull * the urgent pointer to the left edge of the send window * so that it doesn't drift into the send window on sequence * number wraparound. */ tp->snd_up = tp->snd_una; /* drag it along */ -#ifdef TCP_SIGNATURE - if (to.to_flags & TOF_SIGNATURE) { - int sigoff = to.to_signature - opt; - tcp_signature_compute(m, 0, len, optlen, - (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); - } -#endif - /* * Put TCP length in extended header, and then * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (to.to_flags & TOF_SIGNATURE) { + /* + * Calculate MD5 signature and put it into the place + * determined before. + * NOTE: since TCP options buffer doesn't point into + * mbuf's data, calculate offset and use it. + */ + if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, th, + (u_char *)(th + 1) + (to.to_signature - opt)) != 0) { + /* + * Do not send segment if the calculation of MD5 + * digest has failed. + */ + goto out; + } + } +#endif #ifdef INET6 if (isipv6) { /* * There is no need to fill in ip6_plen right now. * It will be filled later by ip6_output. */ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, 0); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); /* IP version must be set here for ipv4/ipv6 checking later */ KASSERT(ip->ip_v == IPVERSION, ("%s: IP version incorrect: %d", __func__, ip->ip_v)); } #endif /* * Enable TSO and specify the size of the segments. * The TCP pseudo header checksum is always provided. */ if (tso) { KASSERT(len > tp->t_maxseg - optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u", __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); #else KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %d + %u + %u != %u", __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); #endif #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ hhook_run_tcp_est_out(tp, th, &to, len, tso); #endif #ifdef TCPDEBUG /* * Trace. */ if (so->so_options & SO_DEBUG) { u_short save = 0; #ifdef INET6 if (!isipv6) #endif { save = ipov->ih_len; ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + (th->th_off << 2) */); } tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0); #ifdef INET6 if (!isipv6) #endif ipov->ih_len = save; } #endif /* TCPDEBUG */ TCP_PROBE3(debug__output, tp, th, m); /* * Fill in IP length and desired time to live and * send to IP level. There should be a better way * to handle ttl and tos; we could keep them in * the template, but need a way to checksum without them. */ /* * m->m_pkthdr.len should have been set before checksum calculation, * because in6_cksum() need it. */ #ifdef INET6 if (isipv6) { struct route_in6 ro; bzero(&ro, sizeof(ro)); /* * we separately set hoplimit for every segment, since the * user might want to change the value via setsockopt. * Also, desired default hop limit might be changed via * Neighbor Discovery. */ ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL); /* * Set the packet size here for the benefit of DTrace probes. * ip6_output() will set it properly; it's supposed to include * the option header lengths as well. */ ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) tp->t_flags2 |= TF2_PLPMTU_PMTUD; else tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th); TCP_PROBE5(send, NULL, tp, ip6, tp, th); #ifdef TCPPCAP /* Save packet, if requested. */ tcp_pcap_add(th, m, &(tp->t_outpkts)); #endif /* TODO: IPv6 IP6TOS_ECT bit on */ error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb); if (error == EMSGSIZE && ro.ro_rt != NULL) mtu = ro.ro_rt->rt_mtu; RO_RTFREE(&ro); } #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(m->m_pkthdr.len); #ifdef INET6 if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO) ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL); #endif /* INET6 */ /* * If we do path MTU discovery, then we set DF on every packet. * This might not be the best thing to do according to RFC3390 * Section 2. However the tcp hostcache migitates the problem * so it affects only the first tcp connection with a host. * * NB: Don't set DF on small MTU/MSS to have a safe fallback. */ if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { ip->ip_off |= htons(IP_DF); tp->t_flags2 |= TF2_PLPMTU_PMTUD; } else { tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; } if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip, tp, th); TCP_PROBE5(send, NULL, tp, ip, tp, th); #ifdef TCPPCAP /* Save packet, if requested. */ tcp_pcap_add(th, m, &(tp->t_outpkts)); #endif error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, tp->t_inpcb); if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL) mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu; } #endif /* INET */ out: /* * In transmit state, time the transmission and arrange for * the retransmit. In persist state, just set snd_max. */ if ((tp->t_flags & TF_FORCEDATA) == 0 || !tcp_timer_active(tp, TT_PERSIST)) { tcp_seq startseq = tp->snd_nxt; /* * Advance snd_nxt over sequence space of this segment. */ if (flags & (TH_SYN|TH_FIN)) { if (flags & TH_SYN) tp->snd_nxt++; if (flags & TH_FIN) { tp->snd_nxt++; tp->t_flags |= TF_SENTFIN; } } if (sack_rxmit) goto timer; tp->snd_nxt += len; if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { tp->snd_max = tp->snd_nxt; /* * Time this transmission if not a retransmission and * not currently timing anything. */ if (tp->t_rtttime == 0) { tp->t_rtttime = ticks; tp->t_rtseq = startseq; TCPSTAT_INC(tcps_segstimed); } } /* * Set retransmit timer if not currently set, * and not doing a pure ack or a keep-alive probe. * Initial value for retransmit timer is smoothed * round-trip time + 2 * round-trip time variance. * Initialize shift counter which is used for backoff * of retransmit time. */ timer: if (!tcp_timer_active(tp, TT_REXMT) && ((sack_rxmit && tp->snd_nxt != tp->snd_max) || (tp->snd_nxt != tp->snd_una))) { if (tcp_timer_active(tp, TT_PERSIST)) { tcp_timer_activate(tp, TT_PERSIST, 0); tp->t_rxtshift = 0; } tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); } else if (len == 0 && sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { /* * Avoid a situation where we do not set persist timer * after a zero window condition. For example: * 1) A -> B: packet with enough data to fill the window * 2) B -> A: ACK for #1 + new data (0 window * advertisement) * 3) A -> B: ACK for #2, 0 len packet * * In this case, A will not activate the persist timer, * because it chose to send a packet. Unless tcp_output * is called for some other reason (delayed ack timer, * another input packet from B, socket syscall), A will * not send zero window probes. * * So, if you send a 0-length packet, but there is data * in the socket buffer, and neither the rexmt or * persist timer is already set, then activate the * persist timer. */ tp->t_rxtshift = 0; tcp_setpersist(tp); } } else { /* * Persist case, update snd_max but since we are in * persist mode (no window) we do not update snd_nxt. */ int xlen = len; if (flags & TH_SYN) ++xlen; if (flags & TH_FIN) { ++xlen; tp->t_flags |= TF_SENTFIN; } if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) tp->snd_max = tp->snd_nxt + xlen; } if (error) { /* * We know that the packet was lost, so back out the * sequence number advance, if any. * * If the error is EPERM the packet got blocked by the * local firewall. Normally we should terminate the * connection but the blocking may have been spurious * due to a firewall reconfiguration cycle. So we treat * it like a packet loss and let the retransmit timer and * timeouts do their work over time. * XXX: It is a POLA question whether calling tcp_drop right * away would be the really correct behavior instead. */ if (((tp->t_flags & TF_FORCEDATA) == 0 || !tcp_timer_active(tp, TT_PERSIST)) && ((flags & TH_SYN) == 0) && (error != EPERM)) { if (sack_rxmit) { p->rxmit -= len; tp->sackhint.sack_bytes_rexmit -= len; KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, ("sackhint bytes rtx >= 0")); } else tp->snd_nxt -= len; } SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */ switch (error) { + case EACCES: + tp->t_softerror = error; + return (0); case EPERM: tp->t_softerror = error; return (error); case ENOBUFS: TCP_XMIT_TIMER_ASSERT(tp, len, flags); tp->snd_cwnd = tp->t_maxseg; return (0); case EMSGSIZE: /* * For some reason the interface we used initially * to send segments changed to another or lowered * its MTU. * If TSO was active we either got an interface * without TSO capabilits or TSO was turned off. * If we obtained mtu from ip_output() then update * it and try again. */ if (tso) tp->t_flags &= ~TF_TSO; if (mtu != 0) { tcp_mss_update(tp, -1, mtu, NULL, NULL); goto again; } return (error); case EHOSTDOWN: case EHOSTUNREACH: case ENETDOWN: case ENETUNREACH: if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_softerror = error; return (0); } /* FALLTHROUGH */ default: return (error); } } TCPSTAT_INC(tcps_sndtotal); /* * Data sent (as far as we can tell). * If this advertises a larger window than any other segment, * then remember the size of the advertised window. * Any pending ACK has now been sent. */ if (SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + recwin; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); if (tcp_timer_active(tp, TT_DELACK)) tcp_timer_activate(tp, TT_DELACK, 0); #if 0 /* * This completely breaks TCP if newreno is turned on. What happens * is that if delayed-acks are turned on on the receiver, this code * on the transmitter effectively destroys the TCP window, forcing * it to four packets (1.5Kx4 = 6K window). */ if (sendalot && --maxburst) goto again; #endif if (sendalot) goto again; return (0); } void tcp_setpersist(struct tcpcb *tp) { int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; int tt; tp->t_flags &= ~TF_PREVVALID; if (tcp_timer_active(tp, TT_REXMT)) panic("tcp_setpersist: retransmit pending"); /* * Start/restart persistence timer. */ TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], tcp_persmin, tcp_persmax); tcp_timer_activate(tp, TT_PERSIST, tt); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) tp->t_rxtshift++; } /* * Insert TCP options according to the supplied parameters to the place * optp in a consistent way. Can handle unaligned destinations. * * The order of the option processing is crucial for optimal packing and * alignment for the scarce option space. * * The optimal order for a SYN/SYN-ACK segment is: * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) + * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40. * * The SACK options should be last. SACK blocks consume 8*n+2 bytes. * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks). * At minimum we need 10 bytes (to generate 1 SACK block). If both * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present, * we only have 10 bytes for SACK options (40 - (12 + 18)). */ int tcp_addoptions(struct tcpopt *to, u_char *optp) { u_int32_t mask, optlen = 0; for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) { if ((to->to_flags & mask) != mask) continue; if (optlen == TCP_MAXOLEN) break; switch (to->to_flags & mask) { case TOF_MSS: while (optlen % 4) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG) continue; optlen += TCPOLEN_MAXSEG; *optp++ = TCPOPT_MAXSEG; *optp++ = TCPOLEN_MAXSEG; to->to_mss = htons(to->to_mss); bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss)); optp += sizeof(to->to_mss); break; case TOF_SCALE: while (!optlen || optlen % 2 != 1) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW) continue; optlen += TCPOLEN_WINDOW; *optp++ = TCPOPT_WINDOW; *optp++ = TCPOLEN_WINDOW; *optp++ = to->to_wscale; break; case TOF_SACKPERM: while (optlen % 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED) continue; optlen += TCPOLEN_SACK_PERMITTED; *optp++ = TCPOPT_SACK_PERMITTED; *optp++ = TCPOLEN_SACK_PERMITTED; break; case TOF_TS: while (!optlen || optlen % 4 != 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP) continue; optlen += TCPOLEN_TIMESTAMP; *optp++ = TCPOPT_TIMESTAMP; *optp++ = TCPOLEN_TIMESTAMP; to->to_tsval = htonl(to->to_tsval); to->to_tsecr = htonl(to->to_tsecr); bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval)); optp += sizeof(to->to_tsval); bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); optp += sizeof(to->to_tsecr); break; -#ifdef TCP_SIGNATURE case TOF_SIGNATURE: { int siglen = TCPOLEN_SIGNATURE - 2; while (!optlen || optlen % 4 != 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } - if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) + if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) { + to->to_flags &= ~TOF_SIGNATURE; continue; + } optlen += TCPOLEN_SIGNATURE; *optp++ = TCPOPT_SIGNATURE; *optp++ = TCPOLEN_SIGNATURE; to->to_signature = optp; while (siglen--) *optp++ = 0; break; } -#endif case TOF_SACK: { int sackblks = 0; struct sackblk *sack = (struct sackblk *)to->to_sacks; tcp_seq sack_seq; while (!optlen || optlen % 4 != 2) { optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK) continue; optlen += TCPOLEN_SACKHDR; *optp++ = TCPOPT_SACK; sackblks = min(to->to_nsacks, (TCP_MAXOLEN - optlen) / TCPOLEN_SACK); *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK; while (sackblks--) { sack_seq = htonl(sack->start); bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq)); optp += sizeof(sack_seq); sack_seq = htonl(sack->end); bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq)); optp += sizeof(sack_seq); optlen += TCPOLEN_SACK; sack++; } TCPSTAT_INC(tcps_sack_send_blocks); break; } #ifdef TCP_RFC7413 case TOF_FASTOPEN: { int total_len; /* XXX is there any point to aligning this option? */ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len; if (TCP_MAXOLEN - optlen < total_len) continue; *optp++ = TCPOPT_FAST_OPEN; *optp++ = total_len; if (to->to_tfo_len > 0) { bcopy(to->to_tfo_cookie, optp, to->to_tfo_len); optp += to->to_tfo_len; } optlen += total_len; break; } #endif default: panic("%s: unknown TCP option type", __func__); break; } } /* Terminate and pad TCP options to a 4 byte boundary. */ if (optlen % 4) { optlen += TCPOLEN_EOL; *optp++ = TCPOPT_EOL; } /* * According to RFC 793 (STD0007): * "The content of the header beyond the End-of-Option option * must be header padding (i.e., zero)." * and later: "The padding is composed of zeros." */ while (optlen % 4) { optlen += TCPOLEN_PAD; *optp++ = TCPOPT_PAD; } KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__)); return (optlen); } diff --git a/sys/netinet/tcp_stacks/fastpath.c b/sys/netinet/tcp_stacks/fastpath.c index 146fba36b88f..551433d36f13 100644 --- a/sys/netinet/tcp_stacks/fastpath.c +++ b/sys/netinet/tcp_stacks/fastpath.c @@ -1,2468 +1,2462 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * Copyright (c) 2007-2008,2010 * Swinburne University of Technology, Melbourne, Australia. * Copyright (c) 2009-2010 Lawrence Stewart * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2015 Netflix Inc. * All rights reserved. * * Portions of this software were developed at the Centre for Advanced Internet * Architectures, Swinburne University of Technology, by Lawrence Stewart, * James Healy and David Hayes, made possible in part by a grant from the Cisco * University Research Program Fund at Community Foundation Silicon Valley. * * Portions of this software were developed at the Centre for Advanced * Internet Architectures, Swinburne University of Technology, Melbourne, * Australia by David Hayes under sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Portions of this software were developed by Randall R. Stewart while * working for Netflix Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #ifdef TCP_HHOOK #include #endif #include #include #include /* for proc0 declaration */ #include #include #include #include #include #include #include #include #include /* before tcp_seq.h, for tcp_random18() */ #include #include #include #define TCPSTATES /* for logging */ #include #include #include #include #include #include /* required for icmp_var.h */ #include /* for ICMP_BANDLIM */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ #ifdef TCP_OFFLOAD #include #endif -#ifdef IPSEC -#include -#include -#endif /*IPSEC*/ - #include #include VNET_DECLARE(int, tcp_autorcvbuf_inc); #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) VNET_DECLARE(int, tcp_autorcvbuf_max); #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) VNET_DECLARE(int, tcp_do_rfc3042); #define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042) VNET_DECLARE(int, tcp_do_autorcvbuf); #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) VNET_DECLARE(int, tcp_insecure_rst); #define V_tcp_insecure_rst VNET(tcp_insecure_rst) VNET_DECLARE(int, tcp_insecure_syn); #define V_tcp_insecure_syn VNET(tcp_insecure_syn) static void tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int); static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int); /* * Indicate whether this ack should be delayed. We can delay the ack if * following conditions are met: * - There is no delayed ack timer in progress. * - Our last ack wasn't a 0-sized window. We never want to delay * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. We make sure by checking that the * segment size is not larger than the MSS. */ #define DELAY_ACK(tp, tlen) \ ((!tcp_timer_active(tp, TT_DELACK) && \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ (tlen <= tp->t_maxseg) && \ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) /* * So how is this faster than the normal fast ack? * It basically allows us to also stay in the fastpath * when a window-update ack also arrives. In testing * we saw only 25-30% of connections doing fastpath * due to the fact that along with moving forward * in sequence the window was also updated. */ static void tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, int ti_locked, uint32_t tiwin) { int acked; uint16_t nsegs; int winup_only=0; nsegs = max(1, m->m_pkthdr.lro_nsegs); #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif /* * The following if statement will be true if * we are doing the win_up_in_fp * - We have more new data (SEQ_LT(tp->snd_wl1, th->th_seq)) * - No more new data, but we have an ack for new data * (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack)) * - No more new data, the same ack point but the window grew * (tp->snd_wl1 == th->th_seq && tp->snd_wl2 == th->th_ack && twin > tp->snd_wnd) */ if ((SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) { winup_only = 1; TCPSTAT_INC(tcps_rcvwinupd); } tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; } /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * This is a pure ack for outstanding data. */ if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } ti_locked = TI_UNLOCKED; TCPSTAT_INC(tcps_predack); /* * "bad retransmit" recovery. */ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) { cc_cong_signal(tp, th, CC_RTO_ERR); } /* * Recalculate the transmit timer / rtt. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { uint32_t t; t = tcp_ts_getticks() - to->to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } if (winup_only == 0) { acked = BYTES_THIS_ACK(tp, th); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, to); #endif TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); if (SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* * Let the congestion control algorithm update * congestion control related information. This * typically means increasing the congestion * window. */ cc_ack_received(tp, th, nsegs, CC_ACK); tp->snd_una = th->th_ack; /* * Pull snd_wl2 up to prevent seq wrap relative * to th_ack. */ tp->snd_wl2 = th->th_ack; tp->t_dupacks = 0; /* * If all outstanding data are acked, stop * retransmit timer, otherwise restart timer * using current (possibly backed-off) value. * If process is waiting for space, * wakeup/selwakeup/signal. If data * are ready to send, let tcp_output * decide between more output or persist. */ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); m_freem(m); if (tp->snd_una == tp->snd_max) tcp_timer_activate(tp, TT_REXMT, 0); else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); } else { /* * Window update only, just free the mbufs and * send out whatever we can. */ m_freem(m); } sowwakeup(so); if (sbavail(&so->so_snd)) (void) tcp_output(tp); KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } INP_WUNLOCK(tp->t_inpcb); } /* * Here nothing is really faster, its just that we * have broken out the fast-data path also just like * the fast-ack. */ static void tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, int ti_locked, uint32_t tiwin) { int newsize = 0; /* automatic sockbuf scaling */ #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * This is a pure, in-sequence data packet with * nothing on the reassembly queue and we have enough * buffer space to take it. */ if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } ti_locked = TI_UNLOCKED; /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; /* * Pull snd_wl1 up to prevent seq wrap relative to * th_seq. */ tp->snd_wl1 = th->th_seq; /* * Pull rcv_up up to prevent seq wrap relative to * rcv_nxt. */ tp->rcv_up = tp->rcv_nxt; TCPSTAT_ADD(tcps_rcvbyte, tlen); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); /* * Automatic sizing of receive socket buffer. Often the send * buffer size is not optimally adjusted to the actual network * conditions at hand (delay bandwidth product). Setting the * buffer size too small limits throughput on links with high * bandwidth and high delay (eg. trans-continental/oceanic links). * * On the receive side the socket buffer memory is only rarely * used to any significant extent. This allows us to be much * more aggressive in scaling the receive socket buffer. For * the case that the buffer space is actually used to a large * extent and we run out of kernel memory we can simply drop * the new segments; TCP on the sender will just retransmit it * later. Setting the buffer size too big may only consume too * much kernel memory if the application doesn't read() from * the socket or packet loss or reordering makes use of the * reassembly queue. * * The criteria to step up the receive buffer one notch are: * 1. Application has not set receive buffer size with * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. * 2. the number of bytes received during the time it takes * one timestamp to be reflected back to us (the RTT); * 3. received bytes per RTT is within seven eighth of the * current socket buffer size; * 4. receive buffer size has not hit maximal automatic size; * * This algorithm does one step per RTT at most and only if * we receive a bulk stream w/o packet losses or reorderings. * Shrinking the buffer during idle times is not necessary as * it doesn't consume any memory when idle. * * TODO: Only step up if the application is actually serving * the buffer to better manage the socket buffer resources. */ if (V_tcp_do_autorcvbuf && (to->to_flags & TOF_TS) && to->to_tsecr && (so->so_rcv.sb_flags & SB_AUTOSIZE)) { if (TSTMP_GT(to->to_tsecr, tp->rfbuf_ts) && to->to_tsecr - tp->rfbuf_ts < hz) { if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) && so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { newsize = min(so->so_rcv.sb_hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); } /* Start over with next RTT. */ tp->rfbuf_ts = 0; tp->rfbuf_cnt = 0; } else tp->rfbuf_cnt += tlen; /* add up */ } /* Add data to socket buffer. */ SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { /* * Set new socket buffer size. * Give up when limit is reached. */ if (newsize) if (!sbreserve_locked(&so->so_rcv, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); } /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); if (DELAY_ACK(tp, tlen)) { tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; tcp_output(tp); } KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } INP_WUNLOCK(tp->t_inpcb); } /* * The slow-path is the clone of the long long part * of tcp_do_segment past all the fast-path stuff. We * use it here by two different callers, the fast/slow and * the fastack only. */ static void tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, int ti_locked, uint32_t tiwin, int thflags) { int acked, ourfinisacked, needoutput = 0; int rstreason, todrop, win; uint16_t nsegs; char *s; struct in_conninfo *inc; struct mbuf *mfree = NULL; nsegs = max(1, m->m_pkthdr.lro_nsegs); #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif /* * Calculate amount of space in receive window, * and then do TCP input processing. * Receive window is amount of space in rcv queue, * but not less than advertised window. */ inc = &tp->t_inpcb->inp_inc; win = sbspace(&so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); /* Reset receive buffer auto scaling when not in bulk receive mode. */ tp->rfbuf_ts = 0; tp->rfbuf_cnt = 0; switch (tp->t_state) { /* * If the state is SYN_RECEIVED: * if seg contains an ACK, but not for our SYN/ACK, send a RST. */ case TCPS_SYN_RECEIVED: if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } break; /* * If the state is SYN_SENT: * if seg contains an ACK, but not for our SYN, drop the input. * if seg contains a RST, then drop the connection. * if seg does not contain SYN, then drop it. * Otherwise this is an acceptable SYN segment * initialize tp->rcv_nxt and tp->irs * if seg contains ack then advance tp->snd_una * if seg contains an ECE and ECN support is enabled, the stream * is ECN capable. * if SYN has been acked change to ESTABLISHED else SYN_RCVD state * arrange for segment to be acked (eventually) * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, m, tp, th); tp = tcp_drop(tp, ECONNREFUSED); } if (thflags & TH_RST) goto drop; if (!(thflags & TH_SYN)) goto drop; tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; } tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ /* * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ if (DELAY_ACK(tp, tlen) && tlen != 0) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); else tp->t_flags |= TF_ACKNOW; if ((thflags & TH_ECE) && V_tcp_do_ecn) { tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } /* * Received in SYN_SENT[*] state. * Transitions: * SYN_SENT --> ESTABLISHED * SYN_SENT* --> FIN_WAIT_1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(connect__established, NULL, tp, m, tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } } else { /* * Received initial SYN in SYN-SENT[*] state => * simultaneous open. * If it succeeds, connection is * half-synchronized. * Otherwise, do 3-way handshake: * SYN-SENT -> SYN-RECEIVED * SYN-SENT* -> SYN-RECEIVED* */ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_timer_activate(tp, TT_REXMT, 0); tcp_state_change(tp, TCPS_SYN_RECEIVED); } KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " "ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* * Advance th->th_seq to correspond to first data byte. * If data, trim to stay within window, * dropping FIN if necessary. */ th->th_seq++; if (tlen > tp->rcv_wnd) { todrop = tlen - tp->rcv_wnd; m_adj(m, -todrop); tlen = tp->rcv_wnd; thflags &= ~TH_FIN; TCPSTAT_INC(tcps_rcvpackafterwin); TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); } tp->snd_wl1 = th->th_seq - 1; tp->rcv_up = th->th_seq; /* * Client side of transaction: already sent SYN and data. * If the remote host used T/TCP to validate the SYN, * our data will be ACK'd; if so, enter normal data segment * processing in the middle of step 5, ack processing. * Otherwise, goto step 6. */ if (thflags & TH_ACK) goto process_ACK; goto step6; /* * If the state is LAST_ACK or CLOSING or TIME_WAIT: * do normal processing. * * NB: Leftover from RFC1644 T/TCP. Cases to be reused later. */ case TCPS_LAST_ACK: case TCPS_CLOSING: break; /* continue normal processing */ } /* * States other than LISTEN or SYN_SENT. * First check the RST flag and sequence number since reset segments * are exempt from the timestamp and connection count tests. This * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix * below which allowed reset segments in half the sequence space * to fall though and be processed (which gives forged reset * segments with a random sequence number a 50 percent chance of * killing a connection). * Then check timestamp, if present. * Then check the connection count, if present. * Then check that at least some bytes of segment are within * receive window. If segment begins before rcv_nxt, * drop leading data (and SYN); if nothing left, just ack. */ if (thflags & TH_RST) { /* * RFC5961 Section 3.2 * * - RST drops connection only if SEG.SEQ == RCV.NXT. * - If RST is in window, we send challenge ACK. * * Note: to take into account delayed ACKs, we should * test against last_ack_sent instead of rcv_nxt. * Note 2: we handle special case of closed window, not * covered by the RFC. */ if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED, ("%s: TH_RST ti_locked %d, th %p tp %p", __func__, ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); if (V_tcp_insecure_rst || tp->last_ack_sent == th->th_seq) { TCPSTAT_INC(tcps_drops); /* Drop the connection. */ switch (tp->t_state) { case TCPS_SYN_RECEIVED: so->so_error = ECONNREFUSED; goto close; case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: so->so_error = ECONNRESET; close: /* FALLTHROUGH */ default: tp = tcp_close(tp); } } else { TCPSTAT_INC(tcps_badrst); /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; } } goto drop; } /* * RFC5961 Section 4.2 * Send challenge ACK for any SYN in synchronized state. */ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) { KASSERT(ti_locked == TI_RLOCKED, ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { tp = tcp_drop(tp, ECONNRESET); rstreason = BANDLIM_UNLIMITED; } else { /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; } goto drop; } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { /* Check to see if ts_recent is over 24 days old. */ if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. If this segment updates * ts_recent, the age will be reset later and ts_recent * will get a valid value. If it does not, setting * ts_recent to zero will at least satisfy the * requirement that zero be placed in the timestamp * echo reply when ts_recent isn't valid. The * age isn't reset until we get a valid ts_recent * because we don't want out-of-order segments to be * dropped when ts_recent is old. */ tp->ts_recent = 0; } else { TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, tlen); TCPSTAT_INC(tcps_pawsdrop); if (tlen) goto dropafterack; goto drop; } } /* * In the SYN-RECEIVED state, validate that the packet belongs to * this connection before trimming the data to fit the receive * window. Check the sequence number versus IRS since we know * the sequence numbers haven't wrapped. This is a partial fix * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } todrop = tp->rcv_nxt - th->th_seq; if (todrop > 0) { if (thflags & TH_SYN) { thflags &= ~TH_SYN; th->th_seq++; if (th->th_urp > 1) th->th_urp--; else thflags &= ~TH_URG; todrop--; } /* * Following if statement from Stevens, vol. 2, p. 960. */ if (todrop > tlen || (todrop == tlen && (thflags & TH_FIN) == 0)) { /* * Any valid FIN must be to the left of the window. * At this point the FIN must be a duplicate or out * of sequence; drop it. */ thflags &= ~TH_FIN; /* * Send an ACK to resynchronize and drop any data. * But keep on processing for RST or ACK. */ tp->t_flags |= TF_ACKNOW; todrop = tlen; TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, todrop); } else { TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; if (th->th_urp > todrop) th->th_urp -= todrop; else { thflags &= ~TH_URG; th->th_urp = 0; } } /* * If new data are received on a connection after the * user processes are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data " "after socket was closed, " "sending RST and removing tcpcb\n", s, __func__, tcpstates[tp->t_state], tlen); free(s, M_TCPLOG); } tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } /* * If segment ends after window, drop trailing data * (and PUSH and FIN); if nothing left, just ACK. */ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); if (todrop > 0) { TCPSTAT_INC(tcps_rcvpackafterwin); if (todrop >= tlen) { TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from * incoming segments. Continue processing, but * remember to ack. Otherwise, drop segment * and ack. */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_rcvwinprobe); } else goto dropafterack; } else TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); m_adj(m, -todrop); tlen -= todrop; thflags &= ~(TH_PUSH|TH_FIN); } /* * If last ACK falls within this segment's sequence numbers, * record its timestamp. * NOTE: * 1) That the test incorporates suggestions from the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). * 2) That updating only on newer timestamps interferes with * our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. * 3) That we modify the segment boundary check to be * Last.ACK.Sent <= SEG.SEQ + SEG.Len * instead of RFC1323's * Last.ACK.Sent < SEG.SEQ + SEG.Len, * This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated * Vol. 2 p.869. In such cases, we can still calculate the * RTT correctly when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN|TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN * flag is on (half-synchronized state), then queue data for * later processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_state == TCPS_SYN_RECEIVED || (tp->t_flags & TF_NEEDSYN)) goto step6; else if (tp->t_flags & TF_ACKNOW) goto dropafterack; else goto drop; } /* * Ack processing. */ switch (tp->t_state) { /* * In SYN_RECEIVED state, the ack ACKs our SYN, so enter * ESTABLISHED state and continue processing. * The ACK was checked above. */ case TCPS_SYN_RECEIVED: TCPSTAT_INC(tcps_connects); soisconnected(so); /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_wnd = tiwin; } /* * Make transitions: * SYN-RECEIVED -> ESTABLISHED * SYN-RECEIVED* -> FIN-WAIT-1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, m, tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user. */ if (tlen == 0 && (thflags & TH_FIN) == 0) (void) tcp_reass(tp, (struct tcphdr *)0, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; /* FALLTHROUGH */ /* * In ESTABLISHED state: drop duplicate ACKs; ACK out of range * ACKs. If the ack is in the range * tp->snd_una < th->th_ack <= tp->snd_max * then advance tp->snd_una to th->th_ack and drop * data from the retransmission queue. If this ACK reflects * more up to date window information we update our window information. */ case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: if (SEQ_GT(th->th_ack, tp->snd_max)) { TCPSTAT_INC(tcps_rcvacktoomuch); goto dropafterack; } if ((tp->t_flags & TF_SACK_PERMIT) && ((to->to_flags & TOF_SACK) || !TAILQ_EMPTY(&tp->snd_holes))) tcp_sack_doack(tp, to, th->th_ack); else /* * Reset the value so that previous (valid) value * from the last ack with SACK doesn't get used. */ tp->sackhint.sacked_bytes = 0; #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, to); #endif if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { /* * If this is the first time we've seen a * FIN from the remote, this is not a * duplicate and it needs to be processed * normally. This happens during a * simultaneous close. */ if ((thflags & TH_FIN) && (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { tp->t_dupacks = 0; break; } TCPSTAT_INC(tcps_rcvdupack); /* * If we have outstanding data (other than * a window probe), this is a completely * duplicate ack (ie, window info didn't * change and FIN isn't set), * the ack is the biggest we've * seen and we've seen exactly our rexmt * threshold of them, assume a packet * has been dropped and retransmit it. * Kludge snd_nxt & the congestion * window so we send only this one * packet. * * We know we're losing at the current * window size so do congestion avoidance * (set ssthresh to half the current window * and pull our congestion window back to * the new ssthresh). * * Dup acks mean that packets have left the * network (they're now cached at the receiver) * so bump cwnd by the amount in the receiver * to keep a constant cwnd packets in the * network. * * When using TCP ECN, notify the peer that * we reduced the cwnd. */ if (!tcp_timer_active(tp, TT_REXMT) || th->th_ack != tp->snd_una) tp->t_dupacks = 0; else if (++tp->t_dupacks > tcprexmtthresh || IN_FASTRECOVERY(tp->t_flags)) { cc_ack_received(tp, th, nsegs, CC_DUPACK); if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags)) { int awnd; /* * Compute the amount of data in flight first. * We can inject new data into the pipe iff * we have less than 1/2 the original window's * worth of data in flight. */ if (V_tcp_do_rfc6675_pipe) awnd = tcp_compute_pipe(tp); else awnd = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += tp->t_maxseg; /* * RFC5681 Section 3.2 talks about cwnd * inflation on additional dupacks and * deflation on recovering from loss. * * We keep cwnd into check so that * we don't have to 'deflate' it when we * get out of recovery. */ if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; } } else tp->snd_cwnd += tp->t_maxseg; (void) tp->t_fb->tfb_tcp_output(tp); goto drop; } else if (tp->t_dupacks == tcprexmtthresh) { tcp_seq onxt = tp->snd_nxt; /* * If we're doing sack, check to * see if we're already in sack * recovery. If we're not doing sack, * check to see if we're in newreno * recovery. */ if (tp->t_flags & TF_SACK_PERMIT) { if (IN_FASTRECOVERY(tp->t_flags)) { tp->t_dupacks = 0; break; } } else { if (SEQ_LEQ(th->th_ack, tp->snd_recover)) { tp->t_dupacks = 0; break; } } /* Congestion signal before ack. */ cc_cong_signal(tp, th, CC_NDUPACK); cc_ack_received(tp, th, nsegs, CC_DUPACK); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; if (tp->t_flags & TF_SACK_PERMIT) { TCPSTAT_INC( tcps_sack_recovery_episode); tp->sack_newdata = tp->snd_nxt; if (CC_ALGO(tp)->cong_signal == NULL) tp->snd_cwnd = tp->t_maxseg; (void) tp->t_fb->tfb_tcp_output(tp); goto drop; } tp->snd_nxt = th->th_ack; if (CC_ALGO(tp)->cong_signal == NULL) tp->snd_cwnd = tp->t_maxseg; (void) tp->t_fb->tfb_tcp_output(tp); KASSERT(tp->snd_limited <= 2, ("%s: tp->snd_limited too big", __func__)); if (CC_ALGO(tp)->cong_signal == NULL) tp->snd_cwnd = tp->snd_ssthresh + tp->t_maxseg * (tp->t_dupacks - tp->snd_limited); if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; goto drop; } else if (V_tcp_do_rfc3042) { /* * Process first and second duplicate * ACKs. Each indicates a segment * leaving the network, creating room * for more. Make sure we can send a * packet on reception of each duplicate * ACK by increasing snd_cwnd by one * segment. Restore the original * snd_cwnd after packet transmission. */ cc_ack_received(tp, th, nsegs, CC_DUPACK); uint32_t oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; u_int sent; int avail; KASSERT(tp->t_dupacks == 1 || tp->t_dupacks == 2, ("%s: dupacks not 1 or 2", __func__)); if (tp->t_dupacks == 1) tp->snd_limited = 0; tp->snd_cwnd = (tp->snd_nxt - tp->snd_una) + (tp->t_dupacks - tp->snd_limited) * tp->t_maxseg; /* * Only call tcp_output when there * is new data available to be sent. * Otherwise we would send pure ACKs. */ SOCKBUF_LOCK(&so->so_snd); avail = sbavail(&so->so_snd) - (tp->snd_nxt - tp->snd_una); SOCKBUF_UNLOCK(&so->so_snd); if (avail > 0) (void) tp->t_fb->tfb_tcp_output(tp); sent = tp->snd_max - oldsndmax; if (sent > tp->t_maxseg) { KASSERT((tp->t_dupacks == 2 && tp->snd_limited == 0) || (sent == tp->t_maxseg + 1 && tp->t_flags & TF_SENTFIN), ("%s: sent too much", __func__)); tp->snd_limited = 2; } else if (sent > 0) ++tp->snd_limited; tp->snd_cwnd = oldcwnd; goto drop; } } else tp->t_dupacks = 0; break; } KASSERT(SEQ_GT(th->th_ack, tp->snd_una), ("%s: th_ack <= snd_una", __func__)); /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. */ if (IN_FASTRECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover)) { if (tp->t_flags & TF_SACK_PERMIT) tcp_sack_partialack(tp, th); else tcp_newreno_partial_ack(tp, th); } else cc_post_recovery(tp, th); } tp->t_dupacks = 0; /* * If we reach this point, ACK is not a duplicate, * i.e., it ACKs something we sent. */ if (tp->t_flags & TF_NEEDSYN) { /* * T/TCP: Connection was half-synchronized, and our * SYN has been ACK'd (so connection is now fully * synchronized). Go to non-starred state, * increment snd_una for ACK of SYN, and check if * we can do window scaling. */ tp->t_flags &= ~TF_NEEDSYN; tp->snd_una++; /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; /* Send window already scaled. */ } } process_ACK: INP_WLOCK_ASSERT(tp->t_inpcb); acked = BYTES_THIS_ACK(tp, th); TCPSTAT_INC(tcps_rcvackpack); TCPSTAT_ADD(tcps_rcvackbyte, acked); /* * If we just performed our first retransmit, and the ACK * arrives within our recovery window, then it was a mistake * to do the retransmit in the first place. Recover our * original cwnd and ssthresh, and proceed to transmit where * we left off. */ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) cc_cong_signal(tp, th, CC_RTO_ERR); /* * If we have a timestamp reply, update smoothed * round trip time. If no timestamp is present but * transmit timer is running and timed sequence * number was acked, update smoothed round trip time. * Since we now have an rtt measurement, cancel the * timer backoff (cf., Phil Karn's retransmit alg.). * Recompute the initial retransmit timer. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { uint32_t t; t = tcp_ts_getticks() - to->to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } /* * If all outstanding data is acked, stop retransmit * timer and remember to restart (more output or persist). * If there is more data to be acked, restart retransmit * timer, using current (possibly backed-off) value. */ if (th->th_ack == tp->snd_max) { tcp_timer_activate(tp, TT_REXMT, 0); needoutput = 1; } else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); /* * If no data (only SYN) was ACK'd, * skip rest of ACK processing. */ if (acked == 0) goto step6; /* * Let the congestion control algorithm update congestion * control related information. This typically means increasing * the congestion window. */ cc_ack_received(tp, th, nsegs, CC_ACK); SOCKBUF_LOCK(&so->so_snd); if (acked > sbavail(&so->so_snd)) { tp->snd_wnd -= sbavail(&so->so_snd); mfree = sbcut_locked(&so->so_snd, (int)sbavail(&so->so_snd)); ourfinisacked = 1; } else { mfree = sbcut_locked(&so->so_snd, acked); tp->snd_wnd -= acked; ourfinisacked = 0; } /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); m_freem(mfree); /* Detect una wraparound. */ if (!IN_RECOVERY(tp->t_flags) && SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* XXXLAS: Can this be moved up into cc_post_recovery? */ if (IN_RECOVERY(tp->t_flags) && SEQ_GEQ(th->th_ack, tp->snd_recover)) { EXIT_RECOVERY(tp->t_flags); } tp->snd_una = th->th_ack; if (tp->t_flags & TF_SACK_PERMIT) { if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; } if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; switch (tp->t_state) { /* * In FIN_WAIT_1 STATE in addition to the processing * for the ESTABLISHED state if our FIN is now acknowledged * then enter FIN_WAIT_2. */ case TCPS_FIN_WAIT_1: if (ourfinisacked) { /* * If we can't receive any more * data, then closing user can proceed. * Starting the timer is contrary to the * specification, but if we don't get a FIN * we'll hang forever. * * XXXjl: * we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } break; /* * In CLOSING STATE in addition to the processing for * the ESTABLISHED state if the ACK acknowledges our FIN * then enter the TIME-WAIT state, otherwise ignore * the segment. */ case TCPS_CLOSING: if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); m_freem(m); return; } break; /* * In LAST_ACK, we may still be waiting for data to drain * and/or to be acked, as well as for the ack of our FIN. * If our FIN is now acknowledged, delete the TCB, * enter the closed state and return. */ case TCPS_LAST_ACK: if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); goto drop; } break; } } step6: INP_WLOCK_ASSERT(tp->t_inpcb); /* * Update window information. * Don't look at window if no ACK: TAC's send garbage on first SYN. */ if ((thflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ if (tlen == 0 && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; needoutput = 1; } /* * Process segments with URG. */ if ((thflags & TH_URG) && th->th_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * This is a kludge, but if we receive and accept * random urgent pointers, we'll crash in * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. */ SOCKBUF_LOCK(&so->so_rcv); if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ goto dodata; /* XXX */ } /* * If this segment advances the known urgent pointer, * then mark the data stream. This should not happen * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since * a FIN has been received from the remote side. * In these states we ignore the URG. * * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section as the original * spec states (in one of two places). */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; so->so_oobmark = sbavail(&so->so_rcv) + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SBS_RCVATMARK; sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } SOCKBUF_UNLOCK(&so->so_rcv); /* * Remove out of band data so doesn't get presented to user. * This can happen independent of advancing the URG pointer, * but if two URG's are pending at once, some out-of-band * data may creep in... ick. */ if (th->th_urp <= (uint32_t)tlen && !(so->so_options & SO_OOBINLINE)) { /* hdr drop is delayed */ tcp_pulloutofband(so, th, m, drop_hdrlen); } } else { /* * If no out of band data is expected, * pull receive urgent pointer along * with the receive window. */ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ INP_WLOCK_ASSERT(tp->t_inpcb); /* * Process the segment text, merging it into the TCP sequencing queue, * and arranging for acknowledgment of receipt if necessary. * This process logically involves adjusting tp->rcv_wnd as data * is presented to the user (this happens in tcp_usrreq.c, * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. */ if ((tlen || (thflags & TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly queue * with control block tp. Set thflags to whether reassembly now * includes a segment with FIN. This handles the common case * inline (segment is the next to be received on an established * connection, and the queue is empty), avoiding linkage into * and removal from the queue and repetition of various * conversions. * Set DELACK for segments received in order, but ack * immediately when segments are out of order (so * fast retransmit can work). */ if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq) && TCPS_HAVEESTABLISHED(tp->t_state)) { if (DELAY_ACK(tp, tlen)) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt += tlen; thflags = th->th_flags & TH_FIN; TCPSTAT_INC(tcps_rcvpack); TCPSTAT_ADD(tcps_rcvbyte, tlen); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else sbappendstream_locked(&so->so_rcv, m, 0); /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); } else { /* * XXX: Due to the header drop above "th" is * theoretically invalid by now. Fortunately * m_adj() doesn't actually frees any mbufs * when trimming from the head. */ thflags = tcp_reass(tp, th, &tlen, m); tp->t_flags |= TF_ACKNOW; } if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) tcp_update_sack_list(tp, save_start, save_start + tlen); #if 0 /* * Note the amount of data that peer has sent into * our window, in order to estimate the sender's * buffer size. * XXX: Unused. */ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); else len = so->so_rcv.sb_hiwat; #endif } else { m_freem(m); thflags &= ~TH_FIN; } /* * If FIN is received ACK the FIN and let the user know * that the connection is closing. */ if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); /* * If connection is half-synchronized * (ie NEEDSYN flag on) then delay ACK, * so it may be piggybacked when SYN is sent. * Otherwise, since we received a FIN then no * more input can be expected, send ACK now. */ if (tp->t_flags & TF_NEEDSYN) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt++; } switch (tp->t_state) { /* * In SYN_RECEIVED and ESTABLISHED STATES * enter the CLOSE_WAIT state. */ case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_CLOSE_WAIT); break; /* * If still in FIN_WAIT_1 STATE FIN has not been acked so * enter the CLOSING state. */ case TCPS_FIN_WAIT_1: tcp_state_change(tp, TCPS_CLOSING); break; /* * In FIN_WAIT_2 state enter the TIME_WAIT state, * starting the time-wait timer, turning off the other * standard timers. */ case TCPS_FIN_WAIT_2: INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " "TCP_FIN_WAIT_2 ti_locked: %d", __func__, ti_locked)); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); return; } } if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } ti_locked = TI_UNLOCKED; #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); /* * Return any desired output. */ if (needoutput || (tp->t_flags & TF_ACKNOW)) (void) tp->t_fb->tfb_tcp_output(tp); KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } INP_WUNLOCK(tp->t_inpcb); return; dropafterack: /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. * * We can now skip the test for the RST flag since all * paths to this code happen after packets containing * RST have been dropped. * * In the SYN-RECEIVED state, don't send an ACK unless the * segment we received passes the SYN-RECEIVED ACK test. * If it fails send a RST. This breaks the loop in the * "LAND" DoS attack, and also prevents an ACK storm * between two listening ports that have been sent forged * SYN segments, each with the source address of the other. */ if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__drop, tp, th, m); if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } ti_locked = TI_UNLOCKED; tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); m_freem(m); return; dropwithreset: if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } ti_locked = TI_UNLOCKED; if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); return; drop: if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); #endif /* * Drop space held by incoming segment and return. */ #ifdef TCPDEBUG if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__drop, tp, th, m); if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); m_freem(m); } /* * Do fast slow is a combination of the original * tcp_dosegment and a split fastpath, one function * for the fast-ack which also includes allowing fastpath * for window advanced in sequence acks. And also a * sub-function that handles the insequence data. */ void tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, int ti_locked) { int thflags; uint32_t tiwin; char *s; uint16_t nsegs; int can_enter; struct in_conninfo *inc; struct tcpopt to; thflags = th->th_flags; tp->sackhint.last_sack_ack = 0; inc = &tp->t_inpcb->inp_inc; nsegs = max(1, m->m_pkthdr.lro_nsegs); /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. Otherwise, we * allow the tcbinfo to be in either alocked or unlocked, as the * caller may have unnecessarily acquired a write lock due to a race. */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); /* * Segment received on connection. * Reset idle time and keep-alive timer. * XXX: This should be done after segment * validation to ignore broken/spoofed segs. */ tp->t_rcvtime = ticks; /* * Unscale the window into a 32-bit value. * For the SYN_SENT state the scale is zero. */ tiwin = th->th_win << tp->snd_scale; /* * TCP ECN processing. */ if (tp->t_flags & TF_ECN_PERMIT) { if (thflags & TH_CWR) tp->t_flags &= ~TF_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: TCPSTAT_INC(tcps_ecn_ect1); break; } /* Congestion experienced. */ if (thflags & TH_ECE) { cc_cong_signal(tp, th, CC_ECN); } } /* * Parse options on any incoming segment. */ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. Normalize * timestamp if syncookies were used when this connection * was established. */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) to.to_tsecr = 0; } /* * If timestamps were negotiated during SYN/ACK they should * appear on every segment during this session and vice versa. */ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp missing, " "no action\n", s, __func__); free(s, M_TCPLOG); } } if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp not expected, " "no action\n", s, __func__); free(s, M_TCPLOG); } } /* * Process options only when we get SYN/ACK back. The SYN case * for incoming connections is handled in tcp_syncache. * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. * XXX this is traditional behavior, may need to be cleaned up. */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; } /* * Initial send window. It will be updated with * the next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_ts_getticks(); } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; } can_enter = 0; if (__predict_true((tlen == 0))) { /* * The ack moved forward and we have a window (non-zero) * * The ack did not move forward, but the window increased. */ if (__predict_true((SEQ_GT(th->th_ack, tp->snd_una) && tiwin) || ((th->th_ack == tp->snd_una) && tiwin && (tiwin > tp->snd_wnd)))) { can_enter = 1; } } else { /* * Data incoming, use the old entry criteria * for fast-path with data. */ if ((tiwin && tiwin == tp->snd_wnd)) { can_enter = 1; } } /* * Header prediction: check for the two common cases * of a uni-directional data xfer. If the packet has * no control flags, is in-sequence, the window didn't * change and we're not retransmitting, it's a * candidate. If the length is zero and the ack moved * forward, we're the sender side of the xfer. Just * free the data acked & wake any higher level process * that was blocked waiting for space. If the length * is non-zero and the ack didn't move, we're the * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. * Make sure that the hidden state-flags are also off. * Since we check for TCPS_ESTABLISHED first, it can only * be TH_NEEDSYN. */ if (__predict_true(tp->t_state == TCPS_ESTABLISHED && th->th_seq == tp->rcv_nxt && (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && tp->snd_nxt == tp->snd_max && can_enter && ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && LIST_EMPTY(&tp->t_segq) && ((to.to_flags & TOF_TS) == 0 || TSTMP_GEQ(to.to_tsval, tp->ts_recent)))) { if (__predict_true((tlen == 0) && (SEQ_LEQ(th->th_ack, tp->snd_max) && !IN_RECOVERY(tp->t_flags) && (to.to_flags & TOF_SACK) == 0 && TAILQ_EMPTY(&tp->snd_holes)))) { /* We are done */ tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, ti_locked, tiwin); return; } else if ((tlen) && (th->th_ack == tp->snd_una && tlen <= sbspace(&so->so_rcv))) { tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen, ti_locked, tiwin); /* We are done */ return; } } tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen, ti_locked, tiwin, thflags); } /* * This subfunction is used to try to highly optimize the * fast path. We again allow window updates that are * in sequence to remain in the fast-path. We also add * in the __predict's to attempt to help the compiler. * Note that if we return a 0, then we can *not* process * it and the caller should push the packet into the * slow-path. */ static int tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, int ti_locked, uint32_t tiwin) { int acked; uint16_t nsegs; int winup_only=0; nsegs = max(1, m->m_pkthdr.lro_nsegs); #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { /* Old ack, behind (or duplicate to) the last one rcv'd */ return (0); } if (__predict_false(th->th_ack == tp->snd_una) && __predict_false(tiwin <= tp->snd_wnd)) { /* duplicate ack a shrinking dup ack with shrinking window */ return (0); } if (__predict_false(tiwin == 0)) { /* zero window */ return (0); } if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) { /* Above what we have sent? */ return (0); } if (__predict_false(tp->snd_nxt != tp->snd_max)) { /* We are retransmitting */ return (0); } if (__predict_false(tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))) { /* We need a SYN or a FIN, unlikely.. */ return (0); } if((to->to_flags & TOF_TS) && __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) { /* Timestamp is behind .. old ack with seq wrap? */ return (0); } if (__predict_false(IN_RECOVERY(tp->t_flags))) { /* Still recovering */ return (0); } if (__predict_false(to->to_flags & TOF_SACK)) { /* Sack included in the ack.. */ return (0); } if (!TAILQ_EMPTY(&tp->snd_holes)) { /* We have sack holes on our scoreboard */ return (0); } /* Ok if we reach here, we can process a fast-ack */ /* Did the window get updated? */ if (tiwin != tp->snd_wnd) { /* keep track of pure window updates */ if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) { winup_only = 1; TCPSTAT_INC(tcps_rcvwinupd); } tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; } /* * Pull snd_wl2 up to prevent seq wrap relative * to th_ack. */ tp->snd_wl2 = th->th_ack; /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * This is a pure ack for outstanding data. */ if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } ti_locked = TI_UNLOCKED; TCPSTAT_INC(tcps_predack); /* * "bad retransmit" recovery. */ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) { cc_cong_signal(tp, th, CC_RTO_ERR); } /* * Recalculate the transmit timer / rtt. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { uint32_t t; t = tcp_ts_getticks() - to->to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } if (winup_only == 0) { acked = BYTES_THIS_ACK(tp, th); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, to); #endif TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); if (SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* * Let the congestion control algorithm update * congestion control related information. This * typically means increasing the congestion * window. */ cc_ack_received(tp, th, nsegs, CC_ACK); tp->snd_una = th->th_ack; tp->t_dupacks = 0; /* * If all outstanding data are acked, stop * retransmit timer, otherwise restart timer * using current (possibly backed-off) value. * If process is waiting for space, * wakeup/selwakeup/signal. If data * are ready to send, let tcp_output * decide between more output or persist. */ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); m_freem(m); if (tp->snd_una == tp->snd_max) tcp_timer_activate(tp, TT_REXMT, 0); else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); /* Wake up the socket if we have room to write more */ sowwakeup(so); } else { /* * Window update only, just free the mbufs and * send out whatever we can. */ m_freem(m); } if (sbavail(&so->so_snd)) (void) tcp_output(tp); KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } INP_WUNLOCK(tp->t_inpcb); return (1); } /* * This tcp-do-segment concentrates on making the fastest * ack processing path. It does not have a fast-path for * data (it possibly could which would then eliminate the * need for fast-slow above). For a content distributor having * large outgoing elephants and very very little coming in * having no fastpath for data does not really help (since you * don't get much data in). The most important thing is * processing ack's quickly and getting the rest of the data * output to the peer as quickly as possible. This routine * seems to be about an overall 3% faster then the old * tcp_do_segment and keeps us in the fast-path for packets * much more (by allowing window updates to also stay in the fastpath). */ void tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, int ti_locked) { int thflags; uint32_t tiwin; char *s; struct in_conninfo *inc; struct tcpopt to; thflags = th->th_flags; tp->sackhint.last_sack_ack = 0; inc = &tp->t_inpcb->inp_inc; /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. Otherwise, we * allow the tcbinfo to be in either alocked or unlocked, as the * caller may have unnecessarily acquired a write lock due to a race. */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); /* * Segment received on connection. * Reset idle time and keep-alive timer. * XXX: This should be done after segment * validation to ignore broken/spoofed segs. */ tp->t_rcvtime = ticks; /* * Unscale the window into a 32-bit value. * For the SYN_SENT state the scale is zero. */ tiwin = th->th_win << tp->snd_scale; /* * TCP ECN processing. */ if (tp->t_flags & TF_ECN_PERMIT) { if (thflags & TH_CWR) tp->t_flags &= ~TF_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: TCPSTAT_INC(tcps_ecn_ect1); break; } /* Congestion experienced. */ if (thflags & TH_ECE) { cc_cong_signal(tp, th, CC_ECN); } } /* * Parse options on any incoming segment. */ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. Normalize * timestamp if syncookies were used when this connection * was established. */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) to.to_tsecr = 0; } /* * If timestamps were negotiated during SYN/ACK they should * appear on every segment during this session and vice versa. */ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp missing, " "no action\n", s, __func__); free(s, M_TCPLOG); } } if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp not expected, " "no action\n", s, __func__); free(s, M_TCPLOG); } } /* * Process options only when we get SYN/ACK back. The SYN case * for incoming connections is handled in tcp_syncache. * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. * XXX this is traditional behavior, may need to be cleaned up. */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; } /* * Initial send window. It will be updated with * the next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_ts_getticks(); } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; } /* * Header prediction: check for the two common cases * of a uni-directional data xfer. If the packet has * no control flags, is in-sequence, the window didn't * change and we're not retransmitting, it's a * candidate. If the length is zero and the ack moved * forward, we're the sender side of the xfer. Just * free the data acked & wake any higher level process * that was blocked waiting for space. If the length * is non-zero and the ack didn't move, we're the * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. * Make sure that the hidden state-flags are also off. * Since we check for TCPS_ESTABLISHED first, it can only * be TH_NEEDSYN. */ if (__predict_true(tp->t_state == TCPS_ESTABLISHED) && __predict_true(((to.to_flags & TOF_SACK) == 0)) && __predict_true(tlen == 0) && __predict_true((thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK) && __predict_true(LIST_EMPTY(&tp->t_segq)) && __predict_true(th->th_seq == tp->rcv_nxt)) { if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, ti_locked, tiwin)) { return; } } tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen, ti_locked, tiwin, thflags); } struct tcp_function_block __tcp_fastslow = { .tfb_tcp_block_name = "fastslow", .tfb_tcp_output = tcp_output, .tfb_tcp_do_segment = tcp_do_segment_fastslow, .tfb_tcp_ctloutput = tcp_default_ctloutput, }; struct tcp_function_block __tcp_fastack = { .tfb_tcp_block_name = "fastack", .tfb_tcp_output = tcp_output, .tfb_tcp_do_segment = tcp_do_segment_fastack, .tfb_tcp_ctloutput = tcp_default_ctloutput }; static int tcp_addfastpaths(module_t mod, int type, void *data) { int err=0; switch (type) { case MOD_LOAD: err = register_tcp_functions(&__tcp_fastack, M_WAITOK); if (err) { printf("Failed to register fastack module -- err:%d\n", err); return(err); } err = register_tcp_functions(&__tcp_fastslow, M_WAITOK); if (err) { printf("Failed to register fastslow module -- err:%d\n", err); deregister_tcp_functions(&__tcp_fastack); return(err); } break; case MOD_QUIESCE: if ((__tcp_fastslow.tfb_refcnt) ||( __tcp_fastack.tfb_refcnt)) { return(EBUSY); } break; case MOD_UNLOAD: err = deregister_tcp_functions(&__tcp_fastack); if (err == EBUSY) break; err = deregister_tcp_functions(&__tcp_fastslow); if (err == EBUSY) break; err = 0; break; default: return (EOPNOTSUPP); } return (err); } static moduledata_t new_tcp_fastpaths = { .name = "tcp_fastpaths", .evhand = tcp_addfastpaths, .priv = 0 }; MODULE_VERSION(kern_tcpfastpaths, 1); DECLARE_MODULE(kern_tcpfastpaths, new_tcp_fastpaths, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 5e86ed537190..eb5f189c26ec 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,3116 +1,2767 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #ifdef TCP_HHOOK #include #endif #include #ifdef TCP_HHOOK #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #ifdef TCP_OFFLOAD #include #endif -#ifdef IPSEC -#include -#include -#ifdef INET6 -#include -#endif -#include -#include -#endif /*IPSEC*/ +#include #include #include #include VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; #ifdef INET6 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif struct rwlock tcp_function_lock; static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I", "Default TCP Maximum Segment Size"); #ifdef INET6 static int sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_v6mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_v6mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check, "I", "Default TCP Maximum Segment Size for IPv6"); #endif /* INET6 */ /* * Minimum MSS we accept and use. This prevents DoS attacks where * we are forced to a ridiculous low MSS like 20 and send hundreds * of packets instead of one. The effect scales with the available * bandwidth and quickly saturates the CPU and network interface * with packet generation and sending. Set to zero to disable MINMSS * checking. This setting prevents us from sending too small packets. */ VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minimum TCP Maximum Segment Size"); VNET_DEFINE(int, tcp_do_rfc1323) = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc1323), 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_log_debug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW, &tcp_log_debug, 0, "Log errors caused by incoming TCP segments"); static int tcp_tcbhashsize; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); static VNET_DEFINE(int, icmp_may_rst) = 1; #define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; #define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, "Seconds between reseeding of ISN secret"); static int tcp_soreceive_stream; SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); -#ifdef TCP_SIGNATURE -static int tcp_sig_checksigs = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, - &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); -#endif - VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) #ifdef TCP_HHOOK VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); #endif static struct inpcb *tcp_notify(struct inpcb *, int); static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static void tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); static struct tcp_function_block tcp_def_funcblk = { "default", tcp_output, tcp_do_segment, tcp_default_ctloutput, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0 }; int t_functions_inited = 0; struct tcp_funchead t_functions; static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; static void init_tcp_functions(void) { if (t_functions_inited == 0) { TAILQ_INIT(&t_functions); rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0); t_functions_inited = 1; } } static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs) { struct tcp_function *f; struct tcp_function_block *blk=NULL; TAILQ_FOREACH(f, &t_functions, tf_next) { if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) { blk = f->tf_fb; break; } } return(blk); } static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) { struct tcp_function_block *rblk=NULL; struct tcp_function *f; TAILQ_FOREACH(f, &t_functions, tf_next) { if (f->tf_fb == blk) { rblk = blk; if (s) { *s = f; } break; } } return (rblk); } struct tcp_function_block * find_and_ref_tcp_functions(struct tcp_function_set *fs) { struct tcp_function_block *blk; rw_rlock(&tcp_function_lock); blk = find_tcp_functions_locked(fs); if (blk) refcount_acquire(&blk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(blk); } struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *blk) { struct tcp_function_block *rblk; rw_rlock(&tcp_function_lock); rblk = find_tcp_fb_locked(blk, NULL); if (rblk) refcount_acquire(&rblk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(rblk); } static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { int error=ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; memset(&fs, 0, sizeof(fs)); rw_rlock(&tcp_function_lock); blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL); if (blk) { /* Found him */ strcpy(fs.function_set_name, blk->tfb_tcp_block_name); fs.pcbcnt = blk->tfb_refcnt; } rw_runlock(&tcp_function_lock); error = sysctl_handle_string(oidp, fs.function_set_name, sizeof(fs.function_set_name), req); /* Check for error or no change */ if (error != 0 || req->newptr == NULL) return(error); rw_wlock(&tcp_function_lock); blk = find_tcp_functions_locked(&fs); if ((blk == NULL) || (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { error = ENOENT; goto done; } tcp_func_set_ptr = blk; done: rw_wunlock(&tcp_function_lock); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_net_inet_default_tcp_functions, "A", "Set/get the default TCP functions"); static int sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS) { int error, cnt, linesz; struct tcp_function *f; char *buffer, *cp; size_t bufsz, outsz; cnt = 0; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { cnt++; } rw_runlock(&tcp_function_lock); bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1; buffer = malloc(bufsz, M_TEMP, M_WAITOK); error = 0; cp = buffer; linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count"); cp += linesz; bufsz -= linesz; outsz = linesz; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { linesz = snprintf(cp, bufsz, "%-32s%c %u\n", f->tf_fb->tfb_tcp_block_name, (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ', f->tf_fb->tfb_refcnt); if (linesz >= bufsz) { error = EOVERFLOW; break; } cp += linesz; bufsz -= linesz; outsz += linesz; } rw_runlock(&tcp_function_lock); if (error == 0) error = sysctl_handle_string(oidp, buffer, outsz + 1, req); free(buffer, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, sysctl_net_inet_list_available, "A", "list available TCP Function sets"); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 0 #endif /* * XXX * Callouts should be moved into struct tcp directly. They are currently * separate because the tcpcb structure is exported to userland for sysctl * parsing purposes, which do not know about callouts. */ struct tcpcb_mem { struct tcpcb tcb; struct tcp_timer tt; struct cc_var ccv; #ifdef TCP_HHOOK struct osd osd; #endif }; static VNET_DEFINE(uma_zone_t, tcpcb_zone); #define V_tcpcb_zone VNET(tcpcb_zone) MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); static struct mtx isn_mtx; #define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) #define ISN_LOCK() mtx_lock(&isn_mtx) #define ISN_UNLOCK() mtx_unlock(&isn_mtx) /* * TCP initialization. */ static void tcp_zone_change(void *tag) { uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_tcpcb_zone, maxsockets); tcp_tw_zone_change(); } static int tcp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "tcpinp"); return (0); } /* * Take a value and get the next power of 2 that doesn't overflow. * Used to size the tcp_inpcb hash buckets. */ static int maketcp_hashsize(int size) { int hashsize; /* * auto tune. * get the next power of 2 higher than maxsockets. */ hashsize = 1 << fls(size); /* catch overflow, and just go one power of 2 smaller */ if (hashsize < size) { hashsize = 1 << (fls(size) - 1); } return (hashsize); } int register_tcp_functions(struct tcp_function_block *blk, int wait) { struct tcp_function_block *lblk; struct tcp_function *n; struct tcp_function_set fs; if (t_functions_inited == 0) { init_tcp_functions(); } if ((blk->tfb_tcp_output == NULL) || (blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_ctloutput == NULL) || (strlen(blk->tfb_tcp_block_name) == 0)) { /* * These functions are required and you * need a name. */ return (EINVAL); } if (blk->tfb_tcp_timer_stop_all || blk->tfb_tcp_timer_activate || blk->tfb_tcp_timer_active || blk->tfb_tcp_timer_stop) { /* * If you define one timer function you * must have them all. */ if ((blk->tfb_tcp_timer_stop_all == NULL) || (blk->tfb_tcp_timer_activate == NULL) || (blk->tfb_tcp_timer_active == NULL) || (blk->tfb_tcp_timer_stop == NULL)) { return (EINVAL); } } n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); if (n == NULL) { return (ENOMEM); } n->tf_fb = blk; strcpy(fs.function_set_name, blk->tfb_tcp_block_name); rw_wlock(&tcp_function_lock); lblk = find_tcp_functions_locked(&fs); if (lblk) { /* Duplicate name space not allowed */ rw_wunlock(&tcp_function_lock); free(n, M_TCPFUNCTIONS); return (EALREADY); } refcount_init(&blk->tfb_refcnt, 0); blk->tfb_flags = 0; TAILQ_INSERT_TAIL(&t_functions, n, tf_next); rw_wunlock(&tcp_function_lock); return(0); } int deregister_tcp_functions(struct tcp_function_block *blk) { struct tcp_function_block *lblk; struct tcp_function *f; int error=ENOENT; if (strcmp(blk->tfb_tcp_block_name, "default") == 0) { /* You can't un-register the default */ return (EPERM); } rw_wlock(&tcp_function_lock); if (blk == tcp_func_set_ptr) { /* You can't free the current default */ rw_wunlock(&tcp_function_lock); return (EBUSY); } if (blk->tfb_refcnt) { /* Still tcb attached, mark it. */ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED; rw_wunlock(&tcp_function_lock); return (EBUSY); } lblk = find_tcp_fb_locked(blk, &f); if (lblk) { /* Found */ TAILQ_REMOVE(&t_functions, f, tf_next); f->tf_fb = NULL; free(f, M_TCPFUNCTIONS); error = 0; } rw_wunlock(&tcp_function_lock); return (error); } void tcp_init(void) { const char *tcbhash_tuneable; int hashsize; tcbhash_tuneable = "net.inet.tcp.tcbhashsize"; #ifdef TCP_HHOOK if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); #endif hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); if (hashsize == 0) { /* * Auto tune the hash size based on maxsockets. * A perfect hash would have a 1:1 mapping * (hashsize = maxsockets) however it's been * suggested that O(2) average is better. */ hashsize = maketcp_hashsize(maxsockets / 4); /* * Our historical default is 512, * do not autotune lower than this. */ if (hashsize < 512) hashsize = 512; if (bootverbose && IS_DEFAULT_VNET(curvnet)) printf("%s: %s auto tuned to %d\n", __func__, tcbhash_tuneable, hashsize); } /* * We require a hashsize to be a power of two. * Previously if it was not a power of two we would just reset it * back to 512, which could be a nasty surprise if you did not notice * the error message. * Instead what we do is clip it to the closest power of two lower * than the specified hash value. */ if (!powerof2(hashsize)) { int oldhashsize = hashsize; hashsize = maketcp_hashsize(hashsize); /* prevent absurdly low value */ if (hashsize < 16) hashsize = 16; printf("%s: WARNING: TCB hash size not a power of 2, " "clipped from %d to %d.\n", __func__, oldhashsize, hashsize); } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. */ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_tcpcb_zone, maxsockets); uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); tcp_tw_init(); syncache_init(); tcp_hc_init(); TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); #ifdef TCP_RFC7413 tcp_fastopen_init(); #endif /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; tcp_reass_global_init(); /* XXX virtualize those bellow? */ tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; tcp_rexmit_min = TCPTV_MIN; if (tcp_rexmit_min < 1) tcp_rexmit_min = 1; tcp_persmin = TCPTV_PERSMIN; tcp_persmax = TCPTV_PERSMAX; tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; /* Setup the tcp function block list */ init_tcp_functions(); register_tcp_functions(&tcp_def_funcblk, M_WAITOK); if (tcp_soreceive_stream) { #ifdef INET tcp_usrreqs.pru_soreceive = soreceive_stream; #endif #ifdef INET6 tcp6_usrreqs.pru_soreceive = soreceive_stream; #endif /* INET6 */ } #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); #ifdef TCPPCAP tcp_pcap_init(); #endif } #ifdef VIMAGE static void tcp_destroy(void *unused __unused) { int n; #ifdef TCP_HHOOK int error; #endif /* * All our processes are gone, all our sockets should be cleaned * up, which means, we should be past the tcp_discardcb() calls. * Sleep to let all tcpcb timers really disappear and cleanup. */ for (;;) { INP_LIST_RLOCK(&V_tcbinfo); n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); if (n == 0) break; pause("tcpdes", hz / 10); } tcp_hc_destroy(); syncache_destroy(); tcp_tw_destroy(); in_pcbinfo_destroy(&V_tcbinfo); /* tcp_discardcb() clears the sack_holes up. */ uma_zdestroy(V_sack_hole_zone); uma_zdestroy(V_tcpcb_zone); #ifdef TCP_RFC7413 /* * Cannot free the zone until all tcpcbs are released as we attach * the allocations to them. */ tcp_fastopen_destroy(); #endif #ifdef TCP_HHOOK error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error); } error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error); } #endif } VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL); #endif void tcp_fini(void *xtp) { } /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. * tcp_template used to store this data in mbufs, but we now recopy it out * of the tcpcb each time to conserve mbufs. */ void tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) { struct tcphdr *th = (struct tcphdr *)tcp_ptr; INP_WLOCK_ASSERT(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { struct ip *ip; ip = (struct ip *)ip_ptr; ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = 0; ip->ip_id = 0; ip->ip_off = 0; ip->ip_ttl = inp->inp_ip_ttl; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } #endif /* INET */ th->th_sport = inp->inp_lport; th->th_dport = inp->inp_fport; th->th_seq = 0; th->th_ack = 0; th->th_x2 = 0; th->th_off = 5; th->th_flags = 0; th->th_win = 0; th->th_urp = 0; th->th_sum = 0; /* in_pseudo() is called later for ipv4 */ } /* * Create template to be used to send tcp packets on a connection. * Allocates an mbuf and fills in a skeletal tcp/ip header. The only * use for this function is in keepalives, which use tcp_respond. */ struct tcptemp * tcpip_maketemplate(struct inpcb *inp) { struct tcptemp *t; t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); if (t == NULL) return (NULL); tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); return (t); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == NULL, then we make a copy * of the tcpiphdr at th and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection. If flags are given then we send * a message back to the TCP which originated the segment th, * and discard the mbuf containing it and any other attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then th must point to *inside* the mbuf. */ void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags) { struct tcpopt to; struct inpcb *inp; struct ip *ip; struct mbuf *optm; struct tcphdr *nth; u_char *optp; #ifdef INET6 struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int optlen, tlen, win; bool incl_opts; KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); #ifdef INET6 isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4); ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp != NULL) { inp = tp->t_inpcb; KASSERT(inp != NULL, ("tcp control block w/o inpcb")); INP_WLOCK_ASSERT(inp); } else inp = NULL; incl_opts = false; win = 0; if (tp != NULL) { if (!(flags & TH_RST)) { win = sbspace(&inp->inp_socket->so_rcv); if (win > TCP_MAXWIN << tp->rcv_scale) win = TCP_MAXWIN << tp->rcv_scale; } if ((tp->t_flags & TF_NOOPT) == 0) incl_opts = true; } if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else if (!M_WRITABLE(m)) { struct mbuf *n; /* Can't reuse 'm', allocate a new mbuf. */ n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { m_freem(m); m_freem(n); return; } n->m_data += max_linkhdr; /* m_len is set later */ #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(n, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(n, struct ip6_hdr *); xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip)); ip = mtod(n, struct ip *); xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); xchg(nth->th_dport, nth->th_sport, uint16_t); th = nth; m_freem(m); m = n; } else { /* * reuse the mbuf. * XXX MRT We inherit the FIB, which is lucky. */ m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; /* m_len is set later */ #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, uint16_t); #undef xchg } tlen = 0; #ifdef INET6 if (isipv6) tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET tlen = sizeof (struct tcpiphdr); #endif #ifdef INVARIANTS m->m_len = 0; KASSERT(M_TRAILINGSPACE(m) >= tlen, ("Not enough trailing space for message (m=%p, need=%d, have=%ld)", m, tlen, (long)M_TRAILINGSPACE(m))); #endif m->m_len = tlen; to.to_flags = 0; if (incl_opts) { /* Make sure we have room. */ if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) { m->m_next = m_get(M_NOWAIT, MT_DATA); if (m->m_next) { optp = mtod(m->m_next, u_char *); optm = m->m_next; } else incl_opts = false; } else { optp = (u_char *) (nth + 1); optm = m; } } if (incl_opts) { /* Timestamps. */ if (tp->t_flags & TF_RCVD_TSTMP) { to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif - /* Add the options. */ tlen += optlen = tcp_addoptions(&to, optp); /* Update m_len in the correct mbuf. */ optm->m_len += optlen; } else optlen = 0; #ifdef INET6 if (isipv6) { ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(tlen - sizeof(*ip6)); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(tlen); ip->ip_ttl = V_ip_defttl; if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); } #endif m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; #ifdef MAC if (inp != NULL) { /* * Packet is associated with a socket, so allow the * label of the response to reflect the socket label. */ INP_WLOCK_ASSERT(inp); mac_inpcb_create_mbuf(inp, m); } else { /* * Packet is not associated with a socket, so possibly * update the label in place. */ mac_netinet_tcp_reply(m); } #endif nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2; nth->th_flags = flags; if (tp != NULL) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { - tcp_signature_compute(m, 0, 0, optlen, to.to_signature, - IPSEC_DIR_OUTBOUND); + if (!TCPMD5_ENABLED() || + TCPMD5_OUTPUT(m, nth, to.to_signature) != 0) { + m_freem(m); + return; + } } #endif m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; nth->th_sum = in6_cksum_pseudo(ip6, tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); } #endif /* INET */ #ifdef TCPDEBUG if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif TCP_PROBE3(debug__output, tp, th, m); if (flags & TH_RST) TCP_PROBE5(accept__refused, NULL, NULL, m, tp, nth); TCP_PROBE5(send, NULL, tp, m, tp, nth); #ifdef INET6 if (isipv6) (void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp); #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET (void) ip_output(m, NULL, NULL, 0, NULL, inp); #endif } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(struct inpcb *inp) { struct tcpcb_mem *tm; struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO); if (tm == NULL) return (NULL); tp = &tm->tcb; /* Initialise cc_var struct for this tcpcb. */ tp->ccv = &tm->ccv; tp->ccv->type = IPPROTO_TCP; tp->ccv->ccvc.tcp = tp; rw_rlock(&tcp_function_lock); tp->t_fb = tcp_func_set_ptr; refcount_acquire(&tp->t_fb->tfb_refcnt); rw_runlock(&tcp_function_lock); /* * Use the current system default CC algorithm. */ CC_LIST_RLOCK(); KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #ifdef TCP_HHOOK tp->osd = &tm->osd; if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #endif #ifdef VIMAGE tp->t_vnet = inp->inp_vnet; #endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : #endif /* INET6 */ V_tcp_mssdflt; /* Set up our timeouts. */ callout_init(&tp->t_timers->tt_rexmt, 1); callout_init(&tp->t_timers->tt_persist, 1); callout_init(&tp->t_timers->tt_keep, 1); callout_init(&tp->t_timers->tt_2msl, 1); callout_init(&tp->t_timers->tt_delack, 1); if (V_tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); /* * The tcpcb will hold a reference on its inpcb until tcp_discardcb() * is called. */ in_pcbref(inp); /* Reference for tcpcb */ tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = tcp_rexmit_min; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; inp->inp_ppcb = tp; #ifdef TCPPCAP /* * Init the TCP PCAP queues. */ tcp_pcap_tcpcb_init(tp); #endif if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } return (tp); /* XXX */ } /* * Switch the congestion control algorithm back to NewReno for any active * control blocks using an algorithm which is about to go away. * This ensures the CC framework can allow the unload to proceed without leaving * any dangling pointers which would trigger a panic. * Returning non-zero would inform the CC framework that something went wrong * and it would be unsafe to allow the unload to proceed. However, there is no * way for this to occur with this implementation so we always return zero. */ int tcp_ccalgounload(struct cc_algo *unload_algo) { struct cc_algo *tmpalgo; struct inpcb *inp; struct tcpcb *tp; VNET_ITERATOR_DECL(vnet_iter); /* * Check all active control blocks across all network stacks and change * any that are using "unload_algo" back to NewReno. If "unload_algo" * requires cleanup code to be run, call it. */ VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code * because the INP_INFO_WLOCK is held during initialisation. We * therefore don't enter the loop below until the connection * list has stabilised. */ LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && (tp = intotcpcb(inp)) != NULL) { /* * By holding INP_WLOCK here, we are assured * that the connection is not currently * executing inside the CC module's functions * i.e. it is safe to make the switch back to * NewReno. */ if (CC_ALGO(tp) == unload_algo) { tmpalgo = CC_ALGO(tp); /* NewReno does not require any init. */ CC_ALGO(tp) = &newreno_cc_algo; if (tmpalgo->cb_destroy != NULL) tmpalgo->cb_destroy(tp->ccv); } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); return (0); } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { tcp_state_change(tp, TCPS_CLOSED); (void) tp->t_fb->tfb_tcp_output(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } void tcp_discardcb(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ int released; INP_WLOCK_ASSERT(inp); /* * Make sure that all of our timers are stopped before we delete the * PCB. * * If stopping a timer fails, we schedule a discard function in same * callout, and the last discard function called will take care of * deleting the tcpcb. */ tp->t_timers->tt_draincnt = 0; tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); tcp_timer_stop(tp, TT_2MSL); tcp_timer_stop(tp, TT_DELACK); if (tp->t_fb->tfb_tcp_timer_stop_all) { /* * Call the stop-all function of the methods, * this function should call the tcp_timer_stop() * method with each of the function specific timeouts. * That stop will be called via the tfb_tcp_timer_stop() * which should use the async drain function of the * callout system (see tcp_var.h). */ tp->t_fb->tfb_tcp_timer_stop_all(tp); } /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as 4 rtt samples. * 4 samples is enough for the srtt filter to converge * to within enough % of the correct value; fewer samples * and we could save a bogus rtt. The danger is not high * as tcp quickly recovers from everything. * XXX: Works very well but needs some more statistics! */ if (tp->t_rttupdated >= 4) { struct hc_metrics_lite metrics; uint32_t ssthresh; bzero(&metrics, sizeof(metrics)); /* * Update the ssthresh always when the conditions below * are satisfied. This gives us better new start value * for the congestion avoidance for new connections. * ssthresh is only set if packet loss occurred on a session. * * XXXRW: 'so' may be NULL here, and/or socket buffer may be * being torn down. Ideally this code would not use 'so'. */ ssthresh = tp->snd_ssthresh; if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; if (ssthresh < 2) ssthresh = 2; ssthresh *= (tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); } else ssthresh = 0; metrics.rmx_ssthresh = ssthresh; metrics.rmx_rtt = tp->t_srtt; metrics.rmx_rttvar = tp->t_rttvar; metrics.rmx_cwnd = tp->snd_cwnd; metrics.rmx_sendpipe = 0; metrics.rmx_recvpipe = 0; tcp_hc_update(&inp->inp_inc, &metrics); } /* free the reassembly queue, if any */ tcp_reass_flush(tp); #ifdef TCP_OFFLOAD /* Disconnect offload device, if any. */ if (tp->t_flags & TF_TOE) tcp_offload_detach(tp); #endif tcp_free_sackholes(tp); #ifdef TCPPCAP /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tp->t_inpkts)); tcp_pcap_drain(&(tp->t_outpkts)); #endif /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); #ifdef TCP_HHOOK khelp_destroy_osd(tp->osd); #endif CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on tcpcb, let's free it. */ TCPSTATES_DEC(tp->t_state); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); released = in_pcbrele_wlocked(inp); KASSERT(!released, ("%s: inp %p should not have been released " "here", __func__, inp)); } } void tcp_timer_discard(void *ptp) { struct inpcb *inp; struct tcpcb *tp; tp = (struct tcpcb *)ptp; CURVNET_SET(tp->t_vnet); INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0, ("%s: tcpcb has to be stopped here", __func__)); tp->t_timers->tt_draincnt--; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on this tcpcb, let's free it. */ TCPSTATES_DEC(tp->t_state); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } /* * Attempt to close a TCP control block, marking it as dropped, and freeing * the socket if we hold the only reference. */ struct tcpcb * tcp_close(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); #endif #ifdef TCP_RFC7413 /* * This releases the TFO pending counter resource for TFO listen * sockets as well as passively-created TFO sockets that transition * from SYN_RECEIVED to CLOSED. */ if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; } #endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); if (tp->t_state != TCPS_CLOSED) tcp_state_change(tp, TCPS_CLOSED); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); so = inp->inp_socket; soisdisconnected(so); if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF")); inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); return (NULL); } return (tp); } void tcp_drain(void) { VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; struct tcpcb *tcpb; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * useful. */ INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; INP_WLOCK(inpb); if ((tcpb = intotcpcb(inpb)) != NULL) { tcp_reass_flush(tcpb); tcp_clean_sackreport(tcpb); #ifdef TCPPCAP if (tcp_pcap_aggressive_free) { /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tcpb->t_inpkts)); tcp_pcap_drain(&(tcpb->t_outpkts)); } #endif } INP_WUNLOCK(inpb); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). * * Do not wake up user since there currently is no mechanism for * reporting soft errors (yet - a kqueue filter may be added). */ static struct inpcb * tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_notify: tp == NULL")); /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { if (inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = (struct rtentry *)NULL; } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { tp = tcp_drop(tp, error); if (tp != NULL) return (inp); else return (NULL); } else { tp->t_softerror = error; return (inp); } #if 0 wakeup( &so->so_timeo); sorwakeup(so); sowwakeup(so); #endif } static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, m, n, pcb_count; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = V_tcbinfo.ipi_count + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. */ INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + (n + m) * sizeof(struct xtcpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n + m; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); error = syncache_pcblist(req, m, &pcb_count); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* * XXX: This use of cr_cansee(), introduced with * TCP state changes, is not quite right, but for * now, better than nothing. */ if (inp->inp_flags & INP_TIMEWAIT) { if (intotw(inp) != NULL) error = cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred); else error = EINVAL; /* Skip this inp. */ } else error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) { in_pcbref(inp); inp_list[i++] = inp; } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; void *inp_ppcb; bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb == NULL) bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else { bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (xt.xt_tp.t_timers) tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); } if (inp->inp_socket != NULL) sotoxsocket(inp->inp_socket, &xt.xt_socket); else { bzero(&xt.xt_socket, sizeof xt.xt_socket); xt.xt_socket.xso_protocol = IPPROTO_TCP; } xt.xt_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else INP_RUNLOCK(inp); } INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); #ifdef INET static int tcp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection"); #endif /* INET */ #ifdef INET6 static int tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; #ifdef INET int mapped = 0; #endif error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else #endif return (EINVAL); } #ifdef INET if (mapped == 1) inp = in_pcblookup(&V_tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); else #endif inp = in6_pcblookup(&V_tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); #endif /* INET6 */ #ifdef INET void tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; struct tcphdr *th; struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; tcp_seq icmp_tcp_seq; int mtu; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ else if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip == NULL) { in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); return; } icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ inp = (*notify)(inp, EHOSTDOWN); if (inp != NULL) INP_WUNLOCK(inp); } else if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohs(icp->icmp_nextmtu); /* * If no alternative MTU was * proposed, try the next smaller * one. */ if (!mtu) mtu = ip_next_mtu( ntohs(ip->ip_len), 1); if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr)) mtu = V_tcp_minmss + sizeof(struct tcpiphdr); /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + sizeof(struct tcpiphdr)) { bzero(&inc, sizeof(inc)); inc.inc_faddr = faddr; inc.inc_fibnum = inp->inp_inc.inc_fibnum; tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); } } else inp = (*notify)(inp, inetctlerrmap[cmd]); } } if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc_faddr = faddr; inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET */ #ifdef INET6 void tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct in6_addr *dst; struct tcphdr *th; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct ip6_hdr *ip6; struct mbuf *m; struct inpcb *inp; struct tcpcb *tp; struct icmp6_hdr *icmp6; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; struct in_conninfo inc; tcp_seq icmp_tcp_seq; unsigned int mtu; unsigned int off; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; icmp6 = ip6cp->ip6c_icmp6; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; sa6_src = ip6cp->ip6c_src; dst = ip6cp->ip6c_finaldst; } else { m = NULL; ip6 = NULL; off = 0; /* fool gcc */ sa6_src = &sa6_any; dst = NULL; } if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL || cmd == PRC_TIMXCEED_INTRANS) && ip6 != NULL) notify = tcp_drop_syn_sent; /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ else if (cmd == PRC_HOSTDEAD) ip6 = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0) return; if (ip6 == NULL) { in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify); return; } /* Check if we can safely get the ports from the tcp hdr */ if (m == NULL || (m->m_pkthdr.len < (int32_t) (off + offsetof(struct tcphdr, th_seq)))) { return; } th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off); INP_INFO_RLOCK(&V_tcbinfo); inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport, &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ inp = (*notify)(inp, EHOSTDOWN); if (inp != NULL) INP_WUNLOCK(inp); } else if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohl(icmp6->icmp6_mtu); /* * If no alternative MTU was * proposed, or the proposed * MTU was too small, set to * the min. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) goto unlock_inp; /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + (sizeof (*th) + sizeof (*ip6))) { tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } else inp = (*notify)(inp, inet6ctlerrmap[cmd]); } } unlock_inp: if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc6_faddr = *dst; inc.inc6_laddr = ip6->ip6_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET6 */ /* * Following is where TCP initial sequence number generation occurs. * * There are two places where we must use initial sequence numbers: * 1. In SYN-ACK packets. * 2. In SYN packets. * * All ISNs for SYN-ACK packets are generated by the syncache. See * tcp_syncache.c for details. * * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling * depends on this property. In addition, these ISNs should be * unguessable so as to prevent connection hijacking. To satisfy * the requirements of this situation, the algorithm outlined in * RFC 1948 is used, with only small modifications. * * Implementation details: * * Time is based off the system timer, and is corrected so that it * increases by one megabyte per second. This allows for proper * recycling on high speed LANs while still leaving over an hour * before rollover. * * As reading the *exact* system time is too expensive to be done * whenever setting up a TCP connection, we increment the time * offset in two ways. First, a small random positive increment * is added to isn_offset for each connection that is set up. * Second, the function tcp_isn_tick fires once per clock tick * and increments isn_offset as necessary so that sequence numbers * are incremented at approximately ISN_BYTES_PER_SECOND. The * random positive increments serve only to ensure that the same * exact sequence number is never sent out twice (as could otherwise * happen when a port is recycled in less than the system tick * interval.) * * net.inet.tcp.isn_reseed_interval controls the number of seconds * between seeding of isn_secret. This is normally set to zero, * as reseeding should not be necessary. * * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In * general, this means holding an exclusive (write) lock. */ #define ISN_BYTES_PER_SECOND 1048576 #define ISN_STATIC_INCREMENT 4096 #define ISN_RANDOM_INCREMENT (4096 - 1) static VNET_DEFINE(u_char, isn_secret[32]); static VNET_DEFINE(int, isn_last); static VNET_DEFINE(int, isn_last_reseed); static VNET_DEFINE(u_int32_t, isn_offset); static VNET_DEFINE(u_int32_t, isn_offset_old); #define V_isn_secret VNET(isn_secret) #define V_isn_last VNET(isn_last) #define V_isn_last_reseed VNET(isn_last_reseed) #define V_isn_offset VNET(isn_offset) #define V_isn_offset_old VNET(isn_offset_old) tcp_seq tcp_new_isn(struct tcpcb *tp) { MD5_CTX isn_ctx; u_int32_t md5_buffer[4]; tcp_seq new_isn; u_int32_t projected_offset; INP_WLOCK_ASSERT(tp->t_inpcb); ISN_LOCK(); /* Seed if this is the first use, reseed if requested. */ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz) < (u_int)ticks))) { read_random(&V_isn_secret, sizeof(V_isn_secret)); V_isn_last_reseed = ticks; } /* Compute the md5 hash and return the ISN. */ MD5Init(&isn_ctx); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, sizeof(struct in6_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, sizeof(struct in6_addr)); } else #endif { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, sizeof(struct in_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, sizeof(struct in_addr)); } MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret)); MD5Final((u_char *) &md5_buffer, &isn_ctx); new_isn = (tcp_seq) md5_buffer[0]; V_isn_offset += ISN_STATIC_INCREMENT + (arc4random() & ISN_RANDOM_INCREMENT); if (ticks != V_isn_last) { projected_offset = V_isn_offset_old + ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last); if (SEQ_GT(projected_offset, V_isn_offset)) V_isn_offset = projected_offset; V_isn_offset_old = V_isn_offset; V_isn_last = ticks; } new_isn += V_isn_offset; ISN_UNLOCK(); return (new_isn); } /* * When a specific ICMP unreachable message is received and the * connection state is SYN-SENT, drop the connection. This behavior * is controlled by the icmp_may_rst sysctl. */ struct inpcb * tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); if (tp->t_state != TCPS_SYN_SENT) return (inp); tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); else return (NULL); } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value. Also nudge TCP to send something, since we * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ static struct inpcb * tcp_mtudisc_notify(struct inpcb *inp, int error) { tcp_mtudisc(inp, -1); return (inp); } static void tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return; tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ if (so->so_snd.sb_hiwat < tp->t_maxseg) tp->t_maxseg = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_free_sackholes(tp); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp->t_flags); tp->t_fb->tfb_tcp_output(tp); } #ifdef INET /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated, then return 0. This routine * is called by TCP routines that access the rmx structure and by * tcp_mss_update to get the peer/interface MTU. */ uint32_t tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop4_extended nh4; struct ifnet *ifp; uint32_t maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer")); if (inc->inc_faddr.s_addr != INADDR_ANY) { if (fib4_lookup_nh_ext(inc->inc_fibnum, inc->inc_faddr, NHR_REF, 0, &nh4) != 0) return (0); ifp = nh4.nh_ifp; maxmtu = nh4.nh_mtu; /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO4 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib4_free_nh_ext(inc->inc_fibnum, &nh4); } return (maxmtu); } #endif /* INET */ #ifdef INET6 uint32_t tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop6_extended nh6; struct in6_addr dst6; uint32_t scopeid; struct ifnet *ifp; uint32_t maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid); if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0, 0, &nh6) != 0) return (0); ifp = nh6.nh_ifp; maxmtu = nh6.nh_mtu; /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO6 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib6_free_nh_ext(inc->inc_fibnum, &nh6); } return (maxmtu); } #endif /* INET6 */ /* * Calculate effective SMSS per RFC5681 definition for a given TCP * connection at its current state, taking into account SACK and etc. */ u_int tcp_maxseg(const struct tcpcb *tp) { u_int optlen; if (tp->t_flags & TF_NOOPT) return (tp->t_maxseg); /* * Here we have a simplified code from tcp_addoptions(), * without a proper loop, and having most of paddings hardcoded. * We might make mistakes with padding here in some edge cases, * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. */ #define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; optlen = PAD(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) optlen += PAD(TCPOLEN_SACK_PERMITTED); } #undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } -#ifdef IPSEC -/* compute ESP/AH header size for TCP, including outer IP header. */ -size_t -ipsec_hdrsiz_tcp(struct tcpcb *tp) -{ - struct inpcb *inp; - struct mbuf *m; - size_t hdrsiz; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - struct tcphdr *th; - - if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) || - (!key_havesp(IPSEC_DIR_OUTBOUND))) - return (0); - m = m_gethdr(M_NOWAIT, MT_DATA); - if (!m) - return (0); - -#ifdef INET6 - if ((inp->inp_vflag & INP_IPV6) != 0) { - ip6 = mtod(m, struct ip6_hdr *); - th = (struct tcphdr *)(ip6 + 1); - m->m_pkthdr.len = m->m_len = - sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - tcpip_fillheaders(inp, ip6, th); - hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); - } else -#endif /* INET6 */ - { - ip = mtod(m, struct ip *); - th = (struct tcphdr *)(ip + 1); - m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); - tcpip_fillheaders(inp, ip, th); - hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); - } - - m_free(m); - return (hdrsiz); -} -#endif /* IPSEC */ - -#ifdef TCP_SIGNATURE -/* - * Callback function invoked by m_apply() to digest TCP segment data - * contained within an mbuf chain. - */ -static int -tcp_signature_apply(void *fstate, void *data, u_int len) -{ - - MD5Update(fstate, (u_char *)data, len); - return (0); -} - -/* - * XXX The key is retrieved from the system's PF_KEY SADB, by keying a - * search with the destination IP address, and a 'magic SPI' to be - * determined by the application. This is hardcoded elsewhere to 1179 -*/ -struct secasvar * -tcp_get_sav(struct mbuf *m, u_int direction) -{ - union sockaddr_union dst; - struct secasvar *sav; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; - char ip6buf[INET6_ADDRSTRLEN]; -#endif - - /* Extract the destination from the IP header in the mbuf. */ - bzero(&dst, sizeof(union sockaddr_union)); - ip = mtod(m, struct ip *); -#ifdef INET6 - ip6 = NULL; /* Make the compiler happy. */ -#endif - switch (ip->ip_v) { -#ifdef INET - case IPVERSION: - dst.sa.sa_len = sizeof(struct sockaddr_in); - dst.sa.sa_family = AF_INET; - dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? - ip->ip_src : ip->ip_dst; - break; -#endif -#ifdef INET6 - case (IPV6_VERSION >> 4): - ip6 = mtod(m, struct ip6_hdr *); - dst.sa.sa_len = sizeof(struct sockaddr_in6); - dst.sa.sa_family = AF_INET6; - dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? - ip6->ip6_src : ip6->ip6_dst; - break; -#endif - default: - return (NULL); - /* NOTREACHED */ - break; - } - - /* Look up an SADB entry which matches the address of the peer. */ - sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI)); - if (sav == NULL) { - ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, - (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) : -#ifdef INET6 - (ip->ip_v == (IPV6_VERSION >> 4)) ? - ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : -#endif - "(unsupported)")); - } - - return (sav); -} - -/* - * Compute TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Parameters: - * m pointer to head of mbuf chain - * len length of TCP segment data, excluding options - * optlen length of TCP segment options - * buf pointer to storage for computed MD5 digest - * sav pointer to security assosiation - * - * We do this over ip, tcphdr, segment data, and the key in the SADB. - * When called from tcp_input(), we can be sure that th_sum has been - * zeroed out and verified already. - * - * Releases reference to SADB key before return. - * - * Return 0 if successful, otherwise return -1. - * - */ -int -tcp_signature_do_compute(struct mbuf *m, int len, int optlen, - u_char *buf, struct secasvar *sav) -{ -#ifdef INET - struct ippseudo ippseudo; -#endif - MD5_CTX ctx; - int doff; - struct ip *ip; -#ifdef INET - struct ipovly *ipovly; -#endif - struct tcphdr *th; -#ifdef INET6 - struct ip6_hdr *ip6; - struct in6_addr in6; - uint32_t plen; - uint16_t nhdr; -#endif - u_short savecsum; - - KASSERT(m != NULL, ("NULL mbuf chain")); - KASSERT(buf != NULL, ("NULL signature pointer")); - - /* Extract the destination from the IP header in the mbuf. */ - ip = mtod(m, struct ip *); -#ifdef INET6 - ip6 = NULL; /* Make the compiler happy. */ -#endif - - MD5Init(&ctx); - /* - * Step 1: Update MD5 hash with IP(v6) pseudo-header. - * - * XXX The ippseudo header MUST be digested in network byte order, - * or else we'll fail the regression test. Assume all fields we've - * been doing arithmetic on have been in host byte order. - * XXX One cannot depend on ipovly->ih_len here. When called from - * tcp_output(), the underlying ip_len member has not yet been set. - */ - switch (ip->ip_v) { -#ifdef INET - case IPVERSION: - ipovly = (struct ipovly *)ip; - ippseudo.ippseudo_src = ipovly->ih_src; - ippseudo.ippseudo_dst = ipovly->ih_dst; - ippseudo.ippseudo_pad = 0; - ippseudo.ippseudo_p = IPPROTO_TCP; - ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + - optlen); - MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo)); - - th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip)); - doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen; - break; -#endif -#ifdef INET6 - /* - * RFC 2385, 2.0 Proposal - * For IPv6, the pseudo-header is as described in RFC 2460, namely the - * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- - * extended next header value (to form 32 bits), and 32-bit segment - * length. - * Note: Upper-Layer Packet Length comes before Next Header. - */ - case (IPV6_VERSION >> 4): - ip6 = mtod(m, struct ip6_hdr *); - in6 = ip6->ip6_src; - in6_clearscope(&in6); - MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); - in6 = ip6->ip6_dst; - in6_clearscope(&in6); - MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); - plen = htonl(len + sizeof(struct tcphdr) + optlen); - MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); - nhdr = 0; - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - nhdr = IPPROTO_TCP; - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - - th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr)); - doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen; - break; -#endif - default: - KEY_FREESAV(&sav); - return (-1); - /* NOTREACHED */ - break; - } - - - /* - * Step 2: Update MD5 hash with TCP header, excluding options. - * The TCP checksum must be set to zero. - */ - savecsum = th->th_sum; - th->th_sum = 0; - MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); - th->th_sum = savecsum; - - /* - * Step 3: Update MD5 hash with TCP segment data. - * Use m_apply() to avoid an early m_pullup(). - */ - if (len > 0) - m_apply(m, doff, len, tcp_signature_apply, &ctx); - - /* - * Step 4: Update MD5 hash with shared secret. - */ - MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); - MD5Final(buf, &ctx); - - key_sa_recordxfer(sav, m); - KEY_FREESAV(&sav); - return (0); -} - -/* - * Compute TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Return 0 if successful, otherwise return -1. - */ -int -tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, - u_char *buf, u_int direction) -{ - struct secasvar *sav; - - if ((sav = tcp_get_sav(m, direction)) == NULL) - return (-1); - - return (tcp_signature_do_compute(m, len, optlen, buf, sav)); -} - -/* - * Verify the TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Parameters: - * m pointer to head of mbuf chain - * len length of TCP segment data, excluding options - * optlen length of TCP segment options - * buf pointer to storage for computed MD5 digest - * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) - * - * Return 1 if successful, otherwise return 0. - */ -int -tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) -{ - char tmpdigest[TCP_SIGLEN]; - - if (tcp_sig_checksigs == 0) - return (1); - if ((tcpbflag & TF_SIGNATURE) == 0) { - if ((to->to_flags & TOF_SIGNATURE) != 0) { - - /* - * If this socket is not expecting signature but - * the segment contains signature just fail. - */ - TCPSTAT_INC(tcps_sig_err_sigopt); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - - /* Signature is not expected, and not present in segment. */ - return (1); - } - - /* - * If this socket is expecting signature but the segment does not - * contain any just fail. - */ - if ((to->to_flags & TOF_SIGNATURE) == 0) { - TCPSTAT_INC(tcps_sig_err_nosigopt); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], - IPSEC_DIR_INBOUND) == -1) { - TCPSTAT_INC(tcps_sig_err_buildsig); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - - if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - TCPSTAT_INC(tcps_sig_rcvgoodsig); - return (1); -} -#endif /* TCP_SIGNATURE */ - static int sysctl_drop(SYSCTL_HANDLER_ARGS) { /* addrs[0] is a foreign socket, addrs[1] is a local one. */ struct sockaddr_storage addrs[2]; struct inpcb *inp; struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif int error; inp = NULL; fin = lin = NULL; #ifdef INET6 fin6 = lin6 = NULL; #endif error = 0; if (req->oldptr != NULL || req->oldlen != 0) return (EINVAL); if (req->newptr == NULL) return (EPERM); if (req->newlen < sizeof(addrs)) return (ENOMEM); error = SYSCTL_IN(req, &addrs, sizeof(addrs)); if (error) return (error); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: fin6 = (struct sockaddr_in6 *)&addrs[0]; lin6 = (struct sockaddr_in6 *)&addrs[1]; if (fin6->sin6_len != sizeof(struct sockaddr_in6) || lin6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) return (EINVAL); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]); fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; break; } error = sa6_embedscope(fin6, V_ip6_use_defzone); if (error) return (error); error = sa6_embedscope(lin6, V_ip6_use_defzone); if (error) return (error); break; #endif #ifdef INET case AF_INET: fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; if (fin->sin_len != sizeof(struct sockaddr_in) || lin->sin_len != sizeof(struct sockaddr_in)) return (EINVAL); break; #endif default: return (EINVAL); } INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif #ifdef INET case AF_INET: inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif } if (inp != NULL) { if (inp->inp_flags & INP_TIMEWAIT) { /* * XXXRW: There currently exists a state where an * inpcb is present, but its timewait state has been * discarded. For now, don't allow dropping of this * type of inpcb. */ tw = intotw(inp); if (tw != NULL) tcp_twclose(tw, 0); else INP_WUNLOCK(inp); } else if (!(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket->so_options & SO_ACCEPTCONN)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp != NULL) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } else error = ESRCH; INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, 0, sysctl_drop, "", "Drop TCP connection"); /* * Generate a standardized TCP log line for use throughout the * tcp subsystem. Memory allocation is done with M_NOWAIT to * allow use in the interrupt context. * * NB: The caller MUST free(s, M_TCPLOG) the returned string. * NB: The function may return NULL if memory allocation failed. * * Due to header inclusion and ordering limitations the struct ip * and ip6_hdr pointers have to be passed as void pointers. */ char * tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_in_vain == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } char * tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_debug == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { char *s, *sp; size_t size; struct ip *ip; #ifdef INET6 const struct ip6_hdr *ip6; ip6 = (const struct ip6_hdr *)ip6hdr; #endif /* INET6 */ ip = (struct ip *)ip4hdr; /* * The log line looks like this: * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2" */ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") + sizeof(PRINT_TH_FLAGS) + 1 + #ifdef INET6 2 * INET6_ADDRSTRLEN; #else 2 * INET_ADDRSTRLEN; #endif /* INET6 */ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT); if (s == NULL) return (NULL); strcat(s, "TCP: ["); sp = s + strlen(s); if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) { inet_ntoa_r(inc->inc_faddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); inet_ntoa_r(inc->inc_laddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); #ifdef INET6 } else if (inc) { ip6_sprintf(sp, &inc->inc6_faddr); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); ip6_sprintf(sp, &inc->inc6_laddr); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); } else if (ip6 && th) { ip6_sprintf(sp, &ip6->ip6_src); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); ip6_sprintf(sp, &ip6->ip6_dst); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET6 */ #ifdef INET } else if (ip && th) { inet_ntoa_r(ip->ip_src, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); inet_ntoa_r(ip->ip_dst, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET */ } else { free(s, M_TCPLOG); return (NULL); } sp = s + strlen(s); if (th) sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS); if (*(s + size - 1) != '\0') panic("%s: string too long", __func__); return (s); } /* * A subroutine which makes it easy to track TCP state changes with DTrace. * This function shouldn't be called for t_state initializations that don't * correspond to actual TCP state transitions. */ void tcp_state_change(struct tcpcb *tp, int newstate) { #if defined(KDTRACE_HOOKS) int pstate = tp->t_state; #endif TCPSTATES_DEC(tp->t_state); TCPSTATES_INC(newstate); tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 2eba51d64269..66607ecab0fa 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1,2198 +1,2239 @@ /*- * Copyright (c) 2001 McAfee, Inc. * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * This software was developed for the FreeBSD Project by Jonathan Lemon * and McAfee Research, the Security Research Division of McAfee, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. [2001 McAfee, Inc.] * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" #include #include #include #include #include #include #include #include #include #include #include #include /* for proc0 declaration */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #endif #include #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #ifdef TCP_OFFLOAD #include #endif -#ifdef IPSEC -#include -#ifdef INET6 -#include -#endif -#include -#endif /*IPSEC*/ +#include #include #include static VNET_DEFINE(int, tcp_syncookies) = 1; #define V_tcp_syncookies VNET(tcp_syncookies) SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncookies), 0, "Use TCP SYN cookies if the syncache overflows"); static VNET_DEFINE(int, tcp_syncookiesonly) = 0; #define V_tcp_syncookiesonly VNET(tcp_syncookiesonly) SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); static VNET_DEFINE(int, functions_inherit_listen_socket_stack) = 1; #define V_functions_inherit_listen_socket_stack \ VNET(functions_inherit_listen_socket_stack) SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(functions_inherit_listen_socket_stack), 0, "Inherit listen socket's stack"); #ifdef TCP_OFFLOAD #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL) #endif static void syncache_drop(struct syncache *, struct syncache_head *); static void syncache_free(struct syncache *); static void syncache_insert(struct syncache *, struct syncache_head *); static int syncache_respond(struct syncache *, struct syncache_head *, int, const struct mbuf *); static struct socket *syncache_socket(struct syncache *, struct socket *, struct mbuf *m); static void syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout); static void syncache_timer(void *); static uint32_t syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t, uint8_t *, uintptr_t); static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *); static struct syncache *syncookie_lookup(struct in_conninfo *, struct syncache_head *, struct syncache *, struct tcphdr *, struct tcpopt *, struct socket *); static void syncookie_reseed(void *); #ifdef INVARIANTS static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, struct socket *lso); #endif /* * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies. * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds, * the odds are that the user has given up attempting to connect by then. */ #define SYNCACHE_MAXREXMTS 3 /* Arbitrary values */ #define TCP_SYNCACHE_HASHSIZE 512 #define TCP_SYNCACHE_BUCKETLIMIT 30 static VNET_DEFINE(struct tcp_syncache, tcp_syncache); #define V_tcp_syncache VNET(tcp_syncache) static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_syncache.bucket_limit), 0, "Per-bucket hash limit for syncache"); SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_syncache.cache_limit), 0, "Overall entry limit for syncache"); SYSCTL_UMA_CUR(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_VNET, &VNET_NAME(tcp_syncache.zone), "Current number of entries in syncache"); SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_syncache.hashsize), 0, "Size of TCP syncache hashtable"); SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncache.rexmt_limit), 0, "Limit on SYN/ACK retransmissions"); VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1; SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0, "Send reset on socket allocation failure"); static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); #define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx) #define SCH_UNLOCK(sch) mtx_unlock(&(sch)->sch_mtx) #define SCH_LOCK_ASSERT(sch) mtx_assert(&(sch)->sch_mtx, MA_OWNED) /* * Requires the syncache entry to be already removed from the bucket list. */ static void syncache_free(struct syncache *sc) { if (sc->sc_ipopts) (void) m_free(sc->sc_ipopts); if (sc->sc_cred) crfree(sc->sc_cred); #ifdef MAC mac_syncache_destroy(&sc->sc_label); #endif uma_zfree(V_tcp_syncache.zone, sc); } void syncache_init(void) { int i; V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; V_tcp_syncache.hash_secret = arc4random(); TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize", &V_tcp_syncache.hashsize); TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit", &V_tcp_syncache.bucket_limit); if (!powerof2(V_tcp_syncache.hashsize) || V_tcp_syncache.hashsize == 0) { printf("WARNING: syncache hash size is not a power of 2.\n"); V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; } V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1; /* Set limits. */ V_tcp_syncache.cache_limit = V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit; TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit", &V_tcp_syncache.cache_limit); /* Allocate the hash table. */ V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize * sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO); #ifdef VIMAGE V_tcp_syncache.vnet = curvnet; #endif /* Initialize the hash buckets. */ for (i = 0; i < V_tcp_syncache.hashsize; i++) { TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket); mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head", NULL, MTX_DEF); callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer, &V_tcp_syncache.hashbase[i].sch_mtx, 0); V_tcp_syncache.hashbase[i].sch_length = 0; V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache; } /* Create the syncache entry zone. */ V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit); /* Start the SYN cookie reseeder callout. */ callout_init(&V_tcp_syncache.secret.reseed, 1); arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0); arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0); callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz, syncookie_reseed, &V_tcp_syncache); } #ifdef VIMAGE void syncache_destroy(void) { struct syncache_head *sch; struct syncache *sc, *nsc; int i; /* * Stop the re-seed timer before freeing resources. No need to * possibly schedule it another time. */ callout_drain(&V_tcp_syncache.secret.reseed); /* Cleanup hash buckets: stop timers, free entries, destroy locks. */ for (i = 0; i < V_tcp_syncache.hashsize; i++) { sch = &V_tcp_syncache.hashbase[i]; callout_drain(&sch->sch_timer); SCH_LOCK(sch); TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) syncache_drop(sc, sch); SCH_UNLOCK(sch); KASSERT(TAILQ_EMPTY(&sch->sch_bucket), ("%s: sch->sch_bucket not empty", __func__)); KASSERT(sch->sch_length == 0, ("%s: sch->sch_length %d not 0", __func__, sch->sch_length)); mtx_destroy(&sch->sch_mtx); } KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0, ("%s: cache_count not 0", __func__)); /* Free the allocated global resources. */ uma_zdestroy(V_tcp_syncache.zone); free(V_tcp_syncache.hashbase, M_SYNCACHE); } #endif /* * Inserts a syncache entry into the specified bucket row. * Locks and unlocks the syncache_head autonomously. */ static void syncache_insert(struct syncache *sc, struct syncache_head *sch) { struct syncache *sc2; SCH_LOCK(sch); /* * Make sure that we don't overflow the per-bucket limit. * If the bucket is full, toss the oldest element. */ if (sch->sch_length >= V_tcp_syncache.bucket_limit) { KASSERT(!TAILQ_EMPTY(&sch->sch_bucket), ("sch->sch_length incorrect")); sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head); syncache_drop(sc2, sch); TCPSTAT_INC(tcps_sc_bucketoverflow); } /* Put it into the bucket. */ TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash); sch->sch_length++; #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; tod->tod_syncache_added(tod, sc->sc_todctx); } #endif /* Reinitialize the bucket row's timer. */ if (sch->sch_length == 1) sch->sch_nextc = ticks + INT_MAX; syncache_timeout(sc, sch, 1); SCH_UNLOCK(sch); TCPSTATES_INC(TCPS_SYN_RECEIVED); TCPSTAT_INC(tcps_sc_added); } /* * Remove and free entry from syncache bucket row. * Expects locked syncache head. */ static void syncache_drop(struct syncache *sc, struct syncache_head *sch) { SCH_LOCK_ASSERT(sch); TCPSTATES_DEC(TCPS_SYN_RECEIVED); TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; tod->tod_syncache_removed(tod, sc->sc_todctx); } #endif syncache_free(sc); } /* * Engage/reengage time on bucket row. */ static void syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout) { sc->sc_rxttime = ticks + TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]); sc->sc_rxmits++; if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) { sch->sch_nextc = sc->sc_rxttime; if (docallout) callout_reset(&sch->sch_timer, sch->sch_nextc - ticks, syncache_timer, (void *)sch); } } /* * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. * If we have retransmitted an entry the maximum number of times, expire it. * One separate timer for each bucket row. */ static void syncache_timer(void *xsch) { struct syncache_head *sch = (struct syncache_head *)xsch; struct syncache *sc, *nsc; int tick = ticks; char *s; CURVNET_SET(sch->sch_sc->vnet); /* NB: syncache_head has already been locked by the callout. */ SCH_LOCK_ASSERT(sch); /* * In the following cycle we may remove some entries and/or * advance some timeouts, so re-initialize the bucket timer. */ sch->sch_nextc = tick + INT_MAX; TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) { /* * We do not check if the listen socket still exists * and accept the case where the listen socket may be * gone by the time we resend the SYN/ACK. We do * not expect this to happens often. If it does, * then the RST will be sent by the time the remote * host does the SYN/ACK->ACK. */ if (TSTMP_GT(sc->sc_rxttime, tick)) { if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) sch->sch_nextc = sc->sc_rxttime; continue; } if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) { if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Retransmits exhausted, " "giving up and removing syncache entry\n", s, __func__); free(s, M_TCPLOG); } syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_stale); continue; } if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Response timeout, " "retransmitting (%u) SYN|ACK\n", s, __func__, sc->sc_rxmits); free(s, M_TCPLOG); } syncache_respond(sc, sch, 1, NULL); TCPSTAT_INC(tcps_sc_retransmitted); syncache_timeout(sc, sch, 0); } if (!TAILQ_EMPTY(&(sch)->sch_bucket)) callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick, syncache_timer, (void *)(sch)); CURVNET_RESTORE(); } /* * Find an entry in the syncache. * Returns always with locked syncache_head plus a matching entry or NULL. */ static struct syncache * syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) { struct syncache *sc; struct syncache_head *sch; uint32_t hash; /* * The hash is built on foreign port + local port + foreign address. * We rely on the fact that struct in_conninfo starts with 16 bits * of foreign port, then 16 bits of local port then followed by 128 * bits of foreign address. In case of IPv4 address, the first 3 * 32-bit words of the address always are zeroes. */ hash = jenkins_hash32((uint32_t *)&inc->inc_ie, 5, V_tcp_syncache.hash_secret) & V_tcp_syncache.hashmask; sch = &V_tcp_syncache.hashbase[hash]; *schp = sch; SCH_LOCK(sch); /* Circle through bucket row to find matching entry. */ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) if (bcmp(&inc->inc_ie, &sc->sc_inc.inc_ie, sizeof(struct in_endpoints)) == 0) break; return (sc); /* Always returns with locked sch. */ } /* * This function is called when we get a RST for a * non-existent connection, so that we can see if the * connection is in the syn cache. If it is, zap it. */ void syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) { struct syncache *sc; struct syncache_head *sch; char *s = NULL; sc = syncache_lookup(inc, &sch); /* returns locked sch */ SCH_LOCK_ASSERT(sch); /* * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags. * See RFC 793 page 65, section SEGMENT ARRIVES. */ if (th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or " "FIN flag set, segment ignored\n", s, __func__); TCPSTAT_INC(tcps_badrst); goto done; } /* * No corresponding connection was found in syncache. * If syncookies are enabled and possibly exclusively * used, or we are under memory pressure, a valid RST * may not find a syncache entry. In that case we're * done and no SYN|ACK retransmissions will happen. * Otherwise the RST was misdirected or spoofed. */ if (sc == NULL) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Spurious RST without matching " "syncache entry (possibly syncookie only), " "segment ignored\n", s, __func__); TCPSTAT_INC(tcps_badrst); goto done; } /* * If the RST bit is set, check the sequence number to see * if this is a valid reset segment. * RFC 793 page 37: * In all states except SYN-SENT, all reset (RST) segments * are validated by checking their SEQ-fields. A reset is * valid if its sequence number is in the window. * * The sequence number in the reset segment is normally an * echo of our outgoing acknowlegement numbers, but some hosts * send a reset with the sequence number at the rightmost edge * of our receive window, and we have to handle this case. */ if (SEQ_GEQ(th->th_seq, sc->sc_irs) && SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { syncache_drop(sc, sch); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, " "connection attempt aborted by remote endpoint\n", s, __func__); TCPSTAT_INC(tcps_sc_reset); } else { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != " "IRS %u (+WND %u), segment ignored\n", s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd); TCPSTAT_INC(tcps_badrst); } done: if (s != NULL) free(s, M_TCPLOG); SCH_UNLOCK(sch); } void syncache_badack(struct in_conninfo *inc) { struct syncache *sc; struct syncache_head *sch; sc = syncache_lookup(inc, &sch); /* returns locked sch */ SCH_LOCK_ASSERT(sch); if (sc != NULL) { syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_badack); } SCH_UNLOCK(sch); } void syncache_unreach(struct in_conninfo *inc, struct tcphdr *th) { struct syncache *sc; struct syncache_head *sch; sc = syncache_lookup(inc, &sch); /* returns locked sch */ SCH_LOCK_ASSERT(sch); if (sc == NULL) goto done; /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ if (ntohl(th->th_seq) != sc->sc_iss) goto done; /* * If we've rertransmitted 3 times and this is our second error, * we remove the entry. Otherwise, we allow it to continue on. * This prevents us from incorrectly nuking an entry during a * spurious network outage. * * See tcp_notify(). */ if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) { sc->sc_flags |= SCF_UNREACH; goto done; } syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_unreach); done: SCH_UNLOCK(sch); } /* * Build a new TCP socket structure from a syncache entry. * * On success return the newly created socket with its underlying inp locked. */ static struct socket * syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) { struct tcp_function_block *blk; struct inpcb *inp = NULL; struct socket *so; struct tcpcb *tp; int error; char *s; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* * Ok, create the full blown connection, and set things up * as they would have been set up if we had created the * connection when the SYN arrived. If we can't create * the connection, abort it. */ so = sonewconn(lso, 0); if (so == NULL) { /* * Drop the connection; we will either send a RST or * have the peer retransmit its SYN again after its * RTO and try again. */ TCPSTAT_INC(tcps_listendrop); if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Socket create failed " "due to limits or memory shortage\n", s, __func__); free(s, M_TCPLOG); } goto abort2; } #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif inp = sotoinpcb(so); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WLOCK(inp); /* * Exclusive pcbinfo lock is not required in syncache socket case even * if two inpcb locks can be acquired simultaneously: * - the inpcb in LISTEN state, * - the newly created inp. * * In this case, an inp cannot be at same time in LISTEN state and * just created by an accept() call. */ INP_HASH_WLOCK(&V_tcbinfo); /* Insert new socket into PCB hash list. */ inp->inp_inc.inc_flags = sc->sc_inc.inc_flags; #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { inp->in6p_laddr = sc->sc_inc.inc6_laddr; } else { inp->inp_vflag &= ~INP_IPV6; inp->inp_vflag |= INP_IPV4; #endif inp->inp_laddr = sc->sc_inc.inc_laddr; #ifdef INET6 } #endif /* * If there's an mbuf and it has a flowid, then let's initialise the * inp with that particular flowid. */ if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { inp->inp_flowid = m->m_pkthdr.flowid; inp->inp_flowtype = M_HASHTYPE_GET(m); } /* * Install in the reservation hash table for now, but don't yet * install a connection group since the full 4-tuple isn't yet * configured. */ inp->inp_lport = sc->sc_inc.inc_lport; if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) { /* * Undo the assignments above if we failed to * put the PCB on the hash lists. */ #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) inp->in6p_laddr = in6addr_any; else #endif inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: in_pcbinshash failed " "with error %i\n", s, __func__, error); free(s, M_TCPLOG); } INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } -#ifdef IPSEC - /* Copy old policy into new socket's. */ - if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) - printf("syncache_socket: could not copy policy\n"); -#endif #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { struct inpcb *oinp = sotoinpcb(lso); struct in6_addr laddr6; struct sockaddr_in6 sin6; /* * Inherit socket options from the listening socket. * Note that in6p_inputopts are not (and should not be) * copied, since it stores previously received options and is * used to detect if each new option is different than the * previous one and hence should be passed to a user. * If we copied in6p_inputopts, a user would not be able to * receive options just after calling the accept system call. */ inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS; if (oinp->in6p_outputopts) inp->in6p_outputopts = ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); sin6.sin6_addr = sc->sc_inc.inc6_faddr; sin6.sin6_port = sc->sc_inc.inc_fport; sin6.sin6_flowinfo = sin6.sin6_scope_id = 0; laddr6 = inp->in6p_laddr; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = sc->sc_inc.inc6_laddr; if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6, thread0.td_ucred, m)) != 0) { inp->in6p_laddr = laddr6; if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed " "with error %i\n", s, __func__, error); free(s, M_TCPLOG); } INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } /* Override flowlabel from in6_pcbconnect. */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; inp->inp_flow |= sc->sc_flowlabel; } #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET { struct in_addr laddr; struct sockaddr_in sin; inp->inp_options = (m) ? ip_srcroute(m) : NULL; if (inp->inp_options == NULL) { inp->inp_options = sc->sc_ipopts; sc->sc_ipopts = NULL; } sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_addr = sc->sc_inc.inc_faddr; sin.sin_port = sc->sc_inc.inc_fport; bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero)); laddr = inp->inp_laddr; if (inp->inp_laddr.s_addr == INADDR_ANY) inp->inp_laddr = sc->sc_inc.inc_laddr; if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin, thread0.td_ucred, m)) != 0) { inp->inp_laddr = laddr; if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: in_pcbconnect failed " "with error %i\n", s, __func__, error); free(s, M_TCPLOG); } INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } } #endif /* INET */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* Copy old policy into new socket's. */ + if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0) + printf("syncache_socket: could not copy policy\n"); +#endif INP_HASH_WUNLOCK(&V_tcbinfo); tp = intotcpcb(inp); tcp_state_change(tp, TCPS_SYN_RECEIVED); tp->iss = sc->sc_iss; tp->irs = sc->sc_irs; tcp_rcvseqinit(tp); tcp_sendseqinit(tp); blk = sototcpcb(lso)->t_fb; if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) { /* * Our parents t_fb was not the default, * we need to release our ref on tp->t_fb and * pickup one on the new entry. */ struct tcp_function_block *rblk; rblk = find_and_ref_tcp_fb(blk); KASSERT(rblk != NULL, ("cannot find blk %p out of syncache?", blk)); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_fb = rblk; if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } } tp->snd_wl1 = sc->sc_irs; tp->snd_max = tp->iss + 1; tp->snd_nxt = tp->iss + 1; tp->rcv_up = sc->sc_irs + 1; tp->rcv_wnd = sc->sc_wnd; tp->rcv_adv += tp->rcv_wnd; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY); if (sc->sc_flags & SCF_NOOPT) tp->t_flags |= TF_NOOPT; else { if (sc->sc_flags & SCF_WINSCALE) { tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE; tp->snd_scale = sc->sc_requested_s_scale; tp->request_r_scale = sc->sc_requested_r_scale; } if (sc->sc_flags & SCF_TIMESTAMP) { tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; tp->ts_recent = sc->sc_tsreflect; tp->ts_recent_age = tcp_ts_getticks(); tp->ts_offset = sc->sc_tsoff; } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (sc->sc_flags & SCF_SIGNATURE) tp->t_flags |= TF_SIGNATURE; #endif if (sc->sc_flags & SCF_SACK) tp->t_flags |= TF_SACK_PERMIT; } if (sc->sc_flags & SCF_ECN) tp->t_flags |= TF_ECN_PERMIT; /* * Set up MSS and get cached values from tcp_hostcache. * This might overwrite some of the defaults we just set. */ tcp_mss(tp, sc->sc_peer_mss); /* * If the SYN,ACK was retransmitted, indicate that CWND to be * limited to one segment in cc_conn_init(). * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits. */ if (sc->sc_rxmits > 1) tp->snd_cwnd = 1; #ifdef TCP_OFFLOAD /* * Allow a TOE driver to install its hooks. Note that we hold the * pcbinfo lock too and that prevents tcp_usr_accept from accepting a * new connection before the TOE driver has done its thing. */ if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; tod->tod_offload_socket(tod, sc->sc_todctx, so); } #endif /* * Copy and activate timers. */ tp->t_keepinit = sototcpcb(lso)->t_keepinit; tp->t_keepidle = sototcpcb(lso)->t_keepidle; tp->t_keepintvl = sototcpcb(lso)->t_keepintvl; tp->t_keepcnt = sototcpcb(lso)->t_keepcnt; tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); TCPSTAT_INC(tcps_accepts); return (so); abort: INP_WUNLOCK(inp); abort2: if (so != NULL) soabort(so); return (NULL); } /* * This function gets called when we receive an ACK for a * socket in the LISTEN state. We look up the connection * in the syncache, and if its there, we pull it out of * the cache and turn it into a full-blown connection in * the SYN-RECEIVED state. * * On syncache_socket() success the newly created socket * has its underlying inp locked. */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct socket **lsop, struct mbuf *m) { struct syncache *sc; struct syncache_head *sch; struct syncache scs; char *s; /* * Global TCP locks are held because we manipulate the PCB lists * and create a new socket. */ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK, ("%s: can handle only ACK", __func__)); sc = syncache_lookup(inc, &sch); /* returns locked sch */ SCH_LOCK_ASSERT(sch); #ifdef INVARIANTS /* * Test code for syncookies comparing the syncache stored * values with the reconstructed values from the cookie. */ if (sc != NULL) syncookie_cmp(inc, sch, sc, th, to, *lsop); #endif if (sc == NULL) { /* * There is no syncache entry, so see if this ACK is * a returning syncookie. To do this, first: * A. See if this socket has had a syncache entry dropped in * the past. We don't want to accept a bogus syncookie * if we've never received a SYN. * B. check that the syncookie is valid. If it is, then * cobble up a fake syncache entry, and return. */ if (!V_tcp_syncookies) { SCH_UNLOCK(sch); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected (syncookies disabled)\n", s, __func__); goto failed; } bzero(&scs, sizeof(scs)); sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop); SCH_UNLOCK(sch); if (sc == NULL) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Segment failed " "SYNCOOKIE authentication, segment rejected " "(probably spoofed)\n", s, __func__); goto failed; } +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + /* If received ACK has MD5 signature, check it. */ + if ((to->to_flags & TOF_SIGNATURE) != 0 && + (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0)) { + /* Drop the ACK. */ + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment rejected, " + "MD5 signature doesn't match.\n", + s, __func__); + free(s, M_TCPLOG); + } + TCPSTAT_INC(tcps_sig_err_sigopt); + return (-1); /* Do not send RST */ + } +#endif /* TCP_SIGNATURE */ } else { +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + /* + * If listening socket requested TCP digests, check that + * received ACK has signature and it is correct. + * If not, drop the ACK and leave sc entry in th cache, + * because SYN was received with correct signature. + */ + if (sc->sc_flags & SCF_SIGNATURE) { + if ((to->to_flags & TOF_SIGNATURE) == 0) { + /* No signature */ + TCPSTAT_INC(tcps_sig_err_nosigopt); + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment " + "rejected, MD5 signature wasn't " + "provided.\n", s, __func__); + free(s, M_TCPLOG); + } + return (-1); /* Do not send RST */ + } + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0) { + /* Doesn't match or no SA */ + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment " + "rejected, MD5 signature doesn't " + "match.\n", s, __func__); + free(s, M_TCPLOG); + } + return (-1); /* Do not send RST */ + } + } +#endif /* TCP_SIGNATURE */ /* * Pull out the entry to unlock the bucket row. * * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not * tcp_state_change(). The tcpcb is not existent at this * moment. A new one will be allocated via syncache_socket-> * sonewconn->tcp_usr_attach in TCPS_CLOSED state, then * syncache_socket() will change it to TCPS_SYN_RECEIVED. */ TCPSTATES_DEC(TCPS_SYN_RECEIVED); TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; tod->tod_syncache_removed(tod, sc->sc_todctx); } #endif SCH_UNLOCK(sch); } /* * Segment validation: * ACK must match our initial sequence number + 1 (the SYN|ACK). */ if (th->th_ack != sc->sc_iss + 1) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment " "rejected\n", s, __func__, th->th_ack, sc->sc_iss); goto failed; } /* * The SEQ must fall in the window starting at the received * initial receive sequence number + 1 (the SYN). */ if (SEQ_LEQ(th->th_seq, sc->sc_irs) || SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment " "rejected\n", s, __func__, th->th_seq, sc->sc_irs); goto failed; } /* * If timestamps were not negotiated during SYN/ACK they * must not appear on any segment during this session. */ if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Timestamp not expected, " "segment rejected\n", s, __func__); goto failed; } /* * If timestamps were negotiated during SYN/ACK they should * appear on every segment during this session. * XXXAO: This is only informal as there have been unverified * reports of non-compliants stacks. */ if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp missing, " "no action\n", s, __func__); free(s, M_TCPLOG); s = NULL; } } /* * If timestamps were negotiated, the reflected timestamp * must be equal to what we actually sent in the SYN|ACK * except in the case of 0. Some boxes are known for sending * broken timestamp replies during the 3whs (and potentially * during the connection also). * * Accept the final ACK of 3whs with reflected timestamp of 0 * instead of sending a RST and deleting the syncache entry. */ if ((to->to_flags & TOF_TS) && to->to_tsecr && to->to_tsecr != sc->sc_ts) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, " "segment rejected\n", s, __func__, to->to_tsecr, sc->sc_ts); goto failed; } *lsop = syncache_socket(sc, *lsop, m); if (*lsop == NULL) TCPSTAT_INC(tcps_sc_aborted); else TCPSTAT_INC(tcps_sc_completed); /* how do we find the inp for the new socket? */ if (sc != &scs) syncache_free(sc); return (1); failed: if (sc != NULL && sc != &scs) syncache_free(sc); if (s != NULL) free(s, M_TCPLOG); *lsop = NULL; return (0); } #ifdef TCP_RFC7413 static void syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m, uint64_t response_cookie) { struct inpcb *inp; struct tcpcb *tp; unsigned int *pending_counter; /* * Global TCP locks are held because we manipulate the PCB lists * and create a new socket. */ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending; *lsop = syncache_socket(sc, *lsop, m); if (*lsop == NULL) { TCPSTAT_INC(tcps_sc_aborted); atomic_subtract_int(pending_counter, 1); } else { inp = sotoinpcb(*lsop); tp = intotcpcb(inp); tp->t_flags |= TF_FASTOPEN; tp->t_tfo_cookie = response_cookie; tp->snd_max = tp->iss; tp->snd_nxt = tp->iss; tp->t_tfo_pending = pending_counter; TCPSTAT_INC(tcps_sc_completed); } } #endif /* TCP_RFC7413 */ /* * Given a LISTEN socket and an inbound SYN request, add * this to the syn cache, and send back a segment: * * to the source. * * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. * Doing so would require that we hold onto the data and deliver it * to the application. However, if we are the target of a SYN-flood * DoS attack, an attacker could send data which would eventually * consume all available buffer space if it were ACKed. By not ACKing * the data, we avoid this DoS scenario. * * The exception to the above is when a SYN with a valid TCP Fast Open (TFO) * cookie is processed and a new socket is created. In this case, any data * accompanying the SYN will be queued to the socket by tcp_input() and will * be ACKed either when the application sends response data or the delayed * ACK timer expires, whichever comes first. */ int syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod, void *todctx) { struct tcpcb *tp; struct socket *so; struct syncache *sc = NULL; struct syncache_head *sch; struct mbuf *ipopts = NULL; u_int ltflags; int win, ip_ttl, ip_tos; char *s; int rv = 0; #ifdef INET6 int autoflowlabel = 0; #endif #ifdef MAC struct label *maclabel; #endif struct syncache scs; struct ucred *cred; #ifdef TCP_RFC7413 uint64_t tfo_response_cookie; unsigned int *tfo_pending = NULL; int tfo_cookie_valid = 0; int tfo_response_cookie_valid = 0; #endif INP_WLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, ("%s: unexpected tcp flags", __func__)); /* * Combine all so/tp operations very early to drop the INP lock as * soon as possible. */ so = *lsop; tp = sototcpcb(so); cred = crhold(so->so_cred); #ifdef INET6 if ((inc->inc_flags & INC_ISIPV6) && (inp->inp_flags & IN6P_AUTOFLOWLABEL)) autoflowlabel = 1; #endif ip_ttl = inp->inp_ip_ttl; ip_tos = inp->inp_ip_tos; win = sbspace(&so->so_rcv); ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE)); #ifdef TCP_RFC7413 if (V_tcp_fastopen_enabled && IS_FASTOPEN(tp->t_flags) && (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) { /* * Limit the number of pending TFO connections to * approximately half of the queue limit. This prevents TFO * SYN floods from starving the service by filling the * listen queue with bogus TFO connections. */ if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <= (so->so_qlimit / 2)) { int result; result = tcp_fastopen_check_cookie(inc, to->to_tfo_cookie, to->to_tfo_len, &tfo_response_cookie); tfo_cookie_valid = (result > 0); tfo_response_cookie_valid = (result >= 0); } /* * Remember the TFO pending counter as it will have to be * decremented below if we don't make it to syncache_tfo_expand(). */ tfo_pending = tp->t_tfo_pending; } #endif /* By the time we drop the lock these should no longer be used. */ so = NULL; tp = NULL; #ifdef MAC if (mac_syncache_init(&maclabel) != 0) { INP_WUNLOCK(inp); goto done; } else mac_syncache_create(maclabel, inp); #endif #ifdef TCP_RFC7413 if (!tfo_cookie_valid) #endif INP_WUNLOCK(inp); /* * Remember the IP options, if any. */ #ifdef INET6 if (!(inc->inc_flags & INC_ISIPV6)) #endif #ifdef INET ipopts = (m) ? ip_srcroute(m) : NULL; #else ipopts = NULL; #endif +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + /* + * If listening socket requested TCP digests, check that received + * SYN has signature and it is correct. If signature doesn't match + * or TCP_SIGNATURE support isn't enabled, drop the packet. + */ + if (ltflags & TF_SIGNATURE) { + if ((to->to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + goto done; + } + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0) + goto done; + } +#endif /* TCP_SIGNATURE */ /* * See if we already have an entry for this connection. * If we do, resend the SYN,ACK, and reset the retransmit timer. * * XXX: should the syncache be re-initialized with the contents * of the new SYN here (which may have different options?) * * XXX: We do not check the sequence number to see if this is a * real retransmit or a new connection attempt. The question is * how to handle such a case; either ignore it as spoofed, or * drop the current entry and create a new one? */ sc = syncache_lookup(inc, &sch); /* returns locked entry */ SCH_LOCK_ASSERT(sch); if (sc != NULL) { #ifdef TCP_RFC7413 if (tfo_cookie_valid) INP_WUNLOCK(inp); #endif TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { /* * If we were remembering a previous source route, * forget it and use the new one we've been given. */ if (sc->sc_ipopts) (void) m_free(sc->sc_ipopts); sc->sc_ipopts = ipopts; } /* * Update timestamp if present. */ if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) sc->sc_tsreflect = to->to_tsval; else sc->sc_flags &= ~SCF_TIMESTAMP; #ifdef MAC /* * Since we have already unconditionally allocated label * storage, free it up. The syncache entry will already * have an initialized label we can use. */ mac_syncache_destroy(&maclabel); #endif /* Retransmit SYN|ACK and reset retransmit count. */ if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Received duplicate SYN, " "resetting timer and retransmitting SYN|ACK\n", s, __func__); free(s, M_TCPLOG); } if (syncache_respond(sc, sch, 1, m) == 0) { sc->sc_rxmits = 0; syncache_timeout(sc, sch, 1); TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); } SCH_UNLOCK(sch); goto done; } #ifdef TCP_RFC7413 if (tfo_cookie_valid) { bzero(&scs, sizeof(scs)); sc = &scs; goto skip_alloc; } #endif sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { /* * The zone allocator couldn't provide more entries. * Treat this as if the cache was full; drop the oldest * entry and insert the new one. */ TCPSTAT_INC(tcps_sc_zonefail); if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) syncache_drop(sc, sch); sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { if (V_tcp_syncookies) { bzero(&scs, sizeof(scs)); sc = &scs; } else { SCH_UNLOCK(sch); if (ipopts) (void) m_free(ipopts); goto done; } } } #ifdef TCP_RFC7413 skip_alloc: if (!tfo_cookie_valid && tfo_response_cookie_valid) sc->sc_tfo_cookie = &tfo_response_cookie; #endif /* * Fill in the syncache values. */ #ifdef MAC sc->sc_label = maclabel; #endif sc->sc_cred = cred; cred = NULL; sc->sc_ipopts = ipopts; bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); #ifdef INET6 if (!(inc->inc_flags & INC_ISIPV6)) #endif { sc->sc_ip_tos = ip_tos; sc->sc_ip_ttl = ip_ttl; } #ifdef TCP_OFFLOAD sc->sc_tod = tod; sc->sc_todctx = todctx; #endif sc->sc_irs = th->th_seq; sc->sc_iss = arc4random(); sc->sc_flags = 0; sc->sc_flowlabel = 0; /* * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN]. * win was derived from socket earlier in the function. */ win = imax(win, 0); win = imin(win, TCP_MAXWIN); sc->sc_wnd = win; if (V_tcp_do_rfc1323) { /* * A timestamp received in a SYN makes * it ok to send timestamp requests and replies. */ if (to->to_flags & TOF_TS) { sc->sc_tsreflect = to->to_tsval; sc->sc_ts = tcp_ts_getticks(); sc->sc_flags |= SCF_TIMESTAMP; } if (to->to_flags & TOF_SCALE) { int wscale = 0; /* * Pick the smallest possible scaling factor that * will still allow us to scale up to sb_max, aka * kern.ipc.maxsockbuf. * * We do this because there are broken firewalls that * will corrupt the window scale option, leading to * the other endpoint believing that our advertised * window is unscaled. At scale factors larger than * 5 the unscaled window will drop below 1500 bytes, * leading to serious problems when traversing these * broken firewalls. * * With the default maxsockbuf of 256K, a scale factor * of 3 will be chosen by this algorithm. Those who * choose a larger maxsockbuf should watch out * for the compatibility problems mentioned above. * * RFC1323: The Window field in a SYN (i.e., a * or ) segment itself is never scaled. */ while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max) wscale++; sc->sc_requested_r_scale = wscale; sc->sc_requested_s_scale = to->to_wscale; sc->sc_flags |= SCF_WINSCALE; } } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* - * If listening socket requested TCP digests, OR received SYN - * contains the option, flag this in the syncache so that - * syncache_respond() will do the right thing with the SYN+ACK. + * If listening socket requested TCP digests, flag this in the + * syncache so that syncache_respond() will do the right thing + * with the SYN+ACK. */ - if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE) + if (ltflags & TF_SIGNATURE) sc->sc_flags |= SCF_SIGNATURE; -#endif +#endif /* TCP_SIGNATURE */ if (to->to_flags & TOF_SACKPERM) sc->sc_flags |= SCF_SACK; if (to->to_flags & TOF_MSS) sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ if (ltflags & TF_NOOPT) sc->sc_flags |= SCF_NOOPT; if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn) sc->sc_flags |= SCF_ECN; if (V_tcp_syncookies) sc->sc_iss = syncookie_generate(sch, sc); #ifdef INET6 if (autoflowlabel) { if (V_tcp_syncookies) sc->sc_flowlabel = sc->sc_iss; else sc->sc_flowlabel = ip6_randomflowlabel(); sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK; } #endif SCH_UNLOCK(sch); #ifdef TCP_RFC7413 if (tfo_cookie_valid) { syncache_tfo_expand(sc, lsop, m, tfo_response_cookie); /* INP_WUNLOCK(inp) will be performed by the caller */ rv = 1; goto tfo_expanded; } #endif /* * Do a standard 3-way handshake. */ if (syncache_respond(sc, sch, 0, m) == 0) { if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs) syncache_free(sc); else if (sc != &scs) syncache_insert(sc, sch); /* locks and unlocks sch */ TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); } else { if (sc != &scs) syncache_free(sc); TCPSTAT_INC(tcps_sc_dropped); } done: if (m) { *lsop = NULL; m_freem(m); } #ifdef TCP_RFC7413 /* * If tfo_pending is not NULL here, then a TFO SYN that did not * result in a new socket was processed and the associated pending * counter has not yet been decremented. All such TFO processing paths * transit this point. */ if (tfo_pending != NULL) tcp_fastopen_decrement_counter(tfo_pending); tfo_expanded: #endif if (cred != NULL) crfree(cred); #ifdef MAC if (sc == &scs) mac_syncache_destroy(&maclabel); #endif return (rv); } /* * Send SYN|ACK to the peer. Either in response to the peer's SYN, * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL. */ static int syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, const struct mbuf *m0) { struct ip *ip = NULL; struct mbuf *m; struct tcphdr *th = NULL; int optlen, error = 0; /* Make compiler happy */ u_int16_t hlen, tlen, mssopt; struct tcpopt to; #ifdef INET6 struct ip6_hdr *ip6 = NULL; #endif -#ifdef TCP_SIGNATURE - struct secasvar *sav; -#endif - hlen = #ifdef INET6 (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) : #endif sizeof(struct ip); tlen = hlen + sizeof(struct tcphdr); /* Determine MSS we advertize to other end of connection. */ mssopt = tcp_mssopt(&sc->sc_inc); if (sc->sc_peer_mss) mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss); /* XXX: Assume that the entire packet will fit in a header mbuf. */ KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN, ("syncache: mbuf too small")); /* Create the IP+TCP header from scratch. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); #ifdef MAC mac_syncache_create_mbuf(sc->sc_label, m); #endif m->m_data += max_linkhdr; m->m_len = tlen; m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_src = sc->sc_inc.inc6_laddr; ip6->ip6_dst = sc->sc_inc.inc6_faddr; ip6->ip6_plen = htons(tlen - hlen); /* ip6_hlim is set after checksum */ ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK; ip6->ip6_flow |= sc->sc_flowlabel; th = (struct tcphdr *)(ip6 + 1); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_len = htons(tlen); ip->ip_id = 0; ip->ip_off = 0; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = sc->sc_inc.inc_laddr; ip->ip_dst = sc->sc_inc.inc_faddr; ip->ip_ttl = sc->sc_ip_ttl; ip->ip_tos = sc->sc_ip_tos; /* * See if we should do MTU discovery. Route lookups are * expensive, so we will only unset the DF bit if: * * 1) path_mtu_discovery is disabled * 2) the SCF_UNREACH flag has been set */ if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0)) ip->ip_off |= htons(IP_DF); th = (struct tcphdr *)(ip + 1); } #endif /* INET */ th->th_sport = sc->sc_inc.inc_lport; th->th_dport = sc->sc_inc.inc_fport; th->th_seq = htonl(sc->sc_iss); th->th_ack = htonl(sc->sc_irs + 1); th->th_off = sizeof(struct tcphdr) >> 2; th->th_x2 = 0; th->th_flags = TH_SYN|TH_ACK; th->th_win = htons(sc->sc_wnd); th->th_urp = 0; if (sc->sc_flags & SCF_ECN) { th->th_flags |= TH_ECE; TCPSTAT_INC(tcps_ecn_shs); } /* Tack on the TCP options. */ if ((sc->sc_flags & SCF_NOOPT) == 0) { to.to_flags = 0; to.to_mss = mssopt; to.to_flags = TOF_MSS; if (sc->sc_flags & SCF_WINSCALE) { to.to_wscale = sc->sc_requested_r_scale; to.to_flags |= TOF_SCALE; } if (sc->sc_flags & SCF_TIMESTAMP) { /* Virgin timestamp or TCP cookie enhanced one. */ to.to_tsval = sc->sc_ts; to.to_tsecr = sc->sc_tsreflect; to.to_flags |= TOF_TS; } if (sc->sc_flags & SCF_SACK) to.to_flags |= TOF_SACKPERM; -#ifdef TCP_SIGNATURE - sav = NULL; - if (sc->sc_flags & SCF_SIGNATURE) { - sav = tcp_get_sav(m, IPSEC_DIR_OUTBOUND); - if (sav != NULL) - to.to_flags |= TOF_SIGNATURE; - else { - - /* - * We've got SCF_SIGNATURE flag - * inherited from listening socket, - * but no SADB key for given source - * address. Assume signature is not - * required and remove signature flag - * instead of silently dropping - * connection. - */ - if (locked == 0) - SCH_LOCK(sch); - sc->sc_flags &= ~SCF_SIGNATURE; - if (locked == 0) - SCH_UNLOCK(sch); - } - } +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) + to.to_flags |= TOF_SIGNATURE; #endif - #ifdef TCP_RFC7413 if (sc->sc_tfo_cookie) { to.to_flags |= TOF_FASTOPEN; to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; to.to_tfo_cookie = sc->sc_tfo_cookie; /* don't send cookie again when retransmitting response */ sc->sc_tfo_cookie = NULL; } #endif optlen = tcp_addoptions(&to, (u_char *)(th + 1)); /* Adjust headers by option size. */ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; m->m_len += optlen; m->m_pkthdr.len += optlen; - -#ifdef TCP_SIGNATURE - if (sc->sc_flags & SCF_SIGNATURE) - tcp_signature_do_compute(m, 0, optlen, - to.to_signature, sav); -#endif #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen); else #endif ip->ip_len = htons(ntohs(ip->ip_len) + optlen); +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) { + KASSERT(to.to_flags & TOF_SIGNATURE, + ("tcp_addoptions() didn't set tcp_signature")); + + /* NOTE: to.to_signature is inside of mbuf */ + if (!TCPMD5_ENABLED() || + TCPMD5_OUTPUT(m, th, to.to_signature) != 0) { + m_freem(m); + return (EACCES); + } + } +#endif } else optlen = 0; M_SETFIB(m, sc->sc_inc.inc_fibnum); m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); /* * If we have peer's SYN and it has a flowid, then let's assign it to * our SYN|ACK. ip6_output() and ip_output() will not assign flowid * to SYN|ACK due to lack of inp here. */ if (m0 != NULL && M_HASHTYPE_GET(m0) != M_HASHTYPE_NONE) { m->m_pkthdr.flowid = m0->m_pkthdr.flowid; M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0)); } #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(NULL, NULL); #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; error = tod->tod_syncache_respond(tod, sc->sc_todctx, m); return (error); } #endif error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(tlen + optlen - hlen + IPPROTO_TCP)); #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; error = tod->tod_syncache_respond(tod, sc->sc_todctx, m); return (error); } #endif error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL); } #endif return (error); } /* * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks * that exceed the capacity of the syncache by avoiding the storage of any * of the SYNs we receive. Syncookies defend against blind SYN flooding * attacks where the attacker does not have access to our responses. * * Syncookies encode and include all necessary information about the * connection setup within the SYN|ACK that we send back. That way we * can avoid keeping any local state until the ACK to our SYN|ACK returns * (if ever). Normally the syncache and syncookies are running in parallel * with the latter taking over when the former is exhausted. When matching * syncache entry is found the syncookie is ignored. * * The only reliable information persisting the 3WHS is our initial sequence * number ISS of 32 bits. Syncookies embed a cryptographically sufficient * strong hash (MAC) value and a few bits of TCP SYN options in the ISS * of our SYN|ACK. The MAC can be recomputed when the ACK to our SYN|ACK * returns and signifies a legitimate connection if it matches the ACK. * * The available space of 32 bits to store the hash and to encode the SYN * option information is very tight and we should have at least 24 bits for * the MAC to keep the number of guesses by blind spoofing reasonably high. * * SYN option information we have to encode to fully restore a connection: * MSS: is imporant to chose an optimal segment size to avoid IP level * fragmentation along the path. The common MSS values can be encoded * in a 3-bit table. Uncommon values are captured by the next lower value * in the table leading to a slight increase in packetization overhead. * WSCALE: is necessary to allow large windows to be used for high delay- * bandwidth product links. Not scaling the window when it was initially * negotiated is bad for performance as lack of scaling further decreases * the apparent available send window. We only need to encode the WSCALE * we received from the remote end. Our end can be recalculated at any * time. The common WSCALE values can be encoded in a 3-bit table. * Uncommon values are captured by the next lower value in the table * making us under-estimate the available window size halving our * theoretically possible maximum throughput for that connection. * SACK: Greatly assists in packet loss recovery and requires 1 bit. * TIMESTAMP and SIGNATURE is not encoded because they are permanent options * that are included in all segments on a connection. We enable them when * the ACK has them. * * Security of syncookies and attack vectors: * * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod) * together with the gloabl secret to make it unique per connection attempt. * Thus any change of any of those parameters results in a different MAC output * in an unpredictable way unless a collision is encountered. 24 bits of the * MAC are embedded into the ISS. * * To prevent replay attacks two rotating global secrets are updated with a * new random value every 15 seconds. The life-time of a syncookie is thus * 15-30 seconds. * * Vector 1: Attacking the secret. This requires finding a weakness in the * MAC itself or the way it is used here. The attacker can do a chosen plain * text attack by varying and testing the all parameters under his control. * The strength depends on the size and randomness of the secret, and the * cryptographic security of the MAC function. Due to the constant updating * of the secret the attacker has at most 29.999 seconds to find the secret * and launch spoofed connections. After that he has to start all over again. * * Vector 2: Collision attack on the MAC of a single ACK. With a 24 bit MAC * size an average of 4,823 attempts are required for a 50% chance of success * to spoof a single syncookie (birthday collision paradox). However the * attacker is blind and doesn't know if one of his attempts succeeded unless * he has a side channel to interfere success from. A single connection setup * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets. * This many attempts are required for each one blind spoofed connection. For * every additional spoofed connection he has to launch another N attempts. * Thus for a sustained rate 100 spoofed connections per second approximately * 1,800,000 packets per second would have to be sent. * * NB: The MAC function should be fast so that it doesn't become a CPU * exhaustion attack vector itself. * * References: * RFC4987 TCP SYN Flooding Attacks and Common Mitigations * SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996 * http://cr.yp.to/syncookies.html (overview) * http://cr.yp.to/syncookies/archive (details) * * * Schematic construction of a syncookie enabled Initial Sequence Number: * 0 1 2 3 * 12345678901234567890123456789012 * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP| * * x 24 MAC (truncated) * W 3 Send Window Scale index * M 3 MSS index * S 1 SACK permitted * P 1 Odd/even secret */ /* * Distribution and probability of certain MSS values. Those in between are * rounded down to the next lower one. * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011] * .2% .3% 5% 7% 7% 20% 15% 45% */ static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 }; /* * Distribution and probability of certain WSCALE values. We have to map the * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3 * bits based on prevalence of certain values. Where we don't have an exact * match for are rounded down to the next lower one letting us under-estimate * the true available window. At the moment this would happen only for the * very uncommon values 3, 5 and those above 8 (more than 16MB socket buffer * and window size). The absence of the WSCALE option (no scaling in either * direction) is encoded with index zero. * [WSCALE values histograms, Allman, 2012] * X 10 10 35 5 6 14 10% by host * X 11 4 5 5 18 49 3% by connections */ static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 }; /* * Compute the MAC for the SYN cookie. SIPHASH-2-4 is chosen for its speed * and good cryptographic properties. */ static uint32_t syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags, uint8_t *secbits, uintptr_t secmod) { SIPHASH_CTX ctx; uint32_t siphash[2]; SipHash24_Init(&ctx); SipHash_SetKey(&ctx, secbits); switch (inc->inc_flags & INC_ISIPV6) { #ifdef INET case 0: SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr)); SipHash_Update(&ctx, &inc->inc_laddr, sizeof(inc->inc_laddr)); break; #endif #ifdef INET6 case INC_ISIPV6: SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr)); SipHash_Update(&ctx, &inc->inc6_laddr, sizeof(inc->inc6_laddr)); break; #endif } SipHash_Update(&ctx, &inc->inc_fport, sizeof(inc->inc_fport)); SipHash_Update(&ctx, &inc->inc_lport, sizeof(inc->inc_lport)); SipHash_Update(&ctx, &irs, sizeof(irs)); SipHash_Update(&ctx, &flags, sizeof(flags)); SipHash_Update(&ctx, &secmod, sizeof(secmod)); SipHash_Final((u_int8_t *)&siphash, &ctx); return (siphash[0] ^ siphash[1]); } static tcp_seq syncookie_generate(struct syncache_head *sch, struct syncache *sc) { u_int i, mss, secbit, wscale; uint32_t iss, hash; uint8_t *secbits; union syncookie cookie; SCH_LOCK_ASSERT(sch); cookie.cookie = 0; /* Map our computed MSS into the 3-bit index. */ mss = min(tcp_mssopt(&sc->sc_inc), max(sc->sc_peer_mss, V_tcp_minmss)); for (i = nitems(tcp_sc_msstab) - 1; tcp_sc_msstab[i] > mss && i > 0; i--) ; cookie.flags.mss_idx = i; /* * Map the send window scale into the 3-bit index but only if * the wscale option was received. */ if (sc->sc_flags & SCF_WINSCALE) { wscale = sc->sc_requested_s_scale; for (i = nitems(tcp_sc_wstab) - 1; tcp_sc_wstab[i] > wscale && i > 0; i--) ; cookie.flags.wscale_idx = i; } /* Can we do SACK? */ if (sc->sc_flags & SCF_SACK) cookie.flags.sack_ok = 1; /* Which of the two secrets to use. */ secbit = sch->sch_sc->secret.oddeven & 0x1; cookie.flags.odd_even = secbit; secbits = sch->sch_sc->secret.key[secbit]; hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits, (uintptr_t)sch); /* * Put the flags into the hash and XOR them to get better ISS number * variance. This doesn't enhance the cryptographic strength and is * done to prevent the 8 cookie bits from showing up directly on the * wire. */ iss = hash & ~0xff; iss |= cookie.cookie ^ (hash >> 24); /* Randomize the timestamp. */ if (sc->sc_flags & SCF_TIMESTAMP) { sc->sc_ts = arc4random(); sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks(); } TCPSTAT_INC(tcps_sc_sendcookie); return (iss); } static struct syncache * syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, struct socket *lso) { uint32_t hash; uint8_t *secbits; tcp_seq ack, seq; int wnd, wscale = 0; union syncookie cookie; SCH_LOCK_ASSERT(sch); /* * Pull information out of SYN-ACK/ACK and revert sequence number * advances. */ ack = th->th_ack - 1; seq = th->th_seq - 1; /* * Unpack the flags containing enough information to restore the * connection. */ cookie.cookie = (ack & 0xff) ^ (ack >> 24); /* Which of the two secrets to use. */ secbits = sch->sch_sc->secret.key[cookie.flags.odd_even]; hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch); /* The recomputed hash matches the ACK if this was a genuine cookie. */ if ((ack & ~0xff) != (hash & ~0xff)) return (NULL); /* Fill in the syncache values. */ sc->sc_flags = 0; bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); sc->sc_ipopts = NULL; sc->sc_irs = seq; sc->sc_iss = ack; switch (inc->inc_flags & INC_ISIPV6) { #ifdef INET case 0: sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl; sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos; break; #endif #ifdef INET6 case INC_ISIPV6: if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL) sc->sc_flowlabel = sc->sc_iss & IPV6_FLOWLABEL_MASK; break; #endif } sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx]; /* We can simply recompute receive window scale we sent earlier. */ while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max) wscale++; /* Only use wscale if it was enabled in the orignal SYN. */ if (cookie.flags.wscale_idx > 0) { sc->sc_requested_r_scale = wscale; sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx]; sc->sc_flags |= SCF_WINSCALE; } wnd = sbspace(&lso->so_rcv); wnd = imax(wnd, 0); wnd = imin(wnd, TCP_MAXWIN); sc->sc_wnd = wnd; if (cookie.flags.sack_ok) sc->sc_flags |= SCF_SACK; if (to->to_flags & TOF_TS) { sc->sc_flags |= SCF_TIMESTAMP; sc->sc_tsreflect = to->to_tsval; sc->sc_ts = to->to_tsecr; sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks(); } if (to->to_flags & TOF_SIGNATURE) sc->sc_flags |= SCF_SIGNATURE; sc->sc_rxmits = 0; TCPSTAT_INC(tcps_sc_recvcookie); return (sc); } #ifdef INVARIANTS static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, struct socket *lso) { struct syncache scs, *scx; char *s; bzero(&scs, sizeof(scs)); scx = syncookie_lookup(inc, sch, &scs, th, to, lso); if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL) return (0); if (scx != NULL) { if (sc->sc_peer_mss != scx->sc_peer_mss) log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n", s, __func__, sc->sc_peer_mss, scx->sc_peer_mss); if (sc->sc_requested_r_scale != scx->sc_requested_r_scale) log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n", s, __func__, sc->sc_requested_r_scale, scx->sc_requested_r_scale); if (sc->sc_requested_s_scale != scx->sc_requested_s_scale) log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n", s, __func__, sc->sc_requested_s_scale, scx->sc_requested_s_scale); if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK)) log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__); } if (s != NULL) free(s, M_TCPLOG); return (0); } #endif /* INVARIANTS */ static void syncookie_reseed(void *arg) { struct tcp_syncache *sc = arg; uint8_t *secbits; int secbit; /* * Reseeding the secret doesn't have to be protected by a lock. * It only must be ensured that the new random values are visible * to all CPUs in a SMP environment. The atomic with release * semantics ensures that. */ secbit = (sc->secret.oddeven & 0x1) ? 0 : 1; secbits = sc->secret.key[secbit]; arc4rand(secbits, SYNCOOKIE_SECRET_SIZE, 0); atomic_add_rel_int(&sc->secret.oddeven, 1); /* Reschedule ourself. */ callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz); } /* * Exports the syncache entries to userland so that netstat can display * them alongside the other sockets. This function is intended to be * called only from tcp_pcblist. * * Due to concurrency on an active system, the number of pcbs exported * may have no relation to max_pcbs. max_pcbs merely indicates the * amount of space the caller allocated for this function to use. */ int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported) { struct xtcpcb xt; struct syncache *sc; struct syncache_head *sch; int count, error, i; for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) { sch = &V_tcp_syncache.hashbase[i]; SCH_LOCK(sch); TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { if (count >= max_pcbs) { SCH_UNLOCK(sch); goto exit; } if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0) continue; bzero(&xt, sizeof(xt)); xt.xt_len = sizeof(xt); if (sc->sc_inc.inc_flags & INC_ISIPV6) xt.xt_inp.inp_vflag = INP_IPV6; else xt.xt_inp.inp_vflag = INP_IPV4; bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo)); xt.xt_tp.t_inpcb = &xt.xt_inp; xt.xt_tp.t_state = TCPS_SYN_RECEIVED; xt.xt_socket.xso_protocol = IPPROTO_TCP; xt.xt_socket.xso_len = sizeof (struct xsocket); xt.xt_socket.so_type = SOCK_STREAM; xt.xt_socket.so_state = SS_ISCONNECTING; error = SYSCTL_OUT(req, &xt, sizeof xt); if (error) { SCH_UNLOCK(sch); goto exit; } count++; } SCH_UNLOCK(sch); } exit: *pcbs_exported = count; return error; } diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index ede48fa509d9..7d225eec6b47 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,2343 +1,2343 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * Copyright (c) 2006-2007 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #include #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef TCP_OFFLOAD #include #endif +#include /* * TCP protocol interface to socket abstraction. */ static int tcp_attach(struct socket *); #ifdef INET static int tcp_connect(struct tcpcb *, struct sockaddr *, struct thread *td); #endif /* INET */ #ifdef INET6 static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct thread *td); #endif /* INET6 */ static void tcp_disconnect(struct tcpcb *); static void tcp_usrclosed(struct tcpcb *); static void tcp_fill_info(struct tcpcb *, struct tcp_info *); #ifdef TCPDEBUG #define TCPDEBUG0 int ostate = 0 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ tcp_trace(TA_USER, ostate, tp, 0, 0, req) #else #define TCPDEBUG0 #define TCPDEBUG1() #define TCPDEBUG2(req) #endif /* * TCP attaches to socket via pru_attach(), reserving space, * and an internet control block. */ static int tcp_usr_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct tcpcb *tp = NULL; int error; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); TCPDEBUG1(); error = tcp_attach(so); if (error) goto out; if ((so->so_options & SO_LINGER) && so->so_linger == 0) so->so_linger = TCP_LINGERTIME; inp = sotoinpcb(so); tp = intotcpcb(inp); out: TCPDEBUG2(PRU_ATTACH); TCP_PROBE2(debug__user, tp, PRU_ATTACH); return error; } /* * tcp_detach is called when the socket layer loses its final reference * to the socket, be it a file descriptor reference, a reference from TCP, * etc. At this point, there is only one case in which we will keep around * inpcb state: time wait. * * This function can probably be re-absorbed back into tcp_usr_detach() now * that there is a single detach path. */ static void tcp_detach(struct socket *so, struct inpcb *inp) { struct tcpcb *tp; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp")); KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so")); tp = intotcpcb(inp); if (inp->inp_flags & INP_TIMEWAIT) { /* * There are two cases to handle: one in which the time wait * state is being discarded (INP_DROPPED), and one in which * this connection will remain in timewait. In the former, * it is time to discard all state (except tcptw, which has * already been discarded by the timewait close code, which * should be further up the call stack somewhere). In the * latter case, we detach from the socket, but leave the pcb * present until timewait ends. * * XXXRW: Would it be cleaner to free the tcptw here? * * Astute question indeed, from twtcp perspective there are * three cases to consider: * * #1 tcp_detach is called at tcptw creation time by * tcp_twstart, then do not discard the newly created tcptw * and leave inpcb present until timewait ends * #2 tcp_detach is called at timewait end (or reuse) by * tcp_twclose, then the tcptw has already been discarded * (or reused) and inpcb is freed here * #3 tcp_detach is called() after timewait ends (or reuse) * (e.g. by soclose), then tcptw has already been discarded * (or reused) and inpcb is freed here * * In all three cases the tcptw should not be freed here. */ if (inp->inp_flags & INP_DROPPED) { in_pcbdetach(inp); if (__predict_true(tp == NULL)) { in_pcbfree(inp); } else { /* * This case should not happen as in TIMEWAIT * state the inp should not be destroyed before * its tcptw. If INVARIANTS is defined, panic. */ #ifdef INVARIANTS panic("%s: Panic before an inp double-free: " "INP_TIMEWAIT && INP_DROPPED && tp != NULL" , __func__); #else log(LOG_ERR, "%s: Avoid an inp double-free: " "INP_TIMEWAIT && INP_DROPPED && tp != NULL" , __func__); #endif INP_WUNLOCK(inp); } } else { in_pcbdetach(inp); INP_WUNLOCK(inp); } } else { /* * If the connection is not in timewait, we consider two * two conditions: one in which no further processing is * necessary (dropped || embryonic), and one in which TCP is * not yet done, but no longer requires the socket, so the * pcb will persist for the time being. * * XXXRW: Does the second case still occur? */ if (inp->inp_flags & INP_DROPPED || tp->t_state < TCPS_SYN_SENT) { tcp_discardcb(tp); in_pcbdetach(inp); in_pcbfree(inp); } else { in_pcbdetach(inp); INP_WUNLOCK(inp); } } } /* * pru_detach() detaches the TCP protocol from the socket. * If the protocol state is non-embryonic, then can't * do this directly: have to initiate a pru_disconnect(), * which may finish later; embryonic TCB's can just * be discarded here. */ static void tcp_usr_detach(struct socket *so) { struct inpcb *inp; int rlock = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); if (!INP_INFO_WLOCKED(&V_tcbinfo)) { INP_INFO_RLOCK(&V_tcbinfo); rlock = 1; } INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_detach: inp_socket == NULL")); tcp_detach(so, inp); if (rlock) INP_INFO_RUNLOCK(&V_tcbinfo); } #ifdef INET /* * Give the socket an address. */ static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; struct sockaddr_in *sinp; sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) return (EINVAL); /* * Must check for multicast addresses and disallow binding * to them. */ if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) return (EAFNOSUPPORT); TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = EINVAL; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: TCPDEBUG2(PRU_BIND); TCP_PROBE2(debug__user, tp, PRU_BIND); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; struct sockaddr_in6 *sin6p; sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) return (EINVAL); /* * Must check for multicast addresses and disallow binding * to them. */ if (sin6p->sin6_family == AF_INET6 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = EINVAL; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); INP_HASH_WLOCK(&V_tcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } } #endif error = in6_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: TCPDEBUG2(PRU_BIND); TCP_PROBE2(debug__user, tp, PRU_BIND); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ #ifdef INET /* * Prepare to accept connections. */ static int tcp_usr_listen(struct socket *so, int backlog, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = EINVAL; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); SOCK_LOCK(so); error = solisten_proto_check(so); INP_HASH_WLOCK(&V_tcbinfo); if (error == 0 && inp->inp_lport == 0) error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) tcp_offload_listen_start(tp); #endif } SOCK_UNLOCK(so); #ifdef TCP_RFC7413 if (IS_FASTOPEN(tp->t_flags)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); #endif out: TCPDEBUG2(PRU_LISTEN); TCP_PROBE2(debug__user, tp, PRU_LISTEN); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = EINVAL; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); SOCK_LOCK(so); error = solisten_proto_check(so); INP_HASH_WLOCK(&V_tcbinfo); if (error == 0 && inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); } INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) tcp_offload_listen_start(tp); #endif } SOCK_UNLOCK(so); #ifdef TCP_RFC7413 if (IS_FASTOPEN(tp->t_flags)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); #endif out: TCPDEBUG2(PRU_LISTEN); TCP_PROBE2(debug__user, tp, PRU_LISTEN); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ #ifdef INET /* * Initiate connection to peer. * Create a template for use in transmissions on this connection. * Enter SYN_SENT state, and mark socket as connecting. * Start keep-alive timer, and seed output sequence space. * Send initial segment on connection. */ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; struct sockaddr_in *sinp; sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) return (EINVAL); /* * Must disallow TCP ``connections'' to multicast addresses. */ if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) return (EAFNOSUPPORT); if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) return (error); TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_TIMEWAIT) { error = EADDRINUSE; goto out; } if (inp->inp_flags & INP_DROPPED) { error = ECONNREFUSED; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); if ((error = tcp_connect(tp, nam, td)) != 0) goto out; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out; #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tp->t_fb->tfb_tcp_output(tp); out: TCPDEBUG2(PRU_CONNECT); TCP_PROBE2(debug__user, tp, PRU_CONNECT); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; struct sockaddr_in6 *sin6p; TCPDEBUG0; sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) return (EINVAL); /* * Must disallow TCP ``connections'' to multicast addresses. */ if (sin6p->sin6_family == AF_INET6 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_TIMEWAIT) { error = EADDRINUSE; goto out; } if (inp->inp_flags & INP_DROPPED) { error = ECONNREFUSED; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); #ifdef INET /* * XXXRW: Some confusion: V4/V6 flags relate to binding, and * therefore probably require the hash lock, which isn't held here. * Is this a significant problem? */ if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; if ((error = prison_remote_ip4(td->td_ucred, &sin.sin_addr)) != 0) goto out; if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) goto out; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out; #endif error = tp->t_fb->tfb_tcp_output(tp); goto out; } #endif inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; inp->inp_inc.inc_flags |= INC_ISIPV6; if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0) goto out; if ((error = tcp6_connect(tp, nam, td)) != 0) goto out; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out; #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tp->t_fb->tfb_tcp_output(tp); out: TCPDEBUG2(PRU_CONNECT); TCP_PROBE2(debug__user, tp, PRU_CONNECT); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ /* * Initiate disconnect from peer. * If connection never passed embryonic stage, just drop; * else if don't need to let data drain, then can just drop anyways, * else have to begin TCP shutdown process: mark socket disconnecting, * drain unread data, state switch to reflect user close, and * send segment (e.g. FIN) to peer. Socket will be really disconnected * when peer sends FIN and acks ours. * * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. */ static int tcp_usr_disconnect(struct socket *so) { struct inpcb *inp; struct tcpcb *tp = NULL; int error = 0; TCPDEBUG0; INP_INFO_RLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_TIMEWAIT) goto out; if (inp->inp_flags & INP_DROPPED) { error = ECONNRESET; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); tcp_disconnect(tp); out: TCPDEBUG2(PRU_DISCONNECT); TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } #ifdef INET /* * Accept a connection. Essentially all the work is done at higher levels; * just return the address of the peer, storing through addr. */ static int tcp_usr_accept(struct socket *so, struct sockaddr **nam) { int error = 0; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; struct in_addr addr; in_port_t port = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) return (ECONNABORTED); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNABORTED; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); /* * We inline in_getpeeraddr and COMMON_END here, so that we can * copy the data of interest and defer the malloc until after we * release the lock. */ port = inp->inp_fport; addr = inp->inp_faddr; out: TCPDEBUG2(PRU_ACCEPT); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); if (error == 0) *nam = in_sockaddr(port, &addr); return error; } #endif /* INET */ #ifdef INET6 static int tcp6_usr_accept(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = NULL; int error = 0; struct tcpcb *tp = NULL; struct in_addr addr; struct in6_addr addr6; in_port_t port = 0; int v4 = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) return (ECONNABORTED); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNABORTED; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); /* * We inline in6_mapped_peeraddr and COMMON_END here, so that we can * copy the data of interest and defer the malloc until after we * release the lock. */ if (inp->inp_vflag & INP_IPV4) { v4 = 1; port = inp->inp_fport; addr = inp->inp_faddr; } else { port = inp->inp_fport; addr6 = inp->in6p_faddr; } out: TCPDEBUG2(PRU_ACCEPT); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); else *nam = in6_sockaddr(port, &addr6); } return error; } #endif /* INET6 */ /* * Mark the connection as being incapable of further output. */ static int tcp_usr_shutdown(struct socket *so) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; TCPDEBUG0; INP_INFO_RLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNRESET; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); socantsendmore(so); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) error = tp->t_fb->tfb_tcp_output(tp); out: TCPDEBUG2(PRU_SHUTDOWN); TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } /* * After a receive, possibly send window update to peer. */ static int tcp_usr_rcvd(struct socket *so, int flags) { struct inpcb *inp; struct tcpcb *tp = NULL; int error = 0; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNRESET; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); #ifdef TCP_RFC7413 /* * For passively-created TFO connections, don't attempt a window * update while still in SYN_RECEIVED as this may trigger an early * SYN|ACK. It is preferable to have the SYN|ACK be sent along with * application response data, or failing that, when the DELACK timer * expires. */ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED)) goto out; #endif #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) tcp_offload_rcvd(tp); else #endif tp->t_fb->tfb_tcp_output(tp); out: TCPDEBUG2(PRU_RCVD); TCP_PROBE2(debug__user, tp, PRU_RCVD); INP_WUNLOCK(inp); return (error); } /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. */ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; #ifdef INET6 int isipv6; #endif TCPDEBUG0; /* * We require the pcbinfo lock if we will close the socket as part of * this call. */ if (flags & PRUS_EOF) INP_INFO_RLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { if (control) m_freem(control); /* * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible * for freeing memory. */ if (m && (flags & PRUS_NOTREADY) == 0) m_freem(m); error = ECONNRESET; goto out; } #ifdef INET6 isipv6 = nam && nam->sa_family == AF_INET6; #endif /* INET6 */ tp = intotcpcb(inp); TCPDEBUG1(); if (control) { /* TCP doesn't do control messages (rights, creds, etc) */ if (control->m_len) { m_freem(control); if (m) m_freem(m); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if (!(flags & PRUS_OOB)) { sbappendstream(&so->so_snd, m, flags); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = tcp_connect(tp, nam, td); #endif if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); socantsendmore(so); tcp_usrclosed(tp); } if (!(inp->inp_flags & INP_DROPPED) && !(flags & PRUS_NOTREADY)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tp->t_fb->tfb_tcp_output(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } else { /* * XXXRW: PRUS_EOF not implemented with PRUS_OOB? */ SOCKBUF_LOCK(&so->so_snd); if (sbspace(&so->so_snd) < -512) { SOCKBUF_UNLOCK(&so->so_snd); m_freem(m); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ sbappendstream_locked(&so->so_snd, m, flags); SOCKBUF_UNLOCK(&so->so_snd); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = tcp_connect(tp, nam, td); #endif if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } tp->snd_up = tp->snd_una + sbavail(&so->so_snd); if (!(flags & PRUS_NOTREADY)) { tp->t_flags |= TF_FORCEDATA; error = tp->t_fb->tfb_tcp_output(tp); tp->t_flags &= ~TF_FORCEDATA; } } out: TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); INP_WUNLOCK(inp); if (flags & PRUS_EOF) INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } static int tcp_usr_ready(struct socket *so, struct mbuf *m, int count) { struct inpcb *inp; struct tcpcb *tp; int error; inp = sotoinpcb(so); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); for (int i = 0; i < count; i++) m = m_free(m); return (ECONNRESET); } tp = intotcpcb(inp); SOCKBUF_LOCK(&so->so_snd); error = sbready(&so->so_snd, m, count); SOCKBUF_UNLOCK(&so->so_snd); if (error == 0) error = tp->t_fb->tfb_tcp_output(tp); INP_WUNLOCK(inp); return (error); } /* * Abort the TCP. Drop the connection abruptly. */ static void tcp_usr_abort(struct socket *so) { struct inpcb *inp; struct tcpcb *tp = NULL; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_abort: inp_socket == NULL")); /* * If we still have full TCP state, and we're not dropped, drop. */ if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); TCPDEBUG1(); tcp_drop(tp, ECONNABORTED); TCPDEBUG2(PRU_ABORT); TCP_PROBE2(debug__user, tp, PRU_ABORT); } if (!(inp->inp_flags & INP_DROPPED)) { SOCK_LOCK(so); so->so_state |= SS_PROTOREF; SOCK_UNLOCK(so); inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); } /* * TCP socket is closed. Start friendly disconnect. */ static void tcp_usr_close(struct socket *so) { struct inpcb *inp; struct tcpcb *tp = NULL; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_close: inp_socket == NULL")); /* * If we still have full TCP state, and we're not dropped, initiate * a disconnect. */ if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); TCPDEBUG1(); tcp_disconnect(tp); TCPDEBUG2(PRU_CLOSE); TCP_PROBE2(debug__user, tp, PRU_CLOSE); } if (!(inp->inp_flags & INP_DROPPED)) { SOCK_LOCK(so); so->so_state |= SS_PROTOREF; SOCK_UNLOCK(so); inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); } /* * Receive out-of-band data. */ static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNRESET; goto out; } tp = intotcpcb(inp); TCPDEBUG1(); if ((so->so_oobmark == 0 && (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || tp->t_oobflags & TCPOOB_HADDATA) { error = EINVAL; goto out; } if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { error = EWOULDBLOCK; goto out; } m->m_len = 1; *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); out: TCPDEBUG2(PRU_RCVOOB); TCP_PROBE2(debug__user, tp, PRU_RCVOOB); INP_WUNLOCK(inp); return (error); } #ifdef INET struct pr_usrreqs tcp_usrreqs = { .pru_abort = tcp_usr_abort, .pru_accept = tcp_usr_accept, .pru_attach = tcp_usr_attach, .pru_bind = tcp_usr_bind, .pru_connect = tcp_usr_connect, .pru_control = in_control, .pru_detach = tcp_usr_detach, .pru_disconnect = tcp_usr_disconnect, .pru_listen = tcp_usr_listen, .pru_peeraddr = in_getpeeraddr, .pru_rcvd = tcp_usr_rcvd, .pru_rcvoob = tcp_usr_rcvoob, .pru_send = tcp_usr_send, .pru_ready = tcp_usr_ready, .pru_shutdown = tcp_usr_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = tcp_usr_close, }; #endif /* INET */ #ifdef INET6 struct pr_usrreqs tcp6_usrreqs = { .pru_abort = tcp_usr_abort, .pru_accept = tcp6_usr_accept, .pru_attach = tcp_usr_attach, .pru_bind = tcp6_usr_bind, .pru_connect = tcp6_usr_connect, .pru_control = in6_control, .pru_detach = tcp_usr_detach, .pru_disconnect = tcp_usr_disconnect, .pru_listen = tcp6_usr_listen, .pru_peeraddr = in6_mapped_peeraddr, .pru_rcvd = tcp_usr_rcvd, .pru_rcvoob = tcp_usr_rcvoob, .pru_send = tcp_usr_send, .pru_ready = tcp_usr_ready, .pru_shutdown = tcp_usr_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = tcp_usr_close, }; #endif /* INET6 */ #ifdef INET /* * Common subroutine to open a TCP connection to remote host specified * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local * port number if needed. Call in_pcbconnect_setup to do the routing and * to choose a local host address (interface). If there is an existing * incarnation of the same connection in TIME-WAIT state and if the remote * host was sending CC options and if the connection duration was < MSL, then * truncate the previous TIME-WAIT state and proceed. * Initialize connection parameters and enter SYN-SENT state. */ static int tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) { struct inpcb *inp = tp->t_inpcb, *oinp; struct socket *so = inp->inp_socket; struct in_addr laddr; u_short lport; int error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK(&V_tcbinfo); if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) goto out; } /* * Cannot simply call in_pcbconnect, because there might be an * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. */ laddr = inp->inp_laddr; lport = inp->inp_lport; error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); if (error && oinp == NULL) goto out; if (oinp) { error = EADDRINUSE; goto out; } inp->inp_laddr = laddr; in_pcbrehash(inp); INP_HASH_WUNLOCK(&V_tcbinfo); /* * Compute window scaling to request: * Scale to fit into sweet spot. See tcp_syncache.c. * XXX: This should move to tcp_output(). */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < sb_max) tp->request_r_scale++; soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); return 0; out: INP_HASH_WUNLOCK(&V_tcbinfo); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) { struct inpcb *inp = tp->t_inpcb; int error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK(&V_tcbinfo); if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) goto out; } error = in6_pcbconnect(inp, nam, td->td_ucred); if (error != 0) goto out; INP_HASH_WUNLOCK(&V_tcbinfo); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < sb_max) tp->request_r_scale++; soisconnecting(inp->inp_socket); TCPSTAT_INC(tcps_connattempt); tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); return 0; out: INP_HASH_WUNLOCK(&V_tcbinfo); return error; } #endif /* INET6 */ /* * Export TCP internal state information via a struct tcp_info, based on the * Linux 2.6 API. Not ABI compatible as our constants are mapped differently * (TCP state machine, etc). We export all information using FreeBSD-native * constants -- for example, the numeric values for tcpi_state will differ * from Linux. */ static void tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) { INP_WLOCK_ASSERT(tp->t_inpcb); bzero(ti, sizeof(*ti)); ti->tcpi_state = tp->t_state; if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; if (tp->t_flags & TF_SACK_PERMIT) ti->tcpi_options |= TCPI_OPT_SACK; if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { ti->tcpi_options |= TCPI_OPT_WSCALE; ti->tcpi_snd_wscale = tp->snd_scale; ti->tcpi_rcv_wscale = tp->rcv_scale; } if (tp->t_flags & TF_ECN_PERMIT) ti->tcpi_options |= TCPI_OPT_ECN; ti->tcpi_rto = tp->t_rxtcur * tick; ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick; ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; ti->tcpi_snd_ssthresh = tp->snd_ssthresh; ti->tcpi_snd_cwnd = tp->snd_cwnd; /* * FreeBSD-specific extension fields for tcp_info. */ ti->tcpi_rcv_space = tp->rcv_wnd; ti->tcpi_rcv_nxt = tp->rcv_nxt; ti->tcpi_snd_wnd = tp->snd_wnd; ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ ti->tcpi_snd_nxt = tp->snd_nxt; ti->tcpi_snd_mss = tp->t_maxseg; ti->tcpi_rcv_mss = tp->t_maxseg; if (tp->t_flags & TF_TOE) ti->tcpi_options |= TCPI_OPT_TOE; ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; ti->tcpi_rcv_ooopack = tp->t_rcvoopack; ti->tcpi_snd_zerowin = tp->t_sndzerowin; } /* * tcp_ctloutput() must drop the inpcb lock before performing copyin on * socket option arguments. When it re-acquires the lock after the copy, it * has to revalidate that the connection is still valid for the socket * option. */ #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do { \ INP_WLOCK(inp); \ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ INP_WUNLOCK(inp); \ cleanup; \ return (ECONNRESET); \ } \ tp = intotcpcb(inp); \ } while(0) #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */) int tcp_ctloutput(struct socket *so, struct sockopt *sopt) { int error; struct inpcb *inp; struct tcpcb *tp; struct tcp_function_block *blk; struct tcp_function_set fsn; error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); INP_WLOCK(inp); if (sopt->sopt_level != IPPROTO_TCP) { #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) { INP_WUNLOCK(inp); error = ip6_ctloutput(so, sopt); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { INP_WUNLOCK(inp); error = ip_ctloutput(so, sopt); } #endif return (error); } if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); /* * Protect the TCP option TCP_FUNCTION_BLK so * that a sub-function can *never* overwrite this. */ if ((sopt->sopt_dir == SOPT_SET) && (sopt->sopt_name == TCP_FUNCTION_BLK)) { INP_WUNLOCK(inp); error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn); if (error) return (error); INP_WLOCK_RECHECK(inp); blk = find_and_ref_tcp_functions(&fsn); if (blk == NULL) { INP_WUNLOCK(inp); return (ENOENT); } if (tp->t_fb == blk) { /* You already have this */ refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (0); } if (tp->t_state != TCPS_CLOSED) { int error=EINVAL; /* * The user has advanced the state * past the initial point, we may not * be able to switch. */ if (blk->tfb_tcp_handoff_ok != NULL) { /* * Does the stack provide a * query mechanism, if so it may * still be possible? */ error = (*blk->tfb_tcp_handoff_ok)(tp); } if (error) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return(error); } } if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (ENOENT); } /* * Release the old refcnt, the * lookup acquired a ref on the * new one already. */ if (tp->t_fb->tfb_tcp_fb_fini) { /* * Tell the stack to cleanup with 0 i.e. * the tcb is not going away. */ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); } refcount_release(&tp->t_fb->tfb_refcnt); tp->t_fb = blk; if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, sopt->sopt_name); } #endif INP_WUNLOCK(inp); return (error); } else if ((sopt->sopt_dir == SOPT_GET) && (sopt->sopt_name == TCP_FUNCTION_BLK)) { strcpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name); fsn.pcbcnt = tp->t_fb->tfb_refcnt; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &fsn, sizeof fsn); return (error); } /* Pass in the INP locked, called must unlock it */ return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp)); } int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp) { int error, opt, optval; u_int ui; struct tcp_info ti; struct cc_algo *algo; char *pbuf, buf[TCP_CA_NAME_MAX]; size_t len; /* * For TCP_CCALGOOPT forward the control to CC module, for both * SOPT_SET and SOPT_GET. */ switch (sopt->sopt_name) { case TCP_CCALGOOPT: INP_WUNLOCK(inp); pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO); error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize, sopt->sopt_valsize); if (error) { free(pbuf, M_TEMP); return (error); } INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP)); if (CC_ALGO(tp)->ctl_output != NULL) error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf); else error = ENOENT; INP_WUNLOCK(inp); if (error == 0 && sopt->sopt_dir == SOPT_GET) error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize); free(pbuf, M_TEMP); return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + if (!TCPMD5_ENABLED()) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = TCPMD5_PCBCTL(inp, sopt); if (error) return (error); - - INP_WLOCK_RECHECK(inp); - if (optval > 0) - tp->t_flags |= TF_SIGNATURE; - else - tp->t_flags &= ~TF_SIGNATURE; goto unlock_and_done; -#endif /* TCP_SIGNATURE */ +#endif /* IPSEC */ case TCP_NODELAY: case TCP_NOOPT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_NODELAY: opt = TF_NODELAY; break; case TCP_NOOPT: opt = TF_NOOPT; break; default: opt = 0; /* dead code to fool gcc */ break; } if (optval) tp->t_flags |= opt; else tp->t_flags &= ~opt; unlock_and_done: #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, sopt->sopt_name); } #endif INP_WUNLOCK(inp); break; case TCP_NOPUSH: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval) tp->t_flags |= TF_NOPUSH; else if (tp->t_flags & TF_NOPUSH) { tp->t_flags &= ~TF_NOPUSH; if (TCPS_HAVEESTABLISHED(tp->t_state)) error = tp->t_fb->tfb_tcp_output(tp); } goto unlock_and_done; case TCP_MAXSEG: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && optval + 40 >= V_tcp_minmss) tp->t_maxseg = optval; else error = EINVAL; goto unlock_and_done; case TCP_INFO: INP_WUNLOCK(inp); error = EINVAL; break; case TCP_CONGESTION: INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1); if (error) break; buf[sopt->sopt_valsize] = '\0'; INP_WLOCK_RECHECK(inp); CC_LIST_RLOCK(); STAILQ_FOREACH(algo, &cc_list, entries) if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) == 0) break; CC_LIST_RUNLOCK(); if (algo == NULL) { INP_WUNLOCK(inp); error = EINVAL; break; } /* * We hold a write lock over the tcb so it's safe to * do these things without ordering concerns. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); CC_ALGO(tp) = algo; /* * If something goes pear shaped initialising the new * algo, fall back to newreno (which does not * require initialisation). */ if (algo->cb_init != NULL && algo->cb_init(tp->ccv) != 0) { CC_ALGO(tp) = &newreno_cc_algo; /* * The only reason init should fail is * because of malloc. */ error = ENOMEM; } INP_WUNLOCK(inp); break; case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPINIT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); if (ui > (UINT_MAX / hz)) { error = EINVAL; break; } ui *= hz; INP_WLOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_KEEPIDLE: tp->t_keepidle = ui; /* * XXX: better check current remaining * timeout and "merge" it with new value. */ if ((tp->t_state > TCPS_LISTEN) && (tp->t_state <= TCPS_CLOSING)) tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); break; case TCP_KEEPINTVL: tp->t_keepintvl = ui; if ((tp->t_state == TCPS_FIN_WAIT_2) && (TP_MAXIDLE(tp) > 0)) tcp_timer_activate(tp, TT_2MSL, TP_MAXIDLE(tp)); break; case TCP_KEEPINIT: tp->t_keepinit = ui; if (tp->t_state == TCPS_SYN_RECEIVED || tp->t_state == TCPS_SYN_SENT) tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); break; } goto unlock_and_done; case TCP_KEEPCNT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); INP_WLOCK_RECHECK(inp); tp->t_keepcnt = ui; if ((tp->t_state == TCPS_FIN_WAIT_2) && (TP_MAXIDLE(tp) > 0)) tcp_timer_activate(tp, TT_2MSL, TP_MAXIDLE(tp)); goto unlock_and_done; #ifdef TCPPCAP case TCP_PCAP_OUT: case TCP_PCAP_IN: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval >= 0) tcp_pcap_set_sock_max(TCP_PCAP_OUT ? &(tp->t_outpkts) : &(tp->t_inpkts), optval); else error = EINVAL; goto unlock_and_done; #endif #ifdef TCP_RFC7413 case TCP_FASTOPEN: INP_WUNLOCK(inp); if (!V_tcp_fastopen_enabled) return (EPERM); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval) { tp->t_flags |= TF_FASTOPEN; if ((tp->t_state == TCPS_LISTEN) && (tp->t_tfo_pending == NULL)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); } else tp->t_flags &= ~TF_FASTOPEN; goto unlock_and_done; #endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: tp = intotcpcb(inp); switch (sopt->sopt_name) { -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: - optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (!TCPMD5_ENABLED()) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = TCPMD5_PCBCTL(inp, sopt); break; #endif case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_MAXSEG: optval = tp->t_maxseg; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_INFO: tcp_fill_info(tp, &ti); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ti, sizeof ti); break; case TCP_CONGESTION: len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); INP_WUNLOCK(inp); error = sooptcopyout(sopt, buf, len + 1); break; case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPINIT: case TCP_KEEPCNT: switch (sopt->sopt_name) { case TCP_KEEPIDLE: ui = TP_KEEPIDLE(tp) / hz; break; case TCP_KEEPINTVL: ui = TP_KEEPINTVL(tp) / hz; break; case TCP_KEEPINIT: ui = TP_KEEPINIT(tp) / hz; break; case TCP_KEEPCNT: ui = TP_KEEPCNT(tp); break; } INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ui, sizeof(ui)); break; #ifdef TCPPCAP case TCP_PCAP_OUT: case TCP_PCAP_IN: optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ? &(tp->t_outpkts) : &(tp->t_inpkts)); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif #ifdef TCP_RFC7413 case TCP_FASTOPEN: optval = tp->t_flags & TF_FASTOPEN; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; } return (error); } #undef INP_WLOCK_RECHECK #undef INP_WLOCK_RECHECK_CLEANUP /* * Attach TCP protocol to socket, allocating * internet protocol control block, tcp control block, * bufer space, and entering LISTEN state if to accept connections. */ static int tcp_attach(struct socket *so) { struct tcpcb *tp; struct inpcb *inp; int error; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); if (error) return (error); } so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE; INP_INFO_RLOCK(&V_tcbinfo); error = in_pcballoc(so, &V_tcbinfo); if (error) { INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } inp = sotoinpcb(so); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) { inp->inp_vflag |= INP_IPV6; inp->in6p_hops = -1; /* use kernel default */ } else #endif inp->inp_vflag |= INP_IPV4; tp = tcp_newtcpcb(inp); if (tp == NULL) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_RUNLOCK(&V_tcbinfo); return (ENOBUFS); } tp->t_state = TCPS_CLOSED; INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); TCPSTATES_INC(TCPS_CLOSED); return (0); } /* * Initiate (or continue) disconnect. * If embryonic state, just send reset (once). * If in ``let data drain'' option and linger null, just drop. * Otherwise (hard), mark socket disconnecting and drop * current input data; switch states based on user close, and * send segment to peer (with FIN). */ static void tcp_disconnect(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); /* * Neither tcp_close() nor tcp_drop() should return NULL, as the * socket is still open. */ if (tp->t_state < TCPS_ESTABLISHED) { tp = tcp_close(tp); KASSERT(tp != NULL, ("tcp_disconnect: tcp_close() returned NULL")); } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { tp = tcp_drop(tp, 0); KASSERT(tp != NULL, ("tcp_disconnect: tcp_drop() returned NULL")); } else { soisdisconnecting(so); sbflush(&so->so_rcv); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) tp->t_fb->tfb_tcp_output(tp); } } /* * User issued close, and wish to trail through shutdown states: * if never received SYN, just forget it. If got a SYN from peer, * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. * If already got a FIN from peer, then almost done; go to LAST_ACK * state. In all other cases, have already sent FIN to peer (e.g. * after PRU_SHUTDOWN), and just have to play tedious game waiting * for peer to send FIN or not respond to keep-alives, etc. * We can let the user exit from the close as soon as the FIN is acked. */ static void tcp_usrclosed(struct tcpcb *tp) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); switch (tp->t_state) { case TCPS_LISTEN: #ifdef TCP_OFFLOAD tcp_offload_listen_stop(tp); #endif tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ case TCPS_CLOSED: tp = tcp_close(tp); /* * tcp_close() should never return NULL here as the socket is * still open. */ KASSERT(tp != NULL, ("tcp_usrclosed: tcp_close() returned NULL")); break; case TCPS_SYN_SENT: case TCPS_SYN_RECEIVED: tp->t_flags |= TF_NEEDFIN; break; case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_FIN_WAIT_1); break; case TCPS_CLOSE_WAIT: tcp_state_change(tp, TCPS_LAST_ACK); break; } if (tp->t_state >= TCPS_FIN_WAIT_2) { soisdisconnected(tp->t_inpcb->inp_socket); /* Prevent the connection hanging in FIN_WAIT_2 forever. */ if (tp->t_state == TCPS_FIN_WAIT_2) { int timeout; timeout = (tcp_fast_finwait2_recycle) ? tcp_finwait2_timeout : TP_MAXIDLE(tp); tcp_timer_activate(tp, TT_2MSL, timeout); } } } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_tstate(int t_state) { switch (t_state) { case TCPS_CLOSED: db_printf("TCPS_CLOSED"); return; case TCPS_LISTEN: db_printf("TCPS_LISTEN"); return; case TCPS_SYN_SENT: db_printf("TCPS_SYN_SENT"); return; case TCPS_SYN_RECEIVED: db_printf("TCPS_SYN_RECEIVED"); return; case TCPS_ESTABLISHED: db_printf("TCPS_ESTABLISHED"); return; case TCPS_CLOSE_WAIT: db_printf("TCPS_CLOSE_WAIT"); return; case TCPS_FIN_WAIT_1: db_printf("TCPS_FIN_WAIT_1"); return; case TCPS_CLOSING: db_printf("TCPS_CLOSING"); return; case TCPS_LAST_ACK: db_printf("TCPS_LAST_ACK"); return; case TCPS_FIN_WAIT_2: db_printf("TCPS_FIN_WAIT_2"); return; case TCPS_TIME_WAIT: db_printf("TCPS_TIME_WAIT"); return; default: db_printf("unknown"); return; } } static void db_print_tflags(u_int t_flags) { int comma; comma = 0; if (t_flags & TF_ACKNOW) { db_printf("%sTF_ACKNOW", comma ? ", " : ""); comma = 1; } if (t_flags & TF_DELACK) { db_printf("%sTF_DELACK", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NODELAY) { db_printf("%sTF_NODELAY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NOOPT) { db_printf("%sTF_NOOPT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SENTFIN) { db_printf("%sTF_SENTFIN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_REQ_SCALE) { db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RCVD_SCALE) { db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_REQ_TSTMP) { db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RCVD_TSTMP) { db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SACK_PERMIT) { db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NEEDSYN) { db_printf("%sTF_NEEDSYN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NEEDFIN) { db_printf("%sTF_NEEDFIN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NOPUSH) { db_printf("%sTF_NOPUSH", comma ? ", " : ""); comma = 1; } if (t_flags & TF_MORETOCOME) { db_printf("%sTF_MORETOCOME", comma ? ", " : ""); comma = 1; } if (t_flags & TF_LQ_OVERFLOW) { db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : ""); comma = 1; } if (t_flags & TF_LASTIDLE) { db_printf("%sTF_LASTIDLE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RXWIN0SENT) { db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTRECOVERY) { db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_CONGRECOVERY) { db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_WASFRECOVERY) { db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SIGNATURE) { db_printf("%sTF_SIGNATURE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FORCEDATA) { db_printf("%sTF_FORCEDATA", comma ? ", " : ""); comma = 1; } if (t_flags & TF_TSO) { db_printf("%sTF_TSO", comma ? ", " : ""); comma = 1; } if (t_flags & TF_ECN_PERMIT) { db_printf("%sTF_ECN_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTOPEN) { db_printf("%sTF_FASTOPEN", comma ? ", " : ""); comma = 1; } } static void db_print_toobflags(char t_oobflags) { int comma; comma = 0; if (t_oobflags & TCPOOB_HAVEDATA) { db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); comma = 1; } if (t_oobflags & TCPOOB_HADDATA) { db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); comma = 1; } } static void db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", name, tp); indent += 2; db_print_indent(indent); db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); db_print_indent(indent); db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n", &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep); db_print_indent(indent); db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl, &tp->t_timers->tt_delack, tp->t_inpcb); db_print_indent(indent); db_printf("t_state: %d (", tp->t_state); db_print_tstate(tp->t_state); db_printf(")\n"); db_print_indent(indent); db_printf("t_flags: 0x%x (", tp->t_flags); db_print_tflags(tp->t_flags); db_printf(")\n"); db_print_indent(indent); db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n", tp->snd_una, tp->snd_max, tp->snd_nxt); db_print_indent(indent); db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n", tp->snd_up, tp->snd_wl1, tp->snd_wl2); db_print_indent(indent); db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n", tp->iss, tp->irs, tp->rcv_nxt); db_print_indent(indent); db_printf("rcv_adv: 0x%08x rcv_wnd: %u rcv_up: 0x%08x\n", tp->rcv_adv, tp->rcv_wnd, tp->rcv_up); db_print_indent(indent); db_printf("snd_wnd: %u snd_cwnd: %u\n", tp->snd_wnd, tp->snd_cwnd); db_print_indent(indent); db_printf("snd_ssthresh: %u snd_recover: " "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); db_print_indent(indent); db_printf("t_rcvtime: %u t_startime: %u\n", tp->t_rcvtime, tp->t_starttime); db_print_indent(indent); db_printf("t_rttime: %u t_rtsq: 0x%08x\n", tp->t_rtttime, tp->t_rtseq); db_print_indent(indent); db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); db_print_indent(indent); db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u " "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin, tp->t_rttbest); db_print_indent(indent); db_printf("t_rttupdated: %lu max_sndwnd: %u t_softerror: %d\n", tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); db_print_indent(indent); db_printf("t_oobflags: 0x%x (", tp->t_oobflags); db_print_toobflags(tp->t_oobflags); db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); db_print_indent(indent); db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", tp->snd_scale, tp->rcv_scale, tp->request_r_scale); db_print_indent(indent); db_printf("ts_recent: %u ts_recent_age: %u\n", tp->ts_recent, tp->ts_recent_age); db_print_indent(indent); db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); db_print_indent(indent); db_printf("snd_ssthresh_prev: %u snd_recover_prev: 0x%08x " "t_badrxtwin: %u\n", tp->snd_ssthresh_prev, tp->snd_recover_prev, tp->t_badrxtwin); db_print_indent(indent); db_printf("snd_numholes: %d snd_holes first: %p\n", tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes)); db_print_indent(indent); db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: " "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata); /* Skip sackblks, sackhint. */ db_print_indent(indent); db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); } DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) { struct tcpcb *tp; if (!have_addr) { db_printf("usage: show tcpcb \n"); return; } tp = (struct tcpcb *)addr; db_print_tcpcb(tp, "tcpcb", 0); } #endif diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 38e8a15e7e19..0ed96ccdf279 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,905 +1,868 @@ /*- * Copyright (c) 1982, 1986, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #ifndef _NETINET_TCP_VAR_H_ #define _NETINET_TCP_VAR_H_ #include #include #ifdef _KERNEL #include #include /* * Kernel variables for tcp. */ VNET_DECLARE(int, tcp_do_rfc1323); #define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) #endif /* _KERNEL */ /* TCP segment queue entry */ struct tseg_qent { LIST_ENTRY(tseg_qent) tqe_q; int tqe_len; /* TCP segment data length */ struct tcphdr *tqe_th; /* a pointer to tcp header */ struct mbuf *tqe_m; /* mbuf contains packet */ }; LIST_HEAD(tsegqe_head, tseg_qent); struct sackblk { tcp_seq start; /* start seq no. of sack block */ tcp_seq end; /* end seq no. */ }; struct sackhole { tcp_seq start; /* start seq no. of hole */ tcp_seq end; /* end seq no. */ tcp_seq rxmit; /* next seq. no in hole to be retransmitted */ TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */ }; struct sackhint { struct sackhole *nexthole; int sack_bytes_rexmit; tcp_seq last_sack_ack; /* Most recent/largest sacked ack */ int ispare; /* explicit pad for 64bit alignment */ int sacked_bytes; /* * Total sacked bytes reported by the * receiver via sack option */ uint32_t _pad1[1]; /* TBD */ uint64_t _pad[1]; /* TBD */ }; struct tcptemp { u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ struct tcphdr tt_t; }; #define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ /* * TODO: We yet need to brave plowing in * to tcp_input() and the pru_usrreq() block. * Right now these go to the old standards which * are somewhat ok, but in the long term may * need to be changed. If we do tackle tcp_input() * then we need to get rid of the tcp_do_segment() * function below. */ /* Flags for tcp functions */ #define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ struct tcpcb; struct inpcb; struct sockopt; struct socket; /* * If defining the optional tcp_timers, in the * tfb_tcp_timer_stop call you must use the * callout_async_drain() function with the * tcp_timer_discard callback. You should check * the return of callout_async_drain() and if 0 * increment tt_draincnt. Since the timer sub-system * does not know your callbacks you must provide a * stop_all function that loops through and calls * tcp_timer_stop() with each of your defined timers. * Adding a tfb_tcp_handoff_ok function allows the socket * option to change stacks to query you even if the * connection is in a later stage. You return 0 to * say you can take over and run your stack, you return * non-zero (an error number) to say no you can't. * If the function is undefined you can only change * in the early states (before connect or listen). * tfb_tcp_fb_fini is changed to add a flag to tell * the old stack if the tcb is being destroyed or * not. A one in the flag means the TCB is being * destroyed, a zero indicates its transitioning to * another stack (via socket option). */ struct tcp_function_block { char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; int (*tfb_tcp_output)(struct tcpcb *); void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int); int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); /* Optional memory allocation/free routine */ void (*tfb_tcp_fb_init)(struct tcpcb *); void (*tfb_tcp_fb_fini)(struct tcpcb *, int); /* Optional timers, must define all if you define one */ int (*tfb_tcp_timer_stop_all)(struct tcpcb *); void (*tfb_tcp_timer_activate)(struct tcpcb *, uint32_t, u_int); int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); int (*tfb_tcp_handoff_ok)(struct tcpcb *); volatile uint32_t tfb_refcnt; uint32_t tfb_flags; }; struct tcp_function { TAILQ_ENTRY(tcp_function) tf_next; struct tcp_function_block *tf_fb; }; TAILQ_HEAD(tcp_funchead, tcp_function); /* * Tcp control block, one per tcp; fields: * Organized for 16 byte cacheline efficiency. */ struct tcpcb { struct tsegqe_head t_segq; /* segment reassembly queue */ void *t_pspare[2]; /* new reassembly queue */ int t_segqlen; /* segment reassembly queue length */ int t_dupacks; /* consecutive dup acks recd */ struct tcp_timer *t_timers; /* All the TCP timers in one struct */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ int t_state; /* state of this connection */ u_int t_flags; struct vnet *t_vnet; /* back pointer to parent vnet */ tcp_seq snd_una; /* sent but unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; * used to recognize retransmits */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ tcp_seq snd_wl1; /* window update seg seq number */ tcp_seq snd_wl2; /* window update seg ack number */ tcp_seq iss; /* initial send sequence number */ tcp_seq irs; /* initial receive sequence number */ tcp_seq rcv_nxt; /* receive next */ tcp_seq rcv_adv; /* advertised window */ uint32_t rcv_wnd; /* receive window */ tcp_seq rcv_up; /* receive urgent pointer */ uint32_t snd_wnd; /* send window */ uint32_t snd_cwnd; /* congestion-controlled window */ u_long snd_spare1; /* unused */ uint32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ u_long snd_spare2; /* unused */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ u_int t_rcvtime; /* inactivity time */ u_int t_starttime; /* time connection was established */ u_int t_rtttime; /* RTT measurement start time */ tcp_seq t_rtseq; /* sequence number being timed */ u_int t_bw_spare1; /* unused */ tcp_seq t_bw_spare2; /* unused */ int t_rxtcur; /* current retransmit value (ticks) */ u_int t_maxseg; /* maximum segment size */ u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */ int t_srtt; /* smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ int t_rxtshift; /* log(2) of rexmt exp. backoff */ u_int t_rttmin; /* minimum rtt allowed */ u_int t_rttbest; /* best rtt we've seen */ u_long t_rttupdated; /* number of times rtt sampled */ uint32_t max_sndwnd; /* largest window peer has offered */ int t_softerror; /* possible error not yet reported */ /* out-of-band data */ char t_oobflags; /* have some */ char t_iobc; /* input character */ /* RFC 1323 variables */ u_char snd_scale; /* window scaling for send window */ u_char rcv_scale; /* window scaling for recv window */ u_char request_r_scale; /* pending window scaling */ u_int32_t ts_recent; /* timestamp echo data */ u_int ts_recent_age; /* when last updated */ u_int32_t ts_offset; /* our timestamp offset */ tcp_seq last_ack_sent; /* experimental */ uint32_t snd_cwnd_prev; /* cwnd prior to retransmit */ uint32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ int t_sndzerowin; /* zero-window updates sent */ u_int t_badrxtwin; /* window for retransmit recovery */ u_char snd_limited; /* segments limited transmitted */ /* SACK related state */ int snd_numholes; /* number of holes seen by sender */ TAILQ_HEAD(sackhole_head, sackhole) snd_holes; /* SACK scoreboard (sorted) */ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ int rcv_numsacks; /* # distinct sack blks present */ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ tcp_seq sack_newdata; /* New data xmitted in this recovery episode starts at this seq number */ struct sackhint sackhint; /* SACK scoreboard hint */ int t_rttlow; /* smallest observerved RTT */ u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ int rfbuf_cnt; /* recv buffer autoscaling byte count */ struct toedev *tod; /* toedev handling this connection */ int t_sndrexmitpack; /* retransmit packets sent */ int t_rcvoopack; /* out-of-order packets received */ void *t_toe; /* TOE pcb pointer */ int t_bytes_acked; /* # bytes acked during current RTT */ struct cc_algo *cc_algo; /* congestion control algorithm */ struct cc_var *ccv; /* congestion control specific vars */ struct osd *osd; /* storage for Khelp module data */ u_int t_keepinit; /* time to establish connection */ u_int t_keepidle; /* time before keepalive probes begin */ u_int t_keepintvl; /* interval between keepalives */ u_int t_keepcnt; /* number of keepalives before close */ u_int t_tsomax; /* TSO total burst length limit in bytes */ u_int t_tsomaxsegcount; /* TSO maximum segment count */ u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ u_int t_flags2; /* More tcpcb flags storage */ #if defined(_KERNEL) && defined(TCP_RFC7413) uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */ uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ #else uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ #endif struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ #if defined(_KERNEL) && defined(TCP_RFC7413) unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */ void *t_pspare2[1]; /* 1 TCP_SIGNATURE */ #else void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */ #endif #if defined(_KERNEL) && defined(TCPPCAP) struct mbufq t_inpkts; /* List of saved input packets. */ struct mbufq t_outpkts; /* List of saved output packets. */ #ifdef _LP64 uint64_t _pad[0]; /* all used! */ #else uint64_t _pad[2]; /* 2 are available */ #endif /* _LP64 */ #else uint64_t _pad[6]; #endif /* defined(_KERNEL) && defined(TCPPCAP) */ }; /* * Flags and utility macros for the t_flags field. */ #define TF_ACKNOW 0x000001 /* ack peer immediately */ #define TF_DELACK 0x000002 /* ack, but try to delay it */ #define TF_NODELAY 0x000004 /* don't delay packets to coalesce */ #define TF_NOOPT 0x000008 /* don't use tcp options */ #define TF_SENTFIN 0x000010 /* have sent FIN */ #define TF_REQ_SCALE 0x000020 /* have/will request window scaling */ #define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */ #define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */ #define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */ #define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */ #define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ #define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ #define TF_NOPUSH 0x001000 /* don't push */ #define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ #define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ #define TF_LASTIDLE 0x040000 /* connection was previously idle */ #define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ #define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ #define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ #define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ #define TF_FORCEDATA 0x800000 /* force out a byte */ #define TF_TSO 0x1000000 /* TSO enabled on this connection */ #define TF_TOE 0x2000000 /* this connection is offloaded */ #define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */ #define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */ #define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ #define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) #define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY #define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY #define IN_CONGRECOVERY(t_flags) (t_flags & TF_CONGRECOVERY) #define ENTER_CONGRECOVERY(t_flags) t_flags |= TF_CONGRECOVERY #define EXIT_CONGRECOVERY(t_flags) t_flags &= ~TF_CONGRECOVERY #define IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY)) #define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY) #define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY) #if defined(_KERNEL) && !defined(TCP_RFC7413) #define IS_FASTOPEN(t_flags) (false) #else #define IS_FASTOPEN(t_flags) (t_flags & TF_FASTOPEN) #endif #define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una) /* * Flags for the t_oobflags field. */ #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 -#ifdef TCP_SIGNATURE -/* - * Defines which are needed by the xform_tcp module and tcp_[in|out]put - * for SADB verification and lookup. - */ -#define TCP_SIGLEN 16 /* length of computed digest in bytes */ -#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ -#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ -/* - * Only a single SA per host may be specified at this time. An SPI is - * needed in order for the KEY_ALLOCSA() lookup to work. - */ -#define TCP_SIG_SPI 0x1000 -#endif /* TCP_SIGNATURE */ - /* * Flags for PLPMTU handling, t_flags2 */ #define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */ #define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */ #define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */ /* * Structure to hold TCP options that are only used during segment * processing (in tcp_input), but not held in the tcpcb. * It's basically used to reduce the number of parameters * to tcp_dooptions and tcp_addoptions. * The binary order of the to_flags is relevant for packing of the * options in tcp_addoptions. */ struct tcpopt { u_int32_t to_flags; /* which options are present */ #define TOF_MSS 0x0001 /* maximum segment size */ #define TOF_SCALE 0x0002 /* window scaling */ #define TOF_SACKPERM 0x0004 /* SACK permitted */ #define TOF_TS 0x0010 /* timestamp */ #define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */ #define TOF_SACK 0x0080 /* Peer sent SACK option */ #define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */ #define TOF_MAXOPT 0x0200 u_int32_t to_tsval; /* new timestamp */ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ u_char *to_signature; /* pointer to the TCP-MD5 signature */ u_char *to_tfo_cookie; /* pointer to the TFO cookie */ u_int16_t to_mss; /* maximum segment size */ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */ u_int8_t to_tfo_len; /* TFO cookie length */ u_int32_t to_spare; /* UTO */ }; /* * Flags for tcp_dooptions. */ #define TO_SYN 0x01 /* parse SYN-only options */ struct hc_metrics_lite { /* must stay in sync with hc_metrics */ uint32_t rmx_mtu; /* MTU for this path */ uint32_t rmx_ssthresh; /* outbound gateway buffer limit */ uint32_t rmx_rtt; /* estimated round trip time */ uint32_t rmx_rttvar; /* estimated rtt variance */ uint32_t rmx_cwnd; /* congestion window */ uint32_t rmx_sendpipe; /* outbound delay-bandwidth product */ uint32_t rmx_recvpipe; /* inbound delay-bandwidth product */ }; /* * Used by tcp_maxmtu() to communicate interface specific features * and limits at the time of connection setup. */ struct tcp_ifcap { int ifcap; u_int tsomax; u_int tsomaxsegcount; u_int tsomaxsegsize; }; #ifndef _NETINET_IN_PCB_H_ struct in_conninfo; #endif /* _NETINET_IN_PCB_H_ */ struct tcptw { struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */ tcp_seq snd_nxt; tcp_seq rcv_nxt; tcp_seq iss; tcp_seq irs; u_short last_win; /* cached window value */ short tw_so_options; /* copy of so_options */ struct ucred *tw_cred; /* user credentials */ u_int32_t t_recent; u_int32_t ts_offset; /* our timestamp offset */ u_int t_starttime; int tw_time; TAILQ_ENTRY(tcptw) tw_2msl; void *tw_pspare; /* TCP_SIGNATURE */ u_int *tw_spare; /* TCP_SIGNATURE */ }; #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) #define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb) #define sototcpcb(so) (intotcpcb(sotoinpcb(so))) /* * The smoothed round-trip time and estimated variance * are stored as fixed point numbers scaled by the values below. * For convenience, these scales are also used in smoothing the average * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). * With these scales, srtt has 3 bits to the right of the binary point, * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the * binary point, and is smoothed with an ALPHA of 0.75. */ #define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ #define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */ #define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ #define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ #define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ /* * The initial retransmission should happen at rtt + 4 * rttvar. * Because of the way we do the smoothing, srtt and rttvar * will each average +1/2 tick of bias. When we compute * the retransmit timer, we want 1/2 tick of rounding and * 1 extra tick because of +-1/2 tick uncertainty in the * firing of the timer. The bias will give us exactly the * 1.5 tick we need. But, because the bias is * statistical, we have to test that we don't drop below * the minimum feasible timer (which is 2 ticks). * This version of the macro adapted from a paper by Lawrence * Brakmo and Larry Peterson which outlines a problem caused * by insufficient precision in the original implementation, * which results in inappropriately large RTO values for very * fast networks. */ #define TCP_REXMTVAL(tp) \ max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) /* * TCP statistics. * Many of these should be kept per connection, * but that's inconvenient at the moment. */ struct tcpstat { uint64_t tcps_connattempt; /* connections initiated */ uint64_t tcps_accepts; /* connections accepted */ uint64_t tcps_connects; /* connections established */ uint64_t tcps_drops; /* connections dropped */ uint64_t tcps_conndrops; /* embryonic connections dropped */ uint64_t tcps_minmssdrops; /* average minmss too low drops */ uint64_t tcps_closed; /* conn. closed (includes drops) */ uint64_t tcps_segstimed; /* segs where we tried to get rtt */ uint64_t tcps_rttupdated; /* times we succeeded */ uint64_t tcps_delack; /* delayed acks sent */ uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ uint64_t tcps_rexmttimeo; /* retransmit timeouts */ uint64_t tcps_persisttimeo; /* persist timeouts */ uint64_t tcps_keeptimeo; /* keepalive timeouts */ uint64_t tcps_keepprobe; /* keepalive probes sent */ uint64_t tcps_keepdrops; /* connections dropped in keepalive */ uint64_t tcps_sndtotal; /* total packets sent */ uint64_t tcps_sndpack; /* data packets sent */ uint64_t tcps_sndbyte; /* data bytes sent */ uint64_t tcps_sndrexmitpack; /* data packets retransmitted */ uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */ uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */ uint64_t tcps_sndacks; /* ack-only packets sent */ uint64_t tcps_sndprobe; /* window probes sent */ uint64_t tcps_sndurg; /* packets sent with URG only */ uint64_t tcps_sndwinup; /* window update-only packets sent */ uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ uint64_t tcps_rcvtotal; /* total packets received */ uint64_t tcps_rcvpack; /* packets received in sequence */ uint64_t tcps_rcvbyte; /* bytes received in sequence */ uint64_t tcps_rcvbadsum; /* packets received with ccksum errs */ uint64_t tcps_rcvbadoff; /* packets received with bad offset */ uint64_t tcps_rcvreassfull; /* packets dropped for no reass space */ uint64_t tcps_rcvshort; /* packets received too short */ uint64_t tcps_rcvduppack; /* duplicate-only packets received */ uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */ uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */ uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ uint64_t tcps_rcvoopack; /* out-of-order packets received */ uint64_t tcps_rcvoobyte; /* out-of-order bytes received */ uint64_t tcps_rcvpackafterwin; /* packets with data after window */ uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */ uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */ uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */ uint64_t tcps_rcvdupack; /* rcvd duplicate acks */ uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */ uint64_t tcps_rcvackpack; /* rcvd ack packets */ uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */ uint64_t tcps_rcvwinupd; /* rcvd window update packets */ uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */ uint64_t tcps_predack; /* times hdr predict ok for acks */ uint64_t tcps_preddat; /* times hdr predict ok for data pkts */ uint64_t tcps_pcbcachemiss; uint64_t tcps_cachedrtt; /* times cached RTT in route updated */ uint64_t tcps_cachedrttvar; /* times cached rttvar updated */ uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */ uint64_t tcps_usedrtt; /* times RTT initialized from route */ uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */ uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/ uint64_t tcps_persistdrop; /* timeout in persist state */ uint64_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */ uint64_t tcps_mturesent; /* resends due to MTU discovery */ uint64_t tcps_listendrop; /* listen queue overflows */ uint64_t tcps_badrst; /* ignored RSTs in the window */ uint64_t tcps_sc_added; /* entry added to syncache */ uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */ uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */ uint64_t tcps_sc_dropped; /* could not reply to packet */ uint64_t tcps_sc_completed; /* successful extraction of entry */ uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */ uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */ uint64_t tcps_sc_reset; /* RST removed entry from syncache */ uint64_t tcps_sc_stale; /* timed out or listen socket gone */ uint64_t tcps_sc_aborted; /* syncache entry aborted */ uint64_t tcps_sc_badack; /* removed due to bad ACK */ uint64_t tcps_sc_unreach; /* ICMP unreachable received */ uint64_t tcps_sc_zonefail; /* zalloc() failed */ uint64_t tcps_sc_sendcookie; /* SYN cookie sent */ uint64_t tcps_sc_recvcookie; /* SYN cookie received */ uint64_t tcps_hc_added; /* entry added to hostcache */ uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */ uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */ /* SACK related stats */ uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */ uint64_t tcps_sack_rexmits; /* SACK rexmit segments */ uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */ uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */ uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */ uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */ /* ECN related stats */ uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */ uint64_t tcps_ecn_ect0; /* ECN Capable Transport */ uint64_t tcps_ecn_ect1; /* ECN Capable Transport */ uint64_t tcps_ecn_shs; /* ECN successful handshakes */ uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */ /* TCP_SIGNATURE related stats */ uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */ uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */ - uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */ + uint64_t tcps_sig_err_buildsig; /* Failed to make signature */ uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */ uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */ uint64_t _pad[12]; /* 6 UTO, 6 TBD */ }; #define tcps_rcvmemdrop tcps_rcvreassfull /* compat */ #ifdef _KERNEL #define TI_UNLOCKED 1 #define TI_RLOCKED 2 #include VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */ /* * In-kernel consumers can use these accessor macros directly to update * stats. */ #define TCPSTAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val)) #define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1) /* * Kernel module consumers must use this accessor macro. */ void kmod_tcpstat_inc(int statnum); #define KMOD_TCPSTAT_INC(name) \ kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t)) /* * Running TCP connection count by state. */ VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]); #define V_tcps_states VNET(tcps_states) #define TCPSTATES_INC(state) counter_u64_add(V_tcps_states[state], 1) #define TCPSTATES_DEC(state) counter_u64_add(V_tcps_states[state], -1) /* * TCP specific helper hook point identifiers. */ #define HHOOK_TCP_EST_IN 0 #define HHOOK_TCP_EST_OUT 1 #define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT struct tcp_hhook_data { struct tcpcb *tp; struct tcphdr *th; struct tcpopt *to; uint32_t len; int tso; tcp_seq curack; }; #endif /* * TCB structure exported to user-land via sysctl(3). * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been * included. Not all of our clients do. */ #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) struct xtcp_timer { int tt_rexmt; /* retransmit timer */ int tt_persist; /* retransmit persistence */ int tt_keep; /* keepalive */ int tt_2msl; /* 2*msl TIME_WAIT timer */ int tt_delack; /* delayed ACK timer */ int t_rcvtime; /* Time since last packet received */ }; struct xtcpcb { size_t xt_len; struct inpcb xt_inp; struct tcpcb xt_tp; struct xsocket xt_socket; struct xtcp_timer xt_timer; u_quad_t xt_alignment_hack; }; #endif /* * Identifiers for TCP sysctl nodes */ #define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ #define TCPCTL_MSSDFLT 3 /* MSS default */ #define TCPCTL_STATS 4 /* statistics */ #define TCPCTL_RTTDFLT 5 /* default RTT estimate */ #define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */ #define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */ #define TCPCTL_SENDSPACE 8 /* send buffer space */ #define TCPCTL_RECVSPACE 9 /* receive buffer space */ #define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ #define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ #define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ #define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ #define TCPCTL_DROP 15 /* drop tcp connection */ #define TCPCTL_STATES 16 /* connection counts by TCP state */ #ifdef _KERNEL #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_tcp); SYSCTL_DECL(_net_inet_tcp_sack); MALLOC_DECLARE(M_TCPLOG); #endif VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */ VNET_DECLARE(struct inpcbinfo, tcbinfo); extern int tcp_log_in_vain; VNET_DECLARE(int, tcp_mssdflt); /* XXX */ VNET_DECLARE(int, tcp_minmss); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_rfc3390); VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int, tcp_sendspace); VNET_DECLARE(int, tcp_recvspace); VNET_DECLARE(int, path_mtu_discovery); VNET_DECLARE(int, tcp_do_rfc3465); VNET_DECLARE(int, tcp_abc_l_var); #define V_tcb VNET(tcb) #define V_tcbinfo VNET(tcbinfo) #define V_tcp_mssdflt VNET(tcp_mssdflt) #define V_tcp_minmss VNET(tcp_minmss) #define V_tcp_delack_enabled VNET(tcp_delack_enabled) #define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) #define V_tcp_sendspace VNET(tcp_sendspace) #define V_tcp_recvspace VNET(tcp_recvspace) #define V_path_mtu_discovery VNET(path_mtu_discovery) #define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) #define V_tcp_abc_l_var VNET(tcp_abc_l_var) VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */ VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */ #define V_tcp_do_sack VNET(tcp_do_sack) #define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) VNET_DECLARE(int, tcp_do_ecn); /* TCP ECN enabled/disabled */ VNET_DECLARE(int, tcp_ecn_maxretries); #define V_tcp_do_ecn VNET(tcp_do_ecn) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) #ifdef TCP_HHOOK VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]); #define V_tcp_hhh VNET(tcp_hhh) #endif VNET_DECLARE(int, tcp_do_rfc6675_pipe); #define V_tcp_do_rfc6675_pipe VNET(tcp_do_rfc6675_pipe) int tcp_addoptions(struct tcpopt *, u_char *); int tcp_ccalgounload(struct cc_algo *unload_algo); struct tcpcb * tcp_close(struct tcpcb *); void tcp_discardcb(struct tcpcb *); void tcp_twstart(struct tcpcb *); void tcp_twclose(struct tcptw *, int); void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); struct tcpcb * tcp_drop(struct tcpcb *, int); void tcp_drain(void); void tcp_init(void); void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *, const void *); char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *, const void *); int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *); void tcp_reass_global_init(void); void tcp_reass_flush(struct tcpcb *); void tcp_dooptions(struct tcpopt *, u_char *, int, int); void tcp_dropwithreset(struct mbuf *, struct tcphdr *, struct tcpcb *, int, int); void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); void tcp_xmit_timer(struct tcpcb *, int); void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); void cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs, uint16_t type); void cc_conn_init(struct tcpcb *tp); void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th); void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); #ifdef TCP_HHOOK void hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to); #endif int tcp_input(struct mbuf **, int *, int); void tcp_do_segment(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int); int register_tcp_functions(struct tcp_function_block *blk, int wait); int deregister_tcp_functions(struct tcp_function_block *blk); struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs); struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk); int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); u_int tcp_maxseg(const struct tcpcb *); void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, struct tcp_ifcap *); void tcp_mss(struct tcpcb *, int); int tcp_mssopt(struct in_conninfo *); struct inpcb * tcp_drop_syn_sent(struct inpcb *, int); struct tcpcb * tcp_newtcpcb(struct inpcb *); int tcp_output(struct tcpcb *); void tcp_state_change(struct tcpcb *, int); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); void tcp_tw_init(void); #ifdef VIMAGE void tcp_tw_destroy(void); #endif void tcp_tw_zone_change(void); int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, struct mbuf *, int); void tcp_setpersist(struct tcpcb *); -#ifdef TCP_SIGNATURE -struct secasvar; -struct secasvar *tcp_get_sav(struct mbuf *, u_int); -int tcp_signature_do_compute(struct mbuf *, int, int, u_char *, - struct secasvar *); -int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int); -int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *, - struct tcphdr *, u_int); -int tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag); -#endif void tcp_slowtimo(void); struct tcptemp * tcpip_maketemplate(struct inpcb *); void tcpip_fillheaders(struct inpcb *, void *, void *); void tcp_timer_activate(struct tcpcb *, uint32_t, u_int); int tcp_timer_active(struct tcpcb *, uint32_t); void tcp_timer_stop(struct tcpcb *, uint32_t); void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int); /* * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo) */ void tcp_hc_init(void); #ifdef VIMAGE void tcp_hc_destroy(void); #endif void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *); uint32_t tcp_hc_getmtu(struct in_conninfo *); void tcp_hc_updatemtu(struct in_conninfo *, uint32_t); void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *); extern struct pr_usrreqs tcp_usrreqs; tcp_seq tcp_new_isn(struct tcpcb *); int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); void tcp_free_sackholes(struct tcpcb *tp); int tcp_newreno(struct tcpcb *, struct tcphdr *); int tcp_compute_pipe(struct tcpcb *); static inline void tcp_fields_to_host(struct tcphdr *th) { th->th_seq = ntohl(th->th_seq); th->th_ack = ntohl(th->th_ack); th->th_win = ntohs(th->th_win); th->th_urp = ntohs(th->th_urp); } -#ifdef TCP_SIGNATURE -static inline void -tcp_fields_to_net(struct tcphdr *th) -{ - - th->th_seq = htonl(th->th_seq); - th->th_ack = htonl(th->th_ack); - th->th_win = htons(th->th_win); - th->th_urp = htons(th->th_urp); -} -#endif #endif /* _KERNEL */ #endif /* _NETINET_TCP_VAR_H_ */ diff --git a/sys/netinet/udp.h b/sys/netinet/udp.h index c2d638dde321..d84757204b3a 100644 --- a/sys/netinet/udp.h +++ b/sys/netinet/udp.h @@ -1,69 +1,69 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NETINET_UDP_H_ #define _NETINET_UDP_H_ /* * UDP protocol header. * Per RFC 768, September, 1981. */ struct udphdr { u_short uh_sport; /* source port */ u_short uh_dport; /* destination port */ u_short uh_ulen; /* udp length */ u_short uh_sum; /* udp checksum */ }; /* * User-settable options (used with setsockopt). */ #define UDP_ENCAP 1 /* Start of reserved space for third-party user-settable options. */ #define UDP_VENDOR SO_VENDOR /* * UDP Encapsulation of IPsec Packets options. */ /* Encapsulation types. */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */ +#define UDP_ENCAP_ESPINUDP 2 /* RFC3948 */ /* Default ESP in UDP encapsulation port. */ #define UDP_ENCAP_ESPINUDP_PORT 500 /* Maximum UDP fragment size for ESP over UDP. */ #define UDP_ENCAP_ESPINUDP_MAXFRAGLEN 552 #endif diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index b7c183d2327b..259fe0d22cc0 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,1966 +1,1792 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2008 Robert N. M. Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include -#ifdef IPSEC -#include -#include -#endif +#include #include #include /* * UDP and UDP-Lite protocols implementation. * Per RFC 768, August, 1980. * Per RFC 3828, July, 2004. */ /* * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums * removes the only data integrity mechanism for packets and malformed * packets that would otherwise be discarded due to bad checksums, and may * cause problems (especially for NFS data blocks). */ VNET_DEFINE(int, udp_cksum) = 1; SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_cksum), 0, "compute udp checksum"); int udp_log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, &udp_log_in_vain, 0, "Log all incoming UDP packets"); VNET_DEFINE(int, udp_blackhole) = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_blackhole), 0, "Do not send port unreachables for refused connects"); static VNET_DEFINE(int, udp_require_l2_bcast) = 0; #define V_udp_require_l2_bcast VNET(udp_require_l2_bcast) SYSCTL_INT(_net_inet_udp, OID_AUTO, require_l2_bcast, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(udp_require_l2_bcast), 0, "Only treat packets sent to an L2 broadcast address as broadcast packets"); u_long udp_sendspace = 9216; /* really max datagram size */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); u_long udp_recvspace = 40 * (1024 + #ifdef INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); /* 40 1K datagrams */ SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */ VNET_DEFINE(struct inpcbinfo, udbinfo); VNET_DEFINE(struct inpcbhead, ulitecb); VNET_DEFINE(struct inpcbinfo, ulitecbinfo); static VNET_DEFINE(uma_zone_t, udpcb_zone); #define V_udpcb_zone VNET(udpcb_zone) #ifndef UDBHASHSIZE #define UDBHASHSIZE 128 #endif VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */ VNET_PCPUSTAT_SYSINIT(udpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(udpstat); #endif /* VIMAGE */ #ifdef INET static void udp_detach(struct socket *so); static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); #endif -#ifdef IPSEC -#ifdef IPSEC_NAT_T -#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP) -#ifdef INET -static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int); -#endif -#endif /* IPSEC_NAT_T */ -#endif /* IPSEC */ - static void udp_zone_change(void *tag) { uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_udpcb_zone, maxsockets); } static int udp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp; inp = mem; INP_LOCK_INIT(inp, "inp", "udpinp"); return (0); } static int udplite_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp; inp = mem; INP_LOCK_INIT(inp, "inp", "udpliteinp"); return (0); } void udp_init(void) { /* * For now default to 2-tuple UDP hashing - until the fragment * reassembly code can also update the flowid. * * Once we can calculate the flowid that way and re-establish * a 4-tuple, flip this to 4-tuple. */ in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE, "udp_inpcb", udp_inpcb_init, NULL, 0, IPI_HASHFIELDS_2TUPLE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_udpcb_zone, maxsockets); uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL, EVENTHANDLER_PRI_ANY); } void udplite_init(void) { in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE, UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL, 0, IPI_HASHFIELDS_2TUPLE); } /* * Kernel module interface for updating udpstat. The argument is an index * into udpstat treated as an array of u_long. While this encodes the * general layout of udpstat into the caller, it doesn't encode its location, * so that future changes to add, for example, per-CPU stats support won't * cause binary compatibility problems for kernel modules. */ void kmod_udpstat_inc(int statnum) { counter_u64_add(VNET(udpstat)[statnum], 1); } int udp_newudpcb(struct inpcb *inp) { struct udpcb *up; up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO); if (up == NULL) return (ENOBUFS); inp->inp_ppcb = up; return (0); } void udp_discardcb(struct udpcb *up) { uma_zfree(V_udpcb_zone, up); } #ifdef VIMAGE static void udp_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_udbinfo); uma_zdestroy(V_udpcb_zone); } VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL); static void udplite_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ulitecbinfo); } VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy, NULL); #endif #ifdef INET /* * Subroutine of udp_input(), which appends the provided mbuf chain to the * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that * contains the source address. If the socket ends up being an IPv6 socket, * udp_append() will convert to a sockaddr_in6 before passing the address * into the socket code. * * In the normal case udp_append() will return 0, indicating that you * must unlock the inp. However if a tunneling protocol is in place we increment * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we * then decrement the reference count. If the inp_rele returns 1, indicating the * inp is gone, we return that to the caller to tell them *not* to unlock * the inp. In the case of multi-cast this will cause the distribution * to stop (though most tunneling protocols known currently do *not* use * multicast). */ static int udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in *udp_in) { struct sockaddr *append_sa; struct socket *so; struct mbuf *opts = NULL; #ifdef INET6 struct sockaddr_in6 udp_in6; #endif struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } off += sizeof(struct udphdr); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ - if (ipsec4_in_reject(n, inp)) { + if (IPSEC_ENABLED(ipv4) && + IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) { m_freem(n); return (0); } -#ifdef IPSEC_NAT_T - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); - if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */ - n = udp4_espdecap(inp, n, off); - if (n == NULL) /* Consumed. */ - return (0); + if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */ + if (IPSEC_ENABLED(ipv4) && + UDPENCAP_INPUT(n, off, AF_INET) != 0) + return (0); /* Consumed. */ } -#endif /* IPSEC_NAT_T */ #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif /* MAC */ if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { #ifdef INET6 if (inp->inp_vflag & INP_IPV6) (void)ip6_savecontrol_v4(inp, n, &opts, NULL); else #endif /* INET6 */ ip_savecontrol(inp, &opts, ip, n); } #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { bzero(&udp_in6, sizeof(udp_in6)); udp_in6.sin6_len = sizeof(udp_in6); udp_in6.sin6_family = AF_INET6; in6_sin_2_v4mapsin6(udp_in, &udp_in6); append_sa = (struct sockaddr *)&udp_in6; } else #endif /* INET6 */ append_sa = (struct sockaddr *)udp_in; m_adj(n, off); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp_input(struct mbuf **mp, int *offp, int proto) { struct ip *ip; struct udphdr *uh; struct ifnet *ifp; struct inpcb *inp; uint16_t len, ip_len; struct inpcbinfo *pcbinfo; struct ip save_ip; struct sockaddr_in udp_in; struct mbuf *m; struct m_tag *fwd_tag; int cscov_partial, iphlen; m = *mp; iphlen = *offp; ifp = m->m_pkthdr.rcvif; *mp = NULL; UDPSTAT_INC(udps_ipackets); /* * Strip IP options, if any; should skip this, make available to * user, and use on returned packets, but we don't yet have a way to * check the checksum with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) { UDPSTAT_INC(udps_hdrops); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } uh = (struct udphdr *)((caddr_t)ip + iphlen); cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0; /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; /* * Construct sockaddr format source address. Stuff source address * and datagram in user buffer. */ bzero(&udp_in, sizeof(udp_in)); udp_in.sin_len = sizeof(udp_in); udp_in.sin_family = AF_INET; udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; /* * Make mbuf data length reflect UDP length. If not enough data to * reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); ip_len = ntohs(ip->ip_len) - iphlen; if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) { /* Zero means checksum over the complete packet. */ if (len == 0) len = ip_len; cscov_partial = 0; } if (ip_len != len) { if (len > ip_len || len < sizeof(struct udphdr)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (proto == IPPROTO_UDP) m_adj(m, len - ip_len); } /* * Save a copy of the IP header in case we want restore it for * sending an ICMP error message in response. */ if (!V_udp_blackhole) save_ip = *ip; else memset(&save_ip, 0, sizeof(save_ip)); /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { u_short uh_sum; if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + proto)); uh_sum ^= 0xffff; } else { char b[9]; bcopy(((struct ipovly *)ip)->ih_x1, b, 9); bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ? uh->uh_ulen : htons(ip_len); uh_sum = in_cksum(m, len + sizeof (struct ip)); bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); } if (uh_sum) { UDPSTAT_INC(udps_badsum); m_freem(m); return (IPPROTO_DONE); } } else { if (proto == IPPROTO_UDP) { UDPSTAT_INC(udps_nosum); } else { /* UDPLite requires a checksum */ /* XXX: What is the right UDPLite MIB counter here? */ m_freem(m); return (IPPROTO_DONE); } } pcbinfo = udp_get_inpcbinfo(proto); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || ((!V_udp_require_l2_bcast || m->m_flags & M_BCAST) && in_broadcast(ip->ip_dst, ifp))) { struct inpcb *last; struct inpcbhead *pcblist; struct ip_moptions *imo; INP_INFO_RLOCK(pcbinfo); pcblist = udp_get_pcblist(proto); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if (inp->inp_lport != uh->uh_dport) continue; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; INP_RLOCK(inp); /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->inp_moptions; if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct sockaddr_in group; int blocked; if (imo == NULL) { INP_RUNLOCK(inp); continue; } bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(imo, ifp, (struct sockaddr *)&group, (struct sockaddr *)&udp_in); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IPSTAT_INC(ips_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); continue; } } if (last != NULL) { struct mbuf *n; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { UDP_PROBE(receive, NULL, last, ip, last, uh); if (udp_append(last, ip, n, iphlen, &udp_in)) { goto inp_lost; } } INP_RUNLOCK(last); } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datgram.) */ UDPSTAT_INC(udps_noportbcast); if (inp) INP_RUNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); goto badunlocked; } UDP_PROBE(receive, NULL, last, ip, last, uh); if (udp_append(last, ip, m, iphlen, &udp_in) == 0) INP_RUNLOCK(last); inp_lost: INP_INFO_RUNLOCK(pcbinfo); return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in_pcblookup(pcbinfo, ip->ip_src, uh->uh_sport, next_hop->sin_addr, next_hop->sin_port ? htons(next_hop->sin_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } else inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp, m); if (inp == NULL) { if (udp_log_in_vain) { char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & (M_BCAST | M_MCAST)) { UDPSTAT_INC(udps_noportbcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto badunlocked; *ip = save_ip; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); return (IPPROTO_DONE); } /* * Check the minimum TTL for socket. */ INP_RLOCK_ASSERT(inp); if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } if (cscov_partial) { struct udpcb *up; up = intoudpcb(inp); if (up->u_rxcslen == 0 || up->u_rxcslen > len) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip, inp, uh); if (udp_append(inp, ip, m, iphlen, &udp_in) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badunlocked: m_freem(m); return (IPPROTO_DONE); } #endif /* INET */ /* * Notify a udp user of an asynchronous error; just wake up so that they can * collect error status. */ struct inpcb * udp_notify(struct inpcb *inp, int errno) { /* * While udp_ctlinput() always calls udp_notify() with a read lock * when invoking it directly, in_pcbnotifyall() currently uses write * locks due to sharing code with TCP. For now, accept either a read * or a write lock, but a read lock is sufficient. */ INP_LOCK_ASSERT(inp); if ((errno == EHOSTUNREACH || errno == ENETUNREACH || errno == EHOSTDOWN) && inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = (struct rtentry *)NULL; } inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); sowwakeup(inp->inp_socket); return (inp); } #ifdef INET static void udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip, struct inpcbinfo *pcbinfo) { struct ip *ip = vip; struct udphdr *uh; struct in_addr faddr; struct inpcb *inp; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ in_pcbnotifyall(&V_udbinfo, faddr, EHOSTDOWN, udp_notify); return; } /* * Hostdead is ugly because it goes linearly through all PCBs. * * XXX: We never get this from ICMP, otherwise it makes an excellent * DoS attack on machines with many connections. */ if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip != NULL) { uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket != NULL) { udp_notify(inp, inetctlerrmap[cmd]); } INP_RUNLOCK(inp); } else { inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { struct udpcb *up; up = intoudpcb(inp); if (up->u_icmp_func != NULL) { INP_RUNLOCK(inp); (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx); } else { INP_RUNLOCK(inp); } } } } else in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd], udp_notify); } void udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo)); } void udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip) { return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo)); } #endif /* INET */ static int udp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_udbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&V_udbinfo); gencnt = V_udbinfo.ipi_gencnt; n = V_udbinfo.ipi_count; INP_INFO_RUNLOCK(&V_udbinfo); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + n * sizeof(struct xinpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == NULL) return (ENOMEM); INP_INFO_RLOCK(&V_udbinfo); for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK(&V_udbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); xi.xi_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_udbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_udbinfo); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK(&V_udbinfo); xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_udbinfo.ipi_count; INP_INFO_RUNLOCK(&V_udbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); #ifdef INET static int udp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); #endif /* INET */ int udp_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; struct udpcb *up; int isudplite, error, optval; error = 0; isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); if (sopt->sopt_level != so->so_proto->pr_protocol) { #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) { INP_WUNLOCK(inp); error = ip6_ctloutput(so, sopt); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { INP_WUNLOCK(inp); error = ip_ctloutput(so, sopt); } #endif return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +#ifdef INET case UDP_ENCAP: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); - INP_WLOCK(inp); -#ifdef IPSEC_NAT_T - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: up == NULL", __func__)); -#endif - switch (optval) { - case 0: - /* Clear all UDP encap. */ -#ifdef IPSEC_NAT_T - up->u_flags &= ~UF_ESPINUDP_ALL; -#endif - break; -#ifdef IPSEC_NAT_T - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - up->u_flags &= ~UF_ESPINUDP_ALL; - if (optval == UDP_ENCAP_ESPINUDP) - up->u_flags |= UF_ESPINUDP; - else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE) - up->u_flags |= UF_ESPINUDP_NON_IKE; - break; -#endif - default: - error = EINVAL; - break; + if (!IPSEC_ENABLED(ipv4)) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); } - INP_WUNLOCK(inp); + error = UDPENCAP_PCBCTL(inp, sopt); break; +#endif /* INET */ +#endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); if ((optval != 0 && optval < 8) || (optval > 65535)) { INP_WUNLOCK(inp); error = EINVAL; break; } if (sopt->sopt_name == UDPLITE_SEND_CSCOV) up->u_txcslen = optval; else up->u_rxcslen = optval; INP_WUNLOCK(inp); break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { -#ifdef IPSEC_NAT_T +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +#ifdef INET case UDP_ENCAP: - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: up == NULL", __func__)); - optval = up->u_flags & UF_ESPINUDP_ALL; - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (!IPSEC_ENABLED(ipv4)) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = UDPENCAP_PCBCTL(inp, sopt); break; -#endif +#endif /* INET */ +#endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); if (sopt->sopt_name == UDPLITE_SEND_CSCOV) optval = up->u_txcslen; else optval = up->u_rxcslen; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; } return (error); } #ifdef INET #define UH_WLOCKED 2 #define UH_RLOCKED 1 #define UH_UNLOCKED 0 static int udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct udpiphdr *ui; int len = m->m_pkthdr.len; struct in_addr faddr, laddr; struct cmsghdr *cm; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin, src; int cscov_partial = 0; int error = 0; int ipflags; u_short fport, lport; int unlock_udbinfo, unlock_inp; u_char tos; uint8_t pr; uint16_t cscov = 0; uint32_t flowid = 0; uint8_t flowtype = M_HASHTYPE_NONE; /* * udp_output() may need to temporarily bind or connect the current * inpcb. As such, we don't know up front whether we will need the * pcbinfo lock or not. Do any work to decide what is needed up * front before acquiring any locks. */ if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { if (control) m_freem(control); m_freem(m); return (EMSGSIZE); } src.sin_family = 0; sin = (struct sockaddr_in *)addr; if (sin == NULL || (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { INP_WLOCK(inp); unlock_inp = UH_WLOCKED; } else { INP_RLOCK(inp); unlock_inp = UH_RLOCKED; } tos = inp->inp_ip_tos; if (control != NULL) { /* * XXX: Currently, we assume all the optional information is * stored in a single mbuf. */ if (control->m_next) { if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); m_freem(control); m_freem(m); return (EINVAL); } for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) { error = EINVAL; break; } if (cm->cmsg_level != IPPROTO_IP) continue; switch (cm->cmsg_type) { case IP_SENDSRCADDR: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) { error = EINVAL; break; } bzero(&src, sizeof(src)); src.sin_family = AF_INET; src.sin_len = sizeof(src); src.sin_port = inp->inp_lport; src.sin_addr = *(struct in_addr *)CMSG_DATA(cm); break; case IP_TOS: if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) { error = EINVAL; break; } tos = *(u_char *)CMSG_DATA(cm); break; case IP_FLOWID: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } flowid = *(uint32_t *) CMSG_DATA(cm); break; case IP_FLOWTYPE: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } flowtype = *(uint32_t *) CMSG_DATA(cm); break; #ifdef RSS case IP_RSSBUCKETID: if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) { error = EINVAL; break; } /* This is just a placeholder for now */ break; #endif /* RSS */ default: error = ENOPROTOOPT; break; } if (error) break; } m_freem(control); } if (error) { if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Depending on whether or not the application has bound or connected * the socket, we may have to do varying levels of work. The optimal * case is for a connected UDP socket, as a global lock isn't * required at all. * * In order to decide which we need, we require stability of the * inpcb binding, which we ensure by acquiring a read lock on the * inpcb. This doesn't strictly follow the lock order, so we play * the trylock and retry game; note that we may end up with more * conservative locks than required the second time around, so later * assertions have to accept that. Further analysis of the number of * misses under contention is required. * * XXXRW: Check that hash locking update here is correct. */ pr = inp->inp_socket->so_proto->pr_protocol; pcbinfo = udp_get_inpcbinfo(pr); sin = (struct sockaddr_in *)addr; if (sin != NULL && (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { INP_HASH_WLOCK(pcbinfo); unlock_udbinfo = UH_WLOCKED; } else if ((sin != NULL && ( (sin->sin_addr.s_addr == INADDR_ANY) || (sin->sin_addr.s_addr == INADDR_BROADCAST) || (inp->inp_laddr.s_addr == INADDR_ANY) || (inp->inp_lport == 0))) || (src.sin_family == AF_INET)) { INP_HASH_RLOCK(pcbinfo); unlock_udbinfo = UH_RLOCKED; } else unlock_udbinfo = UH_UNLOCKED; /* * If the IP_SENDSRCADDR control message was specified, override the * source address for this datagram. Its use is invalidated if the * address thus specified is incomplete or clobbers other inpcbs. */ laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_family == AF_INET) { INP_HASH_LOCK_ASSERT(pcbinfo); if ((lport == 0) || (laddr.s_addr == INADDR_ANY && src.sin_addr.s_addr == INADDR_ANY)) { error = EINVAL; goto release; } error = in_pcbbind_setup(inp, (struct sockaddr *)&src, &laddr.s_addr, &lport, td->td_ucred); if (error) goto release; } /* * If a UDP socket has been connected, then a local address/port will * have been selected and bound. * * If a UDP socket has not been connected to, then an explicit * destination address must be used, in which case a local * address/port may not have been selected and bound. */ if (sin != NULL) { INP_LOCK_ASSERT(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } /* * Jail may rewrite the destination address, so let it do * that before we use it. */ error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error) goto release; /* * If a local address or port hasn't yet been selected, or if * the destination address needs to be rewritten due to using * a special INADDR_ constant, invoke in_pcbconnect_setup() * to do the heavy lifting. Once a port is selected, we * commit the binding back to the socket; we also commit the * binding of the address if in jail. * * If we already have a valid binding and we're not * requesting a destination address rewrite, use a fast path. */ if (inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport == 0 || sin->sin_addr.s_addr == INADDR_ANY || sin->sin_addr.s_addr == INADDR_BROADCAST) { INP_HASH_LOCK_ASSERT(pcbinfo); error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, &faddr.s_addr, &fport, NULL, td->td_ucred); if (error) goto release; /* * XXXRW: Why not commit the port if the address is * !INADDR_ANY? */ /* Commit the local port if newly assigned. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); /* * Remember addr if jailed, to prevent * rebinding. */ if (prison_flag(td->td_ucred, PR_IP4)) inp->inp_laddr = laddr; inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->inp_lport = 0; error = EAGAIN; goto release; } inp->inp_flags |= INP_ANONPORT; } } else { faddr = sin->sin_addr; fport = sin->sin_port; } } else { INP_LOCK_ASSERT(inp); faddr = inp->inp_faddr; fport = inp->inp_fport; if (faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf for UDP, IP, and possible * link-layer headers. Immediate slide the data pointer back forward * since we won't use that space at this layer. */ M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } m->m_data += max_linkhdr; m->m_len -= max_linkhdr; m->m_pkthdr.len -= max_linkhdr; /* * Fill in mbuf with extended UDP header and addresses and length put * into network format. */ ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ ui->ui_pr = pr; ui->ui_src = laddr; ui->ui_dst = faddr; ui->ui_sport = lport; ui->ui_dport = fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); if (pr == IPPROTO_UDPLITE) { struct udpcb *up; uint16_t plen; up = intoudpcb(inp); cscov = up->u_txcslen; plen = (u_short)len + sizeof(struct udphdr); if (cscov >= plen) cscov = 0; ui->ui_len = htons(plen); ui->ui_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } else ui->ui_v = IPVERSION << 4; /* * Set the Don't Fragment bit in the IP header. */ if (inp->inp_flags & INP_DONTFRAG) { struct ip *ip; ip = (struct ip *)&ui->ui_i; ip->ip_off |= htons(IP_DF); } ipflags = 0; if (inp->inp_socket->so_options & SO_DONTROUTE) ipflags |= IP_ROUTETOIF; if (inp->inp_socket->so_options & SO_BROADCAST) ipflags |= IP_ALLOWBROADCAST; if (inp->inp_flags & INP_ONESBCAST) ipflags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif /* * Set up checksum and output datagram. */ ui->ui_sum = 0; if (pr == IPPROTO_UDPLITE) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; if (cscov_partial) { if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0) ui->ui_sum = 0xffff; } else { if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0) ui->ui_sum = 0xffff; } } else if (V_udp_cksum) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, htons((u_short)len + sizeof(struct udphdr) + pr)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len); ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ ((struct ip *)ui)->ip_tos = tos; /* XXX */ UDPSTAT_INC(udps_opackets); /* * Setup flowid / RSS information for outbound socket. * * Once the UDP code decides to set a flowid some other way, * this allows the flowid to be overridden by userland. */ if (flowtype != M_HASHTYPE_NONE) { m->m_pkthdr.flowid = flowid; M_HASHTYPE_SET(m, flowtype); #ifdef RSS } else { uint32_t hash_val, hash_type; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later later on connected socket values should be * cached in the inpcb and reused, rather than constantly * re-calculating it. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v4(faddr, laddr, fport, lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } #endif } #ifdef RSS /* * Don't override with the inp cached flowid value. * * Depending upon the kind of send being done, the inp * flowid/flowtype values may actually not be appropriate * for this particular socket send. * * We should either leave the flowid at zero (which is what is * currently done) or set it to some software generated * hash value based on the packet contents. */ ipflags |= IP_NODEFAULTFLOWID; #endif /* RSS */ if (unlock_udbinfo == UH_WLOCKED) INP_HASH_WUNLOCK(pcbinfo); else if (unlock_udbinfo == UH_RLOCKED) INP_HASH_RUNLOCK(pcbinfo); UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); error = ip_output(m, inp->inp_options, (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags, inp->inp_moptions, inp); if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); return (error); release: if (unlock_udbinfo == UH_WLOCKED) { KASSERT(unlock_inp == UH_WLOCKED, ("%s: excl udbinfo lock, shared inp lock", __func__)); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); } else if (unlock_udbinfo == UH_RLOCKED) { KASSERT(unlock_inp == UH_RLOCKED, ("%s: shared udbinfo lock, excl inp lock", __func__)); INP_HASH_RUNLOCK(pcbinfo); INP_RUNLOCK(inp); } else if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); m_freem(m); return (error); } - -#if defined(IPSEC) && defined(IPSEC_NAT_T) -/* - * Potentially decap ESP in UDP frame. Check for an ESP header - * and optional marker; if present, strip the UDP header and - * push the result through IPSec. - * - * Returns mbuf to be processed (potentially re-allocated) or - * NULL if consumed and/or processed. - */ -static struct mbuf * -udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off) -{ - size_t minlen, payload, skip, iphlen; - caddr_t data; - struct udpcb *up; - struct m_tag *tag; - struct udphdr *udphdr; - struct ip *ip; - - INP_RLOCK_ASSERT(inp); - - /* - * Pull up data so the longest case is contiguous: - * IP/UDP hdr + non ESP marker + ESP hdr. - */ - minlen = off + sizeof(uint64_t) + sizeof(struct esp); - if (minlen > m->m_pkthdr.len) - minlen = m->m_pkthdr.len; - if ((m = m_pullup(m, minlen)) == NULL) { - IPSECSTAT_INC(ips_in_inval); - return (NULL); /* Bypass caller processing. */ - } - data = mtod(m, caddr_t); /* Points to ip header. */ - payload = m->m_len - off; /* Size of payload. */ - - if (payload == 1 && data[off] == '\xff') - return (m); /* NB: keepalive packet, no decap. */ - - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); - KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0, - ("u_flags 0x%x", up->u_flags)); - - /* - * Check that the payload is large enough to hold an - * ESP header and compute the amount of data to remove. - * - * NB: the caller has already done a pullup for us. - * XXX can we assume alignment and eliminate bcopys? - */ - if (up->u_flags & UF_ESPINUDP_NON_IKE) { - /* - * draft-ietf-ipsec-nat-t-ike-0[01].txt and - * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring - * possible AH mode non-IKE marker+non-ESP marker - * from draft-ietf-ipsec-udp-encaps-00.txt. - */ - uint64_t marker; - - if (payload <= sizeof(uint64_t) + sizeof(struct esp)) - return (m); /* NB: no decap. */ - bcopy(data + off, &marker, sizeof(uint64_t)); - if (marker != 0) /* Non-IKE marker. */ - return (m); /* NB: no decap. */ - skip = sizeof(uint64_t) + sizeof(struct udphdr); - } else { - uint32_t spi; - - if (payload <= sizeof(struct esp)) { - IPSECSTAT_INC(ips_in_inval); - m_freem(m); - return (NULL); /* Discard. */ - } - bcopy(data + off, &spi, sizeof(uint32_t)); - if (spi == 0) /* Non-ESP marker. */ - return (m); /* NB: no decap. */ - skip = sizeof(struct udphdr); - } - - /* - * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember - * the UDP ports. This is required if we want to select - * the right SPD for multiple hosts behind same NAT. - * - * NB: ports are maintained in network byte order everywhere - * in the NAT-T code. - */ - tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS, - 2 * sizeof(uint16_t), M_NOWAIT); - if (tag == NULL) { - IPSECSTAT_INC(ips_in_nomem); - m_freem(m); - return (NULL); /* Discard. */ - } - iphlen = off - sizeof(struct udphdr); - udphdr = (struct udphdr *)(data + iphlen); - ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport; - ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport; - m_tag_prepend(m, tag); - - /* - * Remove the UDP header (and possibly the non ESP marker) - * IP header length is iphlen - * Before: - * <--- off ---> - * +----+------+-----+ - * | IP | UDP | ESP | - * +----+------+-----+ - * <-skip-> - * After: - * +----+-----+ - * | IP | ESP | - * +----+-----+ - * <-skip-> - */ - ovbcopy(data, data + skip, iphlen); - m_adj(m, skip); - - ip = mtod(m, struct ip *); - ip->ip_len = htons(ntohs(ip->ip_len) - skip); - ip->ip_p = IPPROTO_ESP; - - /* - * We cannot yet update the cksums so clear any - * h/w cksum flags as they are no longer valid. - */ - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) - m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - - (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p), - AF_INET, ip->ip_p); - return (NULL); /* NB: consumed, bypass processing. */ -} -#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */ - static void udp_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_abort: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp_attach: inp != NULL")); error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = sotoinpcb(so); inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } #endif /* INET */ int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx) { struct inpcb *inp; struct udpcb *up; KASSERT(so->so_type == SOCK_DGRAM, ("udp_set_kernel_tunneling: !dgram")); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL")); INP_WLOCK(inp); up = intoudpcb(inp); if ((up->u_tun_func != NULL) || (up->u_icmp_func != NULL)) { INP_WUNLOCK(inp); return (EBUSY); } up->u_tun_func = f; up->u_icmp_func = i; up->u_tun_ctx = ctx; INP_WUNLOCK(inp); return (0); } #ifdef INET static int udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_close: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in *sin; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_WUNLOCK(inp); return (EISCONN); } sin = (struct sockaddr_in *)nam; error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error != 0) { INP_WUNLOCK(inp); return (error); } INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); INP_WUNLOCK(inp); return (error); } static void udp_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("udp_detach: not disconnected")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); inp->inp_ppcb = NULL; in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_faddr.s_addr == INADDR_ANY) { INP_WUNLOCK(inp); return (ENOTCONN); } INP_HASH_WLOCK(pcbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); INP_WUNLOCK(inp); return (0); } static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_send: inp == NULL")); return (udp_output(inp, m, addr, control, td)); } #endif /* INET */ int udp_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } #ifdef INET struct pr_usrreqs udp_usrreqs = { .pru_abort = udp_abort, .pru_attach = udp_attach, .pru_bind = udp_bind, .pru_connect = udp_connect, .pru_control = in_control, .pru_detach = udp_detach, .pru_disconnect = udp_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = udp_send, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_shutdown = udp_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp_close, }; #endif /* INET */ diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 62c5e0b05481..e913c0ab977a 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -1,748 +1,745 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 * $FreeBSD$ */ #ifndef __KAME_NETINET_IN_H_INCLUDED_ #error "do not include netinet6/in6.h directly, include netinet/in.h. see RFC2553" #endif #ifndef _NETINET6_IN6_H_ #define _NETINET6_IN6_H_ /* * Identification of the network protocol stack * for *BSD-current/release: http://www.kame.net/dev/cvsweb.cgi/kame/COVERAGE * has the table of implementation/integration differences. */ #define __KAME__ #define __KAME_VERSION "FreeBSD" /* * IPv6 port allocation rules should mirror the IPv4 rules and are controlled * by the net.inet.ip.portrange sysctl tree. The following defines exist * for compatibility with userland applications that need them. */ #if __BSD_VISIBLE #define IPV6PORT_RESERVED 1024 #define IPV6PORT_ANONMIN 49152 #define IPV6PORT_ANONMAX 65535 #define IPV6PORT_RESERVEDMIN 600 #define IPV6PORT_RESERVEDMAX (IPV6PORT_RESERVED-1) #endif /* * IPv6 address */ struct in6_addr { union { uint8_t __u6_addr8[16]; uint16_t __u6_addr16[8]; uint32_t __u6_addr32[4]; } __u6_addr; /* 128-bit IP6 address */ }; #define s6_addr __u6_addr.__u6_addr8 #ifdef _KERNEL /* XXX nonstandard */ #define s6_addr8 __u6_addr.__u6_addr8 #define s6_addr16 __u6_addr.__u6_addr16 #define s6_addr32 __u6_addr.__u6_addr32 #endif #define INET6_ADDRSTRLEN 46 /* * XXX missing POSIX.1-2001 macro IPPROTO_IPV6. */ /* * Socket address for IPv6 */ #if __BSD_VISIBLE #define SIN6_LEN #endif struct sockaddr_in6 { uint8_t sin6_len; /* length of this struct */ sa_family_t sin6_family; /* AF_INET6 */ in_port_t sin6_port; /* Transport layer port # */ uint32_t sin6_flowinfo; /* IP6 flow information */ struct in6_addr sin6_addr; /* IP6 address */ uint32_t sin6_scope_id; /* scope zone index */ }; /* * Local definition for masks */ #ifdef _KERNEL /* XXX nonstandard */ #define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} #define IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}} #endif #ifdef _KERNEL extern const struct sockaddr_in6 sa6_any; extern const struct in6_addr in6mask0; extern const struct in6_addr in6mask32; extern const struct in6_addr in6mask64; extern const struct in6_addr in6mask96; extern const struct in6_addr in6mask128; #endif /* _KERNEL */ /* * Macros started with IPV6_ADDR is KAME local */ #ifdef _KERNEL /* XXX nonstandard */ #if _BYTE_ORDER == _BIG_ENDIAN #define IPV6_ADDR_INT32_ONE 1 #define IPV6_ADDR_INT32_TWO 2 #define IPV6_ADDR_INT32_MNL 0xff010000 #define IPV6_ADDR_INT32_MLL 0xff020000 #define IPV6_ADDR_INT32_SMP 0x0000ffff #define IPV6_ADDR_INT16_ULL 0xfe80 #define IPV6_ADDR_INT16_USL 0xfec0 #define IPV6_ADDR_INT16_MLL 0xff02 #elif _BYTE_ORDER == _LITTLE_ENDIAN #define IPV6_ADDR_INT32_ONE 0x01000000 #define IPV6_ADDR_INT32_TWO 0x02000000 #define IPV6_ADDR_INT32_MNL 0x000001ff #define IPV6_ADDR_INT32_MLL 0x000002ff #define IPV6_ADDR_INT32_SMP 0xffff0000 #define IPV6_ADDR_INT16_ULL 0x80fe #define IPV6_ADDR_INT16_USL 0xc0fe #define IPV6_ADDR_INT16_MLL 0x02ff #endif #endif /* * Definition of some useful macros to handle IP6 addresses */ #if __BSD_VISIBLE #define IN6ADDR_ANY_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6ADDR_LOOPBACK_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_NODELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}} #define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}} #endif extern const struct in6_addr in6addr_any; extern const struct in6_addr in6addr_loopback; #if __BSD_VISIBLE extern const struct in6_addr in6addr_nodelocal_allnodes; extern const struct in6_addr in6addr_linklocal_allnodes; extern const struct in6_addr in6addr_linklocal_allrouters; extern const struct in6_addr in6addr_linklocal_allv2routers; #endif /* * Equality * NOTE: Some of kernel programming environment (for example, openbsd/sparc) * does not supply memcmp(). For userland memcmp() is preferred as it is * in ANSI standard. */ #ifdef _KERNEL #define IN6_ARE_ADDR_EQUAL(a, b) \ (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #else #if __BSD_VISIBLE #define IN6_ARE_ADDR_EQUAL(a, b) \ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #endif #endif /* * Unspecified */ #define IN6_IS_ADDR_UNSPECIFIED(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] == 0) /* * Loopback */ #define IN6_IS_ADDR_LOOPBACK(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] == ntohl(1)) /* * IPv4 compatible */ #define IN6_IS_ADDR_V4COMPAT(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] != 0 && \ (a)->__u6_addr.__u6_addr32[3] != ntohl(1)) /* * Mapped */ #define IN6_IS_ADDR_V4MAPPED(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == ntohl(0x0000ffff)) /* * KAME Scope Values */ #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ #define IPV6_ADDR_SCOPE_GLOBAL 0x0e #else #define __IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define __IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define __IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define __IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ #define __IPV6_ADDR_SCOPE_GLOBAL 0x0e #endif /* * Unicast Scope * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373). */ #define IN6_IS_ADDR_LINKLOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) #define IN6_IS_ADDR_SITELOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) /* * Multicast */ #define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #else #define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #endif /* * Multicast Scope */ #ifdef _KERNEL /* refers nonstandard items */ #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) #define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL)) #define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL)) #define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL)) #else #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) #define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL)) #define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL)) #define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL)) #endif #ifdef _KERNEL /* nonstandard */ /* * KAME Scope */ #define IN6_IS_SCOPE_LINKLOCAL(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a))) #define IN6_IS_SCOPE_EMBED(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_INTFACELOCAL(a))) #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_pltime) #define IFA6_IS_INVALID(a) \ ((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_vltime) #endif /* _KERNEL */ /* * IP6 route structure */ #if __BSD_VISIBLE struct route_in6 { struct rtentry *ro_rt; struct llentry *ro_lle; /* * ro_prepend and ro_plen are only used for bpf to pass in a * preformed header. They are not cacheable. */ char *ro_prepend; uint16_t ro_plen; uint16_t ro_flags; uint16_t ro_mtu; /* saved ro_rt mtu */ uint16_t spare; struct sockaddr_in6 ro_dst; }; #endif #ifdef _KERNEL #define MTAG_ABI_IPV6 1444287380 /* IPv6 ABI */ #define IPV6_TAG_DIRECT 0 /* direct-dispatch IPv6 */ #endif /* _KERNEL */ /* * Options for use with [gs]etsockopt at the IPV6 level. * First word of comment is data type; bool is stored in int. */ /* no hdrincl */ #if 0 /* the followings are relic in IPv4 and hence are disabled */ #define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */ #define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */ #define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */ #define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */ #define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */ #endif #define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */ #define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */ #define IPV6_MULTICAST_IF 9 /* u_int; set/get IP6 multicast i/f */ #define IPV6_MULTICAST_HOPS 10 /* int; set/get IP6 multicast hops */ #define IPV6_MULTICAST_LOOP 11 /* u_int; set/get IP6 multicast loopback */ #define IPV6_JOIN_GROUP 12 /* ipv6_mreq; join a group membership */ #define IPV6_LEAVE_GROUP 13 /* ipv6_mreq; leave a group membership */ #define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */ #define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */ /* RFC2292 options */ #ifdef _KERNEL #define IPV6_2292PKTINFO 19 /* bool; send/recv if, src/dst addr */ #define IPV6_2292HOPLIMIT 20 /* bool; hop limit */ #define IPV6_2292NEXTHOP 21 /* bool; next hop addr */ #define IPV6_2292HOPOPTS 22 /* bool; hop-by-hop option */ #define IPV6_2292DSTOPTS 23 /* bool; destinaion option */ #define IPV6_2292RTHDR 24 /* bool; routing header */ #define IPV6_2292PKTOPTIONS 25 /* buf/cmsghdr; set/get IPv6 options */ #endif #define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */ #define IPV6_V6ONLY 27 /* bool; make AF_INET6 sockets v6 only */ #ifndef _KERNEL #define IPV6_BINDV6ONLY IPV6_V6ONLY #endif -#if 1 /* IPSEC */ #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ -#endif /* IPSEC */ - /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ #define IPV6_FW_DEL 31 /* delete a firewall rule from chain */ #define IPV6_FW_FLUSH 32 /* flush firewall rule chain */ #define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */ #define IPV6_FW_GET 34 /* get entire firewall rule chain */ #endif /* new socket options introduced in RFC3542 */ #define IPV6_RTHDRDSTOPTS 35 /* ip6_dest; send dst option before rthdr */ #define IPV6_RECVPKTINFO 36 /* bool; recv if, dst addr */ #define IPV6_RECVHOPLIMIT 37 /* bool; recv hop limit */ #define IPV6_RECVRTHDR 38 /* bool; recv routing header */ #define IPV6_RECVHOPOPTS 39 /* bool; recv hop-by-hop option */ #define IPV6_RECVDSTOPTS 40 /* bool; recv dst option after rthdr */ #ifdef _KERNEL #define IPV6_RECVRTHDRDSTOPTS 41 /* bool; recv dst option before rthdr */ #endif #define IPV6_USE_MIN_MTU 42 /* bool; send packets at the minimum MTU */ #define IPV6_RECVPATHMTU 43 /* bool; notify an according MTU */ #define IPV6_PATHMTU 44 /* mtuinfo; get the current path MTU (sopt), 4 bytes int; MTU notification (cmsg) */ #if 0 /*obsoleted during 2292bis -> 3542*/ #define IPV6_REACHCONF 45 /* no data; ND reachability confirm (cmsg only/not in of RFC3542) */ #endif /* more new socket options introduced in RFC3542 */ #define IPV6_PKTINFO 46 /* in6_pktinfo; send if, src addr */ #define IPV6_HOPLIMIT 47 /* int; send hop limit */ #define IPV6_NEXTHOP 48 /* sockaddr; next hop addr */ #define IPV6_HOPOPTS 49 /* ip6_hbh; send hop-by-hop option */ #define IPV6_DSTOPTS 50 /* ip6_dest; send dst option befor rthdr */ #define IPV6_RTHDR 51 /* ip6_rthdr; send routing header */ #if 0 #define IPV6_PKTOPTIONS 52 /* buf/cmsghdr; set/get IPv6 options */ /* obsoleted by RFC3542 */ #endif #define IPV6_RECVTCLASS 57 /* bool; recv traffic class values */ #define IPV6_AUTOFLOWLABEL 59 /* bool; attach flowlabel automagically */ #define IPV6_TCLASS 61 /* int; send traffic class value */ #define IPV6_DONTFRAG 62 /* bool; disable IPv6 fragmentation */ #define IPV6_PREFER_TEMPADDR 63 /* int; prefer temporary addresses as * the source address. */ #define IPV6_BINDANY 64 /* bool: allow bind to any address */ #define IPV6_BINDMULTI 65 /* bool; allow multibind to same addr/port */ #define IPV6_RSS_LISTEN_BUCKET 66 /* int; set RSS listen bucket */ #define IPV6_FLOWID 67 /* int; flowid of given socket */ #define IPV6_FLOWTYPE 68 /* int; flowtype of given socket */ #define IPV6_RSSBUCKETID 69 /* int; RSS bucket ID of given socket */ #define IPV6_RECVFLOWID 70 /* bool; receive IP6 flowid/flowtype w/ datagram */ #define IPV6_RECVRSSBUCKETID 71 /* bool; receive IP6 RSS bucket id w/ datagram */ /* * The following option is private; do not use it from user applications. * It is deliberately defined to the same value as IP_MSFILTER. */ #define IPV6_MSFILTER 74 /* struct __msfilterreq; * set/get multicast source filter list. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */ #define IPV6_RTHDR_STRICT 1 /* this hop must be a neighbor. XXX old spec */ #define IPV6_RTHDR_TYPE_0 0 /* IPv6 routing header type 0 */ /* * Defaults and limits for options */ #define IPV6_DEFAULT_MULTICAST_HOPS 1 /* normally limit m'casts to 1 hop */ #define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* * The im6o_membership vector for each socket is now dynamically allocated at * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized * according to a power-of-two increment. */ #define IPV6_MIN_MEMBERSHIPS 31 #define IPV6_MAX_MEMBERSHIPS 4095 /* * Default resource limits for IPv6 multicast source filtering. * These may be modified by sysctl. */ #define IPV6_MAX_GROUP_SRC_FILTER 512 /* sources per group */ #define IPV6_MAX_SOCK_SRC_FILTER 128 /* sources per socket/group */ /* * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP. */ struct ipv6_mreq { struct in6_addr ipv6mr_multiaddr; unsigned int ipv6mr_interface; }; /* * IPV6_PKTINFO: Packet information(RFC2292 sec 5) */ struct in6_pktinfo { struct in6_addr ipi6_addr; /* src/dst IPv6 address */ unsigned int ipi6_ifindex; /* send/recv interface index */ }; /* * Control structure for IPV6_RECVPATHMTU socket option. */ struct ip6_mtuinfo { struct sockaddr_in6 ip6m_addr; /* or sockaddr_storage? */ uint32_t ip6m_mtu; }; /* * Argument for IPV6_PORTRANGE: * - which range to search when port is unspecified at bind() or connect() */ #define IPV6_PORTRANGE_DEFAULT 0 /* default range */ #define IPV6_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ #define IPV6_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ #if __BSD_VISIBLE /* * Definitions for inet6 sysctl operations. * * Third level is protocol number. * Fourth level is desired variable within that protocol. */ #define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */ /* * Names for IP sysctl objects */ #define IPV6CTL_FORWARDING 1 /* act as router */ #define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/ #define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */ #ifdef notyet #define IPV6CTL_DEFMTU 4 /* default MTU */ #endif #define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */ #define IPV6CTL_STATS 6 /* stats */ #define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */ #define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */ #define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */ #define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ #define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */ #define IPV6CTL_ACCEPT_RTADV 12 /* 13; unused; was: IPV6CTL_KEEPFAITH */ #define IPV6CTL_LOG_INTERVAL 14 #define IPV6CTL_HDRNESTLIMIT 15 #define IPV6CTL_DAD_COUNT 16 #define IPV6CTL_AUTO_FLOWLABEL 17 #define IPV6CTL_DEFMCASTHLIM 18 #define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */ #define IPV6CTL_KAME_VERSION 20 #define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */ #define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */ #if 0 /* obsolete */ #define IPV6CTL_MAPPED_ADDR 23 #endif #define IPV6CTL_V6ONLY 24 /* IPV6CTL_RTEXPIRE 25 deprecated */ /* IPV6CTL_RTMINEXPIRE 26 deprecated */ /* IPV6CTL_RTMAXCACHE 27 deprecated */ #define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ #define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ #define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ #define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ #define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ #define IPV6CTL_PREFER_TEMPADDR 37 /* prefer temporary addr as src */ #define IPV6CTL_ADDRCTLPOLICY 38 /* get/set address selection policy */ #define IPV6CTL_USE_DEFAULTZONE 39 /* use default scope zone */ #define IPV6CTL_MAXFRAGS 41 /* max fragments */ #if 0 #define IPV6CTL_IFQ 42 /* ip6intrq node */ #define IPV6CTL_ISATAPRTR 43 /* isatap router */ #endif #define IPV6CTL_MCAST_PMTU 44 /* enable pMTU discovery for multicast? */ /* New entries should be added here from current IPV6CTL_MAXID value. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6CTL_STEALTH 45 #define ICMPV6CTL_ND6_ONLINKNSRFC4861 47 #define IPV6CTL_NO_RADR 48 /* No defroute from RA */ #define IPV6CTL_NORBIT_RAIF 49 /* Disable R-bit in NA on RA * receiving IF. */ #define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding enabled */ #define IPV6CTL_INTRQMAXLEN 51 /* max length of IPv6 netisr queue */ #define IPV6CTL_INTRDQMAXLEN 52 /* max length of direct IPv6 netisr * queue */ #define IPV6CTL_MAXID 53 #endif /* __BSD_VISIBLE */ /* * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/, * the protocol specific mbuf flags are shared between them. */ #define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */ #define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */ #define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */ #define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */ #define M_AUTHIPHDR M_PROTO4 #define M_DECRYPTED M_PROTO5 #define M_LOOP M_PROTO6 #define M_AUTHIPDGM M_PROTO7 #define M_RTALERT_MLD M_PROTO8 #ifdef _KERNEL struct cmsghdr; struct ip6_hdr; int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t); int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t); int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t, u_int32_t); int in6_localaddr(struct in6_addr *); int in6_localip(struct in6_addr *); int in6_ifhasaddr(struct ifnet *, struct in6_addr *); int in6_addrscope(const struct in6_addr *); char *ip6_sprintf(char *, const struct in6_addr *); struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); extern void in6_if_up(struct ifnet *); struct sockaddr; extern u_char ip6_protox[]; void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); void in6_sin6_2_sin_in_sock(struct sockaddr *nam); void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); extern void addrsel_policy_init(void); #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #endif /* _KERNEL */ #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef _SOCKLEN_T_DECLARED typedef __socklen_t socklen_t; #define _SOCKLEN_T_DECLARED #endif #if __BSD_VISIBLE __BEGIN_DECLS struct cmsghdr; extern int inet6_option_space(int); extern int inet6_option_init(void *, struct cmsghdr **, int); extern int inet6_option_append(struct cmsghdr *, const uint8_t *, int, int); extern uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int); extern int inet6_option_next(const struct cmsghdr *, uint8_t **); extern int inet6_option_find(const struct cmsghdr *, uint8_t **, int); extern size_t inet6_rthdr_space(int, int); extern struct cmsghdr *inet6_rthdr_init(void *, int); extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *, unsigned int); extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int); #if 0 /* not implemented yet */ extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *); #endif extern int inet6_rthdr_segments(const struct cmsghdr *); extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int); extern int inet6_rthdr_getflags(const struct cmsghdr *, int); extern int inet6_opt_init(void *, socklen_t); extern int inet6_opt_append(void *, socklen_t, int, uint8_t, socklen_t, uint8_t, void **); extern int inet6_opt_finish(void *, socklen_t, int); extern int inet6_opt_set_val(void *, int, void *, socklen_t); extern int inet6_opt_next(void *, socklen_t, int, uint8_t *, socklen_t *, void **); extern int inet6_opt_find(void *, socklen_t, int, uint8_t, socklen_t *, void **); extern int inet6_opt_get_val(void *, int, void *, socklen_t); extern socklen_t inet6_rth_space(int, int); extern void *inet6_rth_init(void *, socklen_t, int, int); extern int inet6_rth_add(void *, const struct in6_addr *); extern int inet6_rth_reverse(const void *, void *); extern int inet6_rth_segments(const void *); extern struct in6_addr *inet6_rth_getaddr(const void *, int); __END_DECLS #endif /* __BSD_VISIBLE */ #endif /* !_NETINET6_IN6_H_ */ diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 0678b4c1181c..34ce8aa658fb 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -1,651 +1,619 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" #include "opt_sctp.h" #include "opt_mpath.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SCTP #include #include #include #include #include #endif /* SCTP */ -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - #include /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. */ FEATURE(inet6, "Internet Protocol version 6"); extern struct domain inet6domain; static struct pr_usrreqs nousrreqs; #define PR_LISTEN 0 #define PR_ABRTACPTDIS 0 /* Spacer for loadable protocols. */ #define IP6PROTOSPACER \ { \ .pr_domain = &inet6domain, \ .pr_protocol = PROTO_SPACER, \ .pr_usrreqs = &nousrreqs \ } struct protosw inet6sw[] = { { .pr_type = 0, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV6, .pr_init = ip6_init, .pr_slowtimo = frag6_slowtimo, .pr_drain = frag6_drain, .pr_usrreqs = &nousrreqs, }, { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_UDP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = udp6_input, .pr_ctlinput = udp6_ctlinput, .pr_ctloutput = ip6_ctloutput, #ifndef INET /* Do not call initialization twice. */ .pr_init = udp_init, #endif .pr_usrreqs = &udp6_usrreqs, }, { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN, .pr_input = tcp6_input, .pr_ctlinput = tcp6_ctlinput, .pr_ctloutput = tcp_ctloutput, #ifndef INET /* don't call initialization and timeout routines twice */ .pr_init = tcp_init, .pr_slowtimo = tcp_slowtimo, #endif .pr_drain = tcp_drain, .pr_usrreqs = &tcp6_usrreqs, }, #ifdef SCTP { .pr_type = SOCK_SEQPACKET, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_input = sctp6_input, .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, .pr_drain = sctp_drain, #ifndef INET /* Do not call initialization twice. */ .pr_init = sctp_init, #endif .pr_usrreqs = &sctp6_usrreqs }, { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_input = sctp6_input, .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, .pr_drain = sctp_drain, .pr_usrreqs = &sctp6_usrreqs }, #endif /* SCTP */ { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_UDPLITE, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = udp6_input, .pr_ctlinput = udplite6_ctlinput, .pr_ctloutput = udp_ctloutput, #ifndef INET /* Do not call initialization twice. */ .pr_init = udplite_init, #endif .pr_usrreqs = &udp6_usrreqs, }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_RAW, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = rip6_input, .pr_output = rip6_output, .pr_ctlinput = rip6_ctlinput, .pr_ctloutput = rip6_ctloutput, #ifndef INET /* Do not call initialization twice. */ .pr_init = rip_init, #endif .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_ICMPV6, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = icmp6_input, .pr_output = rip6_output, .pr_ctlinput = rip6_ctlinput, .pr_ctloutput = rip6_ctloutput, .pr_fasttimo = icmp6_fasttimo, .pr_slowtimo = icmp6_slowtimo, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_DSTOPTS, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = dest6_input, .pr_usrreqs = &nousrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_ROUTING, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = route6_input, .pr_usrreqs = &nousrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_FRAGMENT, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = frag6_input, .pr_usrreqs = &nousrreqs }, -#ifdef IPSEC -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_AH, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_usrreqs = &nousrreqs, -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_ctlinput = esp6_ctlinput, - .pr_usrreqs = &nousrreqs, -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_IPCOMP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_usrreqs = &nousrreqs, -}, -#endif /* IPSEC */ #ifdef INET { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV4, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip6_usrreqs }, #endif /* INET */ { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV6, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_GRE, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_init = encap_init, .pr_usrreqs = &rip6_usrreqs }, { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, /* Spacer n-times for loadable protocols. */ IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, IP6PROTOSPACER, /* raw wildcard */ { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = rip6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }, }; extern int in6_inithead(void **, int); #ifdef VIMAGE extern int in6_detachhead(void **, int); #endif struct domain inet6domain = { .dom_family = AF_INET6, .dom_name = "internet6", .dom_protosw = (struct protosw *)inet6sw, .dom_protoswNPROTOSW = (struct protosw *)&inet6sw[nitems(inet6sw)], #ifdef RADIX_MPATH .dom_rtattach = rn6_mpath_inithead, #else .dom_rtattach = in6_inithead, #endif #ifdef VIMAGE .dom_rtdetach = in6_detachhead, #endif .dom_ifattach = in6_domifattach, .dom_ifdetach = in6_domifdetach, .dom_ifmtu = in6_domifmtu }; VNET_DOMAIN_SET(inet6); /* * Internet configuration info */ #ifndef IPV6FORWARDING #ifdef GATEWAY6 #define IPV6FORWARDING 1 /* forward IP6 packets not for us */ #else #define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */ #endif /* GATEWAY6 */ #endif /* !IPV6FORWARDING */ #ifndef IPV6_SENDREDIRECTS #define IPV6_SENDREDIRECTS 1 #endif VNET_DEFINE(int, ip6_forwarding) = IPV6FORWARDING; /* act as router? */ VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS; VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM; VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS; VNET_DEFINE(int, ip6_accept_rtadv) = 0; VNET_DEFINE(int, ip6_no_radr) = 0; VNET_DEFINE(int, ip6_norbit_raif) = 0; VNET_DEFINE(int, ip6_rfc6204w3) = 0; VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */ VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */ VNET_DEFINE(int, ip6_log_interval) = 5; VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we * process? */ VNET_DEFINE(int, ip6_dad_count) = 1; /* DupAddrDetectionTransmits */ VNET_DEFINE(int, ip6_auto_flowlabel) = 1; VNET_DEFINE(int, ip6_use_deprecated) = 1;/* allow deprecated addr * (RFC2462 5.5.4) */ VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix * walk list every 5 sec. */ VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */ VNET_DEFINE(int, ip6_v6only) = 1; VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L; #ifdef IPSTEALTH VNET_DEFINE(int, ip6stealth) = 0; #endif VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS * (RFC 4861) */ /* icmp6 */ /* * BSDI4 defines these variables in in_proto.c... * XXX: what if we don't define INET? Should we define pmtu6_expire * or so? (jinmei@kame.net 19990310) */ VNET_DEFINE(int, pmtu_expire) = 60*10; VNET_DEFINE(int, pmtu_probe) = 60*2; /* ICMPV6 parameters */ VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */ VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */ VNET_DEFINE(int, icmp6errppslim) = 100; /* 100pps */ /* control how to respond to NI queries */ VNET_DEFINE(int, icmp6_nodeinfo) = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK); VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1; /* * sysctl related items. */ SYSCTL_NODE(_net, PF_INET6, inet6, CTLFLAG_RW, 0, "Internet6 Family"); /* net.inet6 */ SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, CTLFLAG_RW, 0, "IP6"); SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW, 0, "ICMP6"); SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW, 0, "UDP6"); SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6"); #ifdef SCTP SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6"); #endif -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6"); #endif /* IPSEC */ /* net.inet6.ip6 */ static int sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS) { int error, val; val = V_ip6_temp_preferred_lifetime; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || !req->newptr) return (error); if (val < V_ip6_desync_factor + V_ip6_temp_regen_advance) return (EINVAL); V_ip6_temp_preferred_lifetime = val; return (0); } static int sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) { int error, val; val = V_ip6_temp_valid_lifetime; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || !req->newptr) return (error); if (val < V_ip6_temp_preferred_lifetime) return (EINVAL); V_ip6_temp_valid_lifetime = val; return (0); } SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0, "Enable forwarding of IPv6 packets between interfaces"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0, "Send ICMPv6 redirects for unforwardable IPv6 packets"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0, "Default hop limit to use for outgoing IPv6 packets"); SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat, ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "Default maximum number of outstanding fragmented IPv6 packets. " "A value of 0 means no fragmented packets will be accepted, while a " "a value of -1 means no limit"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, "Default value of per-interface flag for accepting ICMPv6 RA messages"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0, "Default value of per-interface flag to control whether routers " "sending ICMPv6 RA messages on that interface are added into the " "default router list"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0, "Always set clear the R flag in ICMPv6 NA messages when accepting RA " "on the interface"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0, "Accept the default router list from ICMPv6 RA messages even " "when packet forwarding is enabled"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, "Frequency in seconds at which to log IPv6 forwarding errors"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, "Default maximum number of IPv6 extension headers permitted on " "incoming IPv6 packets, 0 for no artificial limit"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0, "Number of ICMPv6 NS messages sent during duplicate address detection"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, "Provide an IPv6 flowlabel in outbound packets"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, "Default hop limit for IPv6 multicast packets originating from this " "node"); SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version, CTLFLAG_RD, __KAME_VERSION, 0, "KAME version string"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, "Allow the use of addresses whose preferred lifetimes have expired"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0, ""); /* XXX unused */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, "Create RFC3041 temporary addresses for autoconfigured addresses"); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_ip6_temppltime, "I", "Maximum preferred lifetime for temporary addresses"); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_ip6_tempvltime, "I", "Maximum valid lifetime for temporary addresses"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0, "Restrict AF_INET6 sockets to IPv6 addresses only"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0, "Default value of per-interface flag for automatically adding an IPv6 " "link-local address to interfaces when attached"); SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, struct rip6stat, rip6stat, "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "Prefer RFC3041 temporary addresses in source address selection"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0, "Use the default scope zone when none is specified"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0, "Maximum allowed number of outstanding IPv6 packet fragments"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0, "Enable path MTU discovery for multicast packets"); #ifdef IPSTEALTH SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6stealth), 0, "Forward IPv6 packets without decrementing their TTL"); #endif /* net.inet6.icmp6 */ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "Accept ICMPv6 redirect messages"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, ""); /* XXX unused */ SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, struct icmp6stat, icmp6stat, "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0, "Frequency in seconds of checks for expired prefixes and routers"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0, "Delay in seconds before probing for reachability"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, "Number of ICMPv6 NS messages sent during reachability detection"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, "Number of ICMPv6 NS messages sent during address resolution"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "Create a loopback route when configuring an IPv6 address"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0, "Mask of enabled RF4620 node information query types"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX, nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0, "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup " "for compatibility with KAME implementation"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "Maximum number of ICMPv6 error messages per second"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, ""); /* XXX unused */ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0, "Log NDP debug messages"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861), 0, "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861"); diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index 1a3e5f4dee27..98967a9b936f 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -1,627 +1,434 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. * */ void ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst = NULL; struct rtentry *rt = NULL; struct route_in6 rin6; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ u_int32_t inzone, outzone; struct in6_addr src_in6, dst_in6, odst; -#ifdef IPSEC - struct secpolicy *sp = NULL; -#endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * Do not forward packets to multicast destination (should be handled * by ip6_mforward(). * Do not forward packets with unspecified source. It was discussed * in July 2000, on the ipngwg mailing list. */ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif)); } m_freem(m); return; } -#ifdef IPSEC - /* - * Check if this packet has an active SA and needs to be dropped - * instead of forwarded. - */ - if (ip6_ipsec_fwd(m) != 0) { - IP6STAT_INC(ip6s_cantforward); - m_freem(m); - return; - } -#endif /* IPSEC */ + if ( #ifdef IPSTEALTH - if (!V_ip6stealth) { + V_ip6stealth == 0 && #endif - if (ip6->ip6_hlim <= IPV6_HLIMDEC) { + ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0); + ICMP6_TIME_EXCEED_TRANSIT, 0); return; } - ip6->ip6_hlim -= IPV6_HLIMDEC; - -#ifdef IPSTEALTH - } -#endif /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - * size of IPv6 + ICMPv6 headers) bytes of the packet in case * we need to generate an ICMP6 message to the src. * Thanks to M_EXT, in most cases copy will not occur. * * It is important to save it before IPsec processing as IPsec * processing may modify the mbuf. */ mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN), M_NOWAIT); - -#ifdef IPSEC - /* get a security policy for this packet */ - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); - if (sp == NULL) { - IPSEC6STAT_INC(ips_out_inval); - IP6STAT_INC(ip6s_cantforward); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); +#ifdef IPSTEALTH + if (V_ip6stealth == 0) #endif - } - m_freem(m); - return; - } - - error = 0; + ip6->ip6_hlim -= IPV6_HLIMDEC; - /* check policy */ - switch (sp->policy) { - case IPSEC_POLICY_DISCARD: - /* - * This packet is just discarded. - */ - IPSEC6STAT_INC(ips_out_polvio); - IP6STAT_INC(ip6s_cantforward); - KEY_FREESP(&sp); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if ((error = IPSEC_FORWARD(ipv6, m)) != 0) { + /* mbuf consumed by IPsec */ m_freem(mcopy); -#endif - } - m_freem(m); - return; - - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_NONE: - /* no need to do IPsec. */ - KEY_FREESP(&sp); - goto skip_ipsec; - - case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) { - /* XXX should be panic ? */ - printf("ip6_forward: No IPsec request specified.\n"); - IP6STAT_INC(ip6s_cantforward); - KEY_FREESP(&sp); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); -#endif - } - m_freem(m); + if (error != EINPROGRESS) + IP6STAT_INC(ip6s_cantforward); return; } - /* do IPsec */ - break; - - case IPSEC_POLICY_ENTRUST: - default: - /* should be panic ?? */ - printf("ip6_forward: Invalid policy found. %d\n", sp->policy); - KEY_FREESP(&sp); - goto skip_ipsec; + /* No IPsec processing required */ } - - { - struct ipsecrequest *isr = NULL; - - /* - * when the kernel forwards a packet, it is not proper to apply - * IPsec transport mode to the packet. This check avoid from this. - * at present, if there is even a transport mode SA request in the - * security policy, the kernel does not apply IPsec to the packet. - * this check is not enough because the following case is valid. - * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; - */ - for (isr = sp->req; isr; isr = isr->next) { - if (isr->saidx.mode == IPSEC_MODE_ANY) - goto doipsectunnel; - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) - goto doipsectunnel; - } - - /* - * if there's no need for tunnel mode IPsec, skip. - */ - if (!isr) - goto skip_ipsec; - - doipsectunnel: - /* - * All the extension headers will become inaccessible - * (since they can be encrypted). - * Don't panic, we need no more updates to extension headers - * on inner IPv6 packet (since they are now encapsulated). - * - * IPv6 [ESP|AH] IPv6 [extension headers] payload - */ - - /* - * If we need to encapsulate the packet, do it here - * ipsec6_proces_packet will send the packet using ip6_output - */ - error = ipsec6_process_packet(m, sp->req); - /* Release SP if an error occurred */ - if (error != 0) - KEY_FREESP(&sp); - if (error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - error = 0; - goto skip_ipsec; - } - - if (error) { - /* mbuf is already reclaimed in ipsec6_process_packet. */ - switch (error) { - case EHOSTUNREACH: - case ENETUNREACH: - case EMSGSIZE: - case ENOBUFS: - case ENOMEM: - break; - default: - printf("ip6_output (ipsec): error code %d\n", error); - /* FALLTHROUGH */ - case ENOENT: - /* don't show these error codes to the user */ - break; - } - IP6STAT_INC(ip6s_cantforward); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); -#endif - } - return; - } else { - /* - * In the FAST IPSec case we have already - * re-injected the packet and it has been freed - * by the ipsec_done() function. So, just clean - * up after ourselves. - */ - m = NULL; - goto freecopy; - } - } -skip_ipsec: #endif again: bzero(&rin6, sizeof(struct route_in6)); dst = (struct sockaddr_in6 *)&rin6.ro_dst; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; again2: rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); rt = rin6.ro_rt; if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } goto bad; } /* * Source scope check: if a packet can't be delivered to its * destination for the reason that the destination is beyond the scope * of the source address, discard the packet and return an icmp6 * destination unreachable error with Code 2 (beyond scope of source * address). We use a local copy of ip6_src, since in6_setscope() * will possibly modify its first argument. * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1] */ src_in6 = ip6->ip6_src; if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) { /* XXX: this should not happen */ IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } if (mcopy) icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_BEYONDSCOPE, 0); goto bad; } /* * Destination scope check: if a packet is going to break the scope * zone of packet's destination address, discard it. This case should * usually be prevented by appropriately-configured routing table, but * we need an explicit check because we may mistakenly forward the * packet to a different zone by (e.g.) a default route. */ dst_in6 = ip6->ip6_dst; if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 || in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 || inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)rt->rt_gateway; /* * If we are to forward the packet using the same interface * as one we got the packet from, perhaps we should send a redirect * to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a route * modified by a redirect. */ if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { /* * If the incoming interface is equal to the outgoing * one, and the link attached to the interface is * point-to-point, then it will be highly probable * that a routing loop occurs. Thus, we immediately * drop the packet and send an ICMPv6 error message. * * type/code is based on suggestion by Rich Draves. * not sure if it is the best pick. */ icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); goto bad; } type = ND_REDIRECT; } /* * Fake scoped addresses. Note that even link-local source or * destinaion can appear, if the originating node just sends the * packet to us (without address resolution for the destination). * Since both icmp6_error and icmp6_redirect_output fill the embedded * link identifiers, we can do this stuff after making a copy for * returning an error. */ if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { /* * See corresponding comments in ip6_output. * XXX: but is it possible that ip6_forward() sends a packet * to a loopback interface? I don't think so, and thus * I bark here. (jinmei@kame.net) * XXX: it is common to route invalid packets to loopback. * also, the codepath will be visited on use of ::1 in * rthdr. (itojun) */ #if 1 if (0) #else if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) #endif { printf("ip6_forward: outgoing interface is loopback. " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } /* we can just use rcvif in forwarding. */ origifp = m->m_pkthdr.rcvif; } else origifp = rt->rt_ifp; /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL); if (error != 0 || m == NULL) goto freecopy; /* consumed by filter */ ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) m->m_flags |= M_FASTFWD_OURS; else { RTFREE(rt); goto again; /* Redo the routing table lookup. */ } } /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto out; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&rin6.ro_dst; bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); RTFREE(rt); goto again2; } pass: /* See if the size was changed by the packet filter. */ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); - if (mcopy) { - u_long mtu; -#ifdef IPSEC - size_t ipsechdrsiz; -#endif /* IPSEC */ - - mtu = IN6_LINKMTU(rt->rt_ifp); -#ifdef IPSEC - /* - * When we do IPsec tunnel ingress, we need to play - * with the link value (decrement IPsec header size - * from mtu value). The code is much simpler than v4 - * case, as we have the outgoing interface for - * encapsulated packet as "rt->rt_ifp". - */ - ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, - NULL); - if (ipsechdrsiz < mtu) - mtu -= ipsechdrsiz; - /* - * if mtu becomes less than minimum MTU, - * tell minimum MTU (and I'll need to fragment it). - */ - if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU; -#endif /* IPSEC */ - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); - } + if (mcopy) + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, + IN6_LINKMTU(rt->rt_ifp)); goto bad; } error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { IP6STAT_INC(ip6s_forward); in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); if (type) IP6STAT_INC(ip6s_redirectsent); else { if (mcopy) goto freecopy; } } if (mcopy == NULL) goto out; switch (error) { case 0: if (type == ND_REDIRECT) { icmp6_redirect_output(mcopy, rt); goto out; } goto freecopy; case EMSGSIZE: /* xxx MTU is constant in PPP? */ goto freecopy; case ENOBUFS: /* Tell source to slow down like source quench in IP? */ goto freecopy; case ENETUNREACH: /* shouldn't happen, checked above */ case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP6_DST_UNREACH; code = ICMP6_DST_UNREACH_ADDR; break; } icmp6_error(mcopy, type, code, 0); goto out; freecopy: m_freem(mcopy); goto out; bad: m_freem(m); out: if (rt != NULL) RTFREE(rt); } diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 7e208113d608..c53494e8d943 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -1,1831 +1,1821 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include #include extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl); VNET_DEFINE(u_long, in6_ifaddrhmask); static struct netisr_handler ip6_nh = { .nh_name = "ip6", .nh_handler = ip6_input, .nh_proto = NETISR_IPV6, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v6, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; static int sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip6_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip6_nh, qlimit)); } SYSCTL_DECL(_net_inet6_ip6); SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I", "Maximum size of the IPv6 input queue"); #ifdef RSS static struct netisr_handler ip6_direct_nh = { .nh_name = "ip6_direct", .nh_handler = ip6_direct_input, .nh_proto = NETISR_IPV6_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v6, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; static int sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip6_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip6_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen, "I", "Maximum size of the IPv6 direct input queue"); #endif VNET_DEFINE(struct pfil_head, inet6_pfil_hook); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); VNET_PCPUSTAT_SYSINIT(ip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ip6stat); #endif /* VIMAGE */ struct rmlock in6_ifaddr_lock; RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #ifdef PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); #endif /* * IP6 initialization: fill in IP6 protocol switch table. * All protocols not implemented in kernel go to raw IP6 protocol handler. */ void ip6_init(void) { struct protosw *pr; int i; TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", &V_ip6_auto_linklocal); TUNABLE_INT_FETCH("net.inet6.ip6.accept_rtadv", &V_ip6_accept_rtadv); TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr); TAILQ_INIT(&V_in6_ifaddrhead); V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR, &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; V_inet6_pfil_hook.ph_af = AF_INET6; if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register input helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6, &V_ipsec_hhh_out[HHOOK_IPSEC_INET6], HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register output helper hook\n", __func__); scope6_init(); addrsel_policy_init(); nd6_init(); frag6_init(); V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; /* Skip global initialization stuff for non-default instances. */ #ifdef VIMAGE if (!IS_DEFAULT_VNET(curvnet)) { netisr_register_vnet(&ip6_nh); #ifdef RSS netisr_register_vnet(&ip6_direct_nh); #endif return; } #endif pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip6_init"); /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ for (i = 0; i < IPPROTO_MAX; i++) ip6_protox[i] = pr - inet6sw; /* * Cycle through IP protocols and put them into the appropriate place * in ip6_protox[]. */ for (pr = inet6domain.dom_protosw; pr < inet6domain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip6_protox[pr->pr_protocol] = pr - inet6sw; } netisr_register(&ip6_nh); #ifdef RSS netisr_register(&ip6_direct_nh); #endif } /* * The protocol to be inserted into ip6_protox[] must be already registered * in inet6sw[], either statically or through pf_proto_register(). */ int ip6proto_register(short ip6proto) { struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */ return (EEXIST); /* * Find the protocol position in inet6sw[] and set the index. */ for (pr = inet6domain.dom_protosw; pr < inet6domain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol == ip6proto) { ip6_protox[pr->pr_protocol] = pr - inet6sw; return (0); } } return (EPROTONOSUPPORT); } int ip6proto_unregister(short ip6proto) { struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. */ ip6_protox[ip6proto] = pr - inet6sw; return (0); } #ifdef VIMAGE static void ip6_destroy(void *unused __unused) { struct ifaddr *ifa, *nifa; struct ifnet *ifp; int error; #ifdef RSS netisr_unregister_vnet(&ip6_direct_nh); #endif netisr_unregister_vnet(&ip6_nh); if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0) printf("%s: WARNING: unable to unregister pfil hook, " "error %d\n", __func__, error); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister output helper hook " "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } /* Cleanup addresses. */ IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* Cannot lock here - lock recursion. */ /* IF_ADDR_LOCK(ifp); */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } /* IF_ADDR_UNLOCK(ifp); */ in6_ifdetach_destroy(ifp); mld_domifdetach(ifp); /* Make sure any routes are gone as well. */ rt_flushifroutes_af(ifp, AF_INET6); } IFNET_RUNLOCK(); nd6_destroy(); in6_ifattach_destroy(); hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); } VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); #endif static int ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, int *nxt, int *ours) { struct ip6_hdr *ip6; struct ip6_hbh *hbh; if (ip6_hopopts_input(plen, rtalert, &m, off)) { #if 0 /*touches NULL pointer*/ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); #endif goto out; /* m have already been freed */ } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* * if the payload length field is 0 and the next header field * indicates Hop-by-Hop Options header, then a Jumbo Payload * option MUST be included. */ if (ip6->ip6_plen == 0 && *plen == 0) { /* * Note that if a valid jumbo payload option is * contained, ip6_hopopts_input() must set a valid * (non-zero) payload length to the variable plen. */ IP6STAT_INC(ip6s_badoptions); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); goto out; } #ifndef PULLDOWN_TEST /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { IP6STAT_INC(ip6s_tooshort); goto out; } #endif *nxt = hbh->ip6h_nxt; /* * If we are acting as a router and the packet contains a * router alert option, see if we know the option value. * Currently, we only support the option value for MLD, in which * case we should pass the packet to the multicast routing * daemon. */ if (*rtalert != ~0) { switch (*rtalert) { case IP6OPT_RTALERT_MLD: if (V_ip6_forwarding) *ours = 1; break; default: /* * RFC2711 requires unrecognized values must be * silently ignored. */ break; } } return (0); out: return (1); } #ifdef RSS /* * IPv6 direct input routine. * * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. */ void ip6_direct_input(struct mbuf *m) { int off, nxt; int nest; struct m_tag *mtag; struct ip6_direct_ctx *ip6dc; mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL); KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!")); ip6dc = (struct ip6_direct_ctx *)(mtag + 1); nxt = ip6dc->ip6dc_nxt; off = ip6dc->ip6dc_off; nest = 0; m_tag_delete(m, mtag); while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip6_ipsec_input(m, nxt)) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) + return; + } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: m_freem(m); } #endif void ip6_input(struct mbuf *m) { struct in6_addr odst; struct ip6_hdr *ip6; struct in6_ifaddr *ia; struct ifnet *rcvif; u_int32_t plen; u_int32_t rtalert = ~0; int off = sizeof(struct ip6_hdr), nest; int nxt, ours = 0; int srcrt = 0; /* * Drop the packet if IPv6 operation is disabled on the interface. */ rcvif = m->m_pkthdr.rcvif; if ((ND_IFINFO(rcvif)->flags & ND6_IFF_IFDISABLED)) goto bad; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * should the inner packet be considered authentic? * see comment in ah4_input(). * NB: m cannot be NULL when passed to the input routine */ m->m_flags &= ~M_AUTHIPHDR; m->m_flags &= ~M_AUTHIPDGM; #endif /* IPSEC */ if (m->m_flags & M_FASTFWD_OURS) { /* * Firewall changed destination to local. */ m->m_flags &= ~M_FASTFWD_OURS; ours = 1; ip6 = mtod(m, struct ip6_hdr *); goto hbhcheck; } /* * mbuf statistics */ if (m->m_flags & M_EXT) { if (m->m_next) IP6STAT_INC(ip6s_mext2m); else IP6STAT_INC(ip6s_mext1); } else { if (m->m_next) { if (m->m_flags & M_LOOP) { IP6STAT_INC(ip6s_m2m[V_loif->if_index]); } else if (rcvif->if_index < IP6S_M2MMAX) IP6STAT_INC(ip6s_m2m[rcvif->if_index]); else IP6STAT_INC(ip6s_m2m[0]); } else IP6STAT_INC(ip6s_m1); } in6_ifstat_inc(rcvif, ifs6_in_receive); IP6STAT_INC(ip6s_total); #ifndef PULLDOWN_TEST /* * L2 bridge code and some other code can return mbuf chain * that does not conform to KAME requirement. too bad. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? */ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; if (m->m_pkthdr.len > MHLEN) n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) goto bad; m_move_pkthdr(n, m); m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = n->m_pkthdr.len; m_freem(m); m = n; } IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */); #endif if (m->m_len < sizeof(struct ip6_hdr)) { if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(rcvif, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(rcvif, ifs6_in_hdrerr); goto bad; } IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]); IP_PROBE(receive, NULL, NULL, ip6, rcvif, NULL, ip6); /* * Check against address spoofing/corruption. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { /* * XXX: "badscope" is not very suitable for a multicast source. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && !(m->m_flags & M_LOOP)) { /* * In this case, the packet should come from the loopback * interface. However, we cannot just check the if_flags, * because ip6_mloopback() passes the "actual" interface * as the outgoing/incoming interface. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && IPV6_ADDR_MC_SCOPE(&ip6->ip6_dst) == 0) { /* * RFC4291 2.7: * Nodes must not originate a packet to a multicast address * whose scop field contains the reserved value 0; if such * a packet is received, it must be silently dropped. */ IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } #ifdef ALTQ if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) { /* packet is dropped by traffic conditioner */ return; } #endif /* * The following check is not documented in specs. A malicious * party may be able to use IPv4 mapped addr to confuse tcp/udp stack * and bypass security checks (act as if it was from 127.0.0.1 by using * IPv6 src ::ffff:127.0.0.1). Be cautious. * * This check chokes if we are in an SIIT cloud. As none of BSDs * support IPv4-less kernel compilation, we cannot support SIIT * environment at all. So, it makes more sense for us to reject any * malicious packets for non-SIIT environment, than try to do a * partial support for SIIT environment. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } #if 0 /* * Reject packets with IPv4 compatible addresses (auto tunnel). * * The code forbids auto tunnel relay case in RFC1933 (the check is * stronger than RFC1933). We may want to re-enable it if mech-xx * is revised to forbid relaying case. */ if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #endif /* * Try to forward the packet, but if we fail continue. * ip6_tryforward() does inbound and outbound packet firewall * processing. If firewall has decided that destination becomes * our local address, it sets M_FASTFWD_OURS flag. In this * case skip another inbound firewall processing and update * ip6 pointer. */ if (V_ip6_forwarding != 0 -#ifdef IPSEC - && !key_havesp(IPSEC_DIR_INBOUND) - && !key_havesp(IPSEC_DIR_OUTBOUND) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + && (!IPSEC_ENABLED(ipv6) || + IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0) #endif ) { if ((m = ip6_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; ip6 = mtod(m, struct ip6_hdr *); goto hbhcheck; } } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ - if (ip6_ipsec_filtertunnel(m)) - goto passin; + if (IPSEC_ENABLED(ipv6) && + IPSEC_CAPS(ipv6, m, IPSEC_CAP_BYPASS_FILTER) != 0) + goto passin; #endif /* * Run through list of hooks for input packets. * * NB: Beware of the destination address changing * (e.g. by NAT rewriting). When this happens, * tell ip6_forward to do the right thing. */ /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passin; odst = ip6->ip6_dst; if (pfil_run_hooks(&V_inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL)) return; if (m == NULL) /* consumed by filter */ return; ip6 = mtod(m, struct ip6_hdr *); srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; goto hbhcheck; } if ((m->m_flags & M_IP6_NEXTHOP) && m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows forwarding * packets originally destined to us to some other directly * connected host. */ ip6_forward(m, 1); return; } passin: /* * Disambiguate address scope zones (if there is ambiguity). * We first make sure that the original source or destination address * is not in our internal form for scoped addresses. Such addresses * are not necessarily invalid spec-wise, but we cannot accept them due * to the usage conflict. * in6_setscope() then also checks and rejects the cases where src or * dst are the loopback address and the receiving interface * is not loopback. */ if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); /* XXX */ goto bad; } if (in6_setscope(&ip6->ip6_src, rcvif, NULL) || in6_setscope(&ip6->ip6_dst, rcvif, NULL)) { IP6STAT_INC(ip6s_badscope); goto bad; } /* * Multicast check. Assume packet is for us to avoid * prematurely taking locks. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ours = 1; in6_ifstat_inc(rcvif, ifs6_in_mcast); goto hbhcheck; } /* * Unicast check * XXX: For now we keep link-local IPv6 addresses with embedded * scope zone id, therefore we use zero zoneid here. */ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL) { if (ia->ia6_flags & IN6_IFF_NOTREADY) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; /* address is not ready, so discard the packet. */ nd6log((LOG_INFO, "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst))); ifa_free(&ia->ia_ifa); goto bad; } /* Count the packet in the ip address stats */ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); ifa_free(&ia->ia_ifa); ours = 1; goto hbhcheck; } /* * Now there is no reason to process the packet if it's not our own * and we're not a router. */ if (!V_ip6_forwarding) { IP6STAT_INC(ip6s_cantforward); goto bad; } hbhcheck: /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) return; } else nxt = ip6->ip6_nxt; /* * Use mbuf flags to propagate Router Alert option to * ICMPv6 layer, as hop-by-hop options have been stripped. */ if (rtalert != ~0) m->m_flags |= M_RTALERT_MLD; /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(rcvif, ifs6_in_truncated); goto bad; } if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { if (m->m_len == m->m_pkthdr.len) { m->m_len = sizeof(struct ip6_hdr) + plen; m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; } else m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); } /* * Forward if desirable. */ if (V_ip6_mrouter && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip6_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. * * XXX TODO: Check hlim and multicast scope here to avoid * unnecessarily calling into ip6_mforward(). */ if (ip6_mforward && ip6_mforward(ip6, rcvif, m)) { IP6STAT_INC(ip6s_cantforward); goto bad; } } else if (!ours) { ip6_forward(m, srcrt); return; } ip6 = mtod(m, struct ip6_hdr *); /* * Malicious party may be able to use IPv4 mapped addr to confuse * tcp/udp stack and bypass security checks (act as if it was from * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. * * For SIIT end node behavior, you may want to disable the check. * However, you will become vulnerable to attacks using IPv4 mapped * source. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rcvif, ifs6_in_addrerr); goto bad; } /* * Tell launch routine the next header */ IP6STAT_INC(ip6s_delivered); in6_ifstat_inc(rcvif, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(rcvif, ifs6_in_truncated); goto bad; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip6_ipsec_input(m, nxt)) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) + return; + } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: in6_ifstat_inc(rcvif, ifs6_in_discard); if (m != NULL) m_freem(m); } /* * Hop-by-Hop options header processing. If a valid jumbo payload option is * included, the real payload length will be stored in plenp. * * rtalertp - XXX: should be stored more smart way */ static int ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp, struct mbuf **mp, int *offp) { struct mbuf *m = *mp; int off = *offp, hbhlen; struct ip6_hbh *hbh; /* validation of the length of the header */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_CHECK(m, off, hbhlen, -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { IP6STAT_INC(ip6s_tooshort); return -1; } hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), hbhlen); if (hbh == NULL) { IP6STAT_INC(ip6s_tooshort); return -1; } #endif off += hbhlen; hbhlen -= sizeof(struct ip6_hbh); if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), hbhlen, rtalertp, plenp) < 0) return (-1); *offp = off; *mp = m; return (0); } /* * Search header for all Hop-by-hop options and process each option. * This function is separate from ip6_hopopts_input() in order to * handle a case where the sending node itself process its hop-by-hop * options header. In such a case, the function is called from ip6_output(). * * The function assumes that hbh header is located right after the IPv6 header * (RFC2460 p7), opthead is pointer into data content in m, and opthead to * opthead + hbhlen is located in contiguous memory region. */ int ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen, u_int32_t *rtalertp, u_int32_t *plenp) { struct ip6_hdr *ip6; int optlen = 0; u_int8_t *opt = opthead; u_int16_t rtalert_val; u_int32_t jumboplen; const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { switch (*opt) { case IP6OPT_PAD1: optlen = 1; break; case IP6OPT_PADN: if (hbhlen < IP6OPT_MINLEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } optlen = *(opt + 1) + 2; break; case IP6OPT_ROUTER_ALERT: /* XXX may need check for alignment */ if (hbhlen < IP6OPT_RTALERT_LEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { /* XXX stat */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); return (-1); } optlen = IP6OPT_RTALERT_LEN; bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); *rtalertp = ntohs(rtalert_val); break; case IP6OPT_JUMBO: /* XXX may need check for alignment */ if (hbhlen < IP6OPT_JUMBO_LEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { /* XXX stat */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); return (-1); } optlen = IP6OPT_JUMBO_LEN; /* * IPv6 packets that have non 0 payload length * must not contain a jumbo payload option. */ ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_plen) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt - opthead); return (-1); } /* * We may see jumbolen in unaligned location, so * we'd need to perform bcopy(). */ bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); jumboplen = (u_int32_t)htonl(jumboplen); #if 1 /* * if there are multiple jumbo payload options, * *plenp will be non-zero and the packet will be * rejected. * the behavior may need some debate in ipngwg - * multiple options does not make sense, however, * there's no explicit mention in specification. */ if (*plenp != 0) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); return (-1); } #endif /* * jumbo payload length must be larger than 65535. */ if (jumboplen <= IPV6_MAXPACKET) { IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); return (-1); } *plenp = jumboplen; break; default: /* unknown option */ if (hbhlen < IP6OPT_MINLEN) { IP6STAT_INC(ip6s_toosmall); goto bad; } optlen = ip6_unknown_opt(opt, m, erroff + opt - opthead); if (optlen == -1) return (-1); optlen += 2; break; } } return (0); bad: m_freem(m); return (-1); } /* * Unknown option processing. * The third argument `off' is the offset from the IPv6 header to the option, * which is necessary if the IPv6 header the and option header and IPv6 header * is not contiguous in order to return an ICMPv6 error. */ int ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off) { struct ip6_hdr *ip6; switch (IP6OPT_TYPE(*optp)) { case IP6OPT_TYPE_SKIP: /* ignore the option */ return ((int)*(optp + 1)); case IP6OPT_TYPE_DISCARD: /* silently discard */ m_freem(m); return (-1); case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ IP6STAT_INC(ip6s_badoptions); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return (-1); case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ IP6STAT_INC(ip6s_badoptions); ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || (m->m_flags & (M_BCAST|M_MCAST))) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return (-1); } m_freem(m); /* XXX: NOTREACHED */ return (-1); } /* * Create the "control" list for this pcb. * These functions will not modify mbuf chain at all. * * With KAME mbuf chain restriction: * The routine will be called from upper layer handlers like tcp6_input(). * Thus the routine assumes that the caller (tcp6_input) have already * called IP6_EXTHDR_CHECK() and all the extension headers are located in the * very first mbuf on the mbuf chain. * * ip6_savecontrol_v4 will handle those options that are possible to be * set on a v4-mapped socket. * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those * options and handle the v6-only ones itself. */ struct mbuf ** ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, int *v4only) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); #ifdef SO_TIMESTAMP if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { union { struct timeval tv; struct bintime bt; struct timespec ts; } t; switch (inp->inp_socket->so_ts_clock) { case SO_TS_REALTIME_MICRO: microtime(&t.tv); *mp = sbcreatecontrol((caddr_t) &t.tv, sizeof(t.tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; break; case SO_TS_BINTIME: bintime(&t.bt); *mp = sbcreatecontrol((caddr_t)&t.bt, sizeof(t.bt), SCM_BINTIME, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; break; case SO_TS_REALTIME: nanotime(&t.ts); *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), SCM_REALTIME, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; break; case SO_TS_MONOTONIC: nanouptime(&t.ts); *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), SCM_MONOTONIC, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; break; default: panic("unknown (corrupted) so_ts_clock"); } } #endif #define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) /* RFC 2292 sec. 5 */ if ((inp->inp_flags & IN6P_PKTINFO) != 0) { struct in6_pktinfo pi6; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); pi6.ipi6_addr.s6_addr32[0] = 0; pi6.ipi6_addr.s6_addr32[1] = 0; pi6.ipi6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; pi6.ipi6_addr.s6_addr32[3] = ip->ip_dst.s_addr; #else /* We won't hit this code */ bzero(&pi6.ipi6_addr, sizeof(struct in6_addr)); #endif } else { bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); in6_clearscope(&pi6.ipi6_addr); /* XXX */ } pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; *mp = sbcreatecontrol((caddr_t) &pi6, sizeof(struct in6_pktinfo), IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { int hlim; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); hlim = ip->ip_ttl; #else /* We won't hit this code */ hlim = 0; #endif } else { hlim = ip6->ip6_hlim & 0xff; } *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if ((inp->inp_flags & IN6P_TCLASS) != 0) { int tclass; if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { #ifdef INET struct ip *ip; ip = mtod(m, struct ip *); tclass = ip->ip_tos; #else /* We won't hit this code */ tclass = 0; #endif } else { u_int32_t flowinfo; flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); flowinfo >>= 20; tclass = flowinfo & 0xff; } *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(int), IPV6_TCLASS, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if (v4only != NULL) { if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { *v4only = 1; } else { *v4only = 0; } } return (mp); } void ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); int v4only = 0; mp = ip6_savecontrol_v4(in6p, m, mp, &v4only); if (v4only) return; /* * IPV6_HOPOPTS socket option. Recall that we required super-user * privilege for the option (see ip6_ctloutput), but it might be too * strict, since there might be some hop-by-hop options which can be * returned to normal user. * See also RFC 2292 section 6 (or RFC 3542 section 8). */ if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary * data. Note that a hop-by-hop options header must be * just after the IPv6 header, which is assured through the * IPv6 input processing. */ if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen = 0; #ifdef PULLDOWN_TEST struct mbuf *ext; #endif #ifndef PULLDOWN_TEST hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; #else ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), ip6->ip6_nxt); if (ext == NULL) { IP6STAT_INC(ip6s_tooshort); return; } hbh = mtod(ext, struct ip6_hbh *); hbhlen = (hbh->ip6h_len + 1) << 3; if (hbhlen != ext->m_len) { m_freem(ext); IP6STAT_INC(ip6s_tooshort); return; } #endif /* * XXX: We copy the whole header even if a * jumbo payload option is included, the option which * is to be removed before returning according to * RFC2292. * Note: this constraint is removed in RFC3542 */ *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; #ifdef PULLDOWN_TEST m_freem(ext); #endif } } if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* * Search for destination options headers or routing * header(s) through the header chain, and stores each * header as ancillary data. * Note that the order of the headers remains in * the chain of ancillary data. */ while (1) { /* is explicit loop prevention necessary? */ struct ip6_ext *ip6e = NULL; int elen; #ifdef PULLDOWN_TEST struct mbuf *ext = NULL; #endif /* * if it is not an extension header, don't try to * pull it from the chain. */ switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: goto loopend; } #ifndef PULLDOWN_TEST if (off + sizeof(*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (off + elen > m->m_len) goto loopend; #else ext = ip6_pullexthdr(m, off, nxt); if (ext == NULL) { IP6STAT_INC(ip6s_tooshort); return; } ip6e = mtod(ext, struct ip6_ext *); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (elen != ext->m_len) { m_freem(ext); IP6STAT_INC(ip6s_tooshort); return; } #endif switch (nxt) { case IPPROTO_DSTOPTS: if (!(in6p->inp_flags & IN6P_DSTOPTS)) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IS2292(in6p, IPV6_2292DSTOPTS, IPV6_DSTOPTS), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_ROUTING: if (!(in6p->inp_flags & IN6P_RTHDR)) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: /* * other cases have been filtered in the above. * none will visit this case. here we supply * the code just in case (nxt overwritten or * other cases). */ #ifdef PULLDOWN_TEST m_freem(ext); #endif goto loopend; } /* proceed with the next header. */ off += elen; nxt = ip6e->ip6e_nxt; ip6e = NULL; #ifdef PULLDOWN_TEST m_freem(ext); ext = NULL; #endif } loopend: ; } if (in6p->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? */ *mp = sbcreatecontrol((caddr_t) &flowid, sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; *mp = sbcreatecontrol((caddr_t) &flow_type, sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { *mp = sbcreatecontrol((caddr_t) &rss_bucketid, sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } } #endif } #undef IS2292 void ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu) { struct socket *so; struct mbuf *m_mtu; struct ip6_mtuinfo mtuctl; KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); /* * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. * NOTE: we notify disconnected sockets, because some udp * applications keep sending sockets disconnected. * NOTE: our implementation doesn't notify connected sockets that has * foreign address that is different than given destination addresses * (this is permitted by RFC 3542). */ if ((inp->inp_flags & IN6P_MTU) == 0 || ( !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &dst->sin6_addr))) return; mtuctl.ip6m_mtu = mtu; mtuctl.ip6m_addr = *dst; if (sa6_recoverscope(&mtuctl.ip6m_addr)) return; if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) return; so = inp->inp_socket; if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu) == 0) { m_freem(m_mtu); /* XXX: should count statistics */ } else sorwakeup(so); } #ifdef PULLDOWN_TEST /* * pull single extension header from mbuf chain. returns single mbuf that * contains the result, or NULL on error. */ static struct mbuf * ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) { struct ip6_ext ip6e; size_t elen; struct mbuf *n; #ifdef DIAGNOSTIC switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: printf("ip6_pullexthdr: invalid nxt=%d\n", nxt); } #endif m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxt == IPPROTO_AH) elen = (ip6e.ip6e_len + 2) << 2; else elen = (ip6e.ip6e_len + 1) << 3; if (elen > MLEN) n = m_getcl(M_NOWAIT, MT_DATA, 0); else n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) return NULL; m_copydata(m, off, elen, mtod(n, caddr_t)); n->m_len = elen; return n; } #endif /* * Get pointer to the previous header followed by the header * currently processed. * XXX: This function supposes that * M includes all headers, * the next header field and the header length field of each header * are valid, and * the sum of each header length equals to OFF. * Because of these assumptions, this function must be called very * carefully. Moreover, it will not be used in the near future when * we develop `neater' mechanism to process extension headers. */ char * ip6_get_prevhdr(const struct mbuf *m, int off) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if (off == sizeof(struct ip6_hdr)) return (&ip6->ip6_nxt); else { int len, nxt; struct ip6_ext *ip6e = NULL; nxt = ip6->ip6_nxt; len = sizeof(struct ip6_hdr); while (len < off) { ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len); switch (nxt) { case IPPROTO_FRAGMENT: len += sizeof(struct ip6_frag); break; case IPPROTO_AH: len += (ip6e->ip6e_len + 2) << 2; break; default: len += (ip6e->ip6e_len + 1) << 3; break; } nxt = ip6e->ip6e_nxt; } if (ip6e) return (&ip6e->ip6e_nxt); else return NULL; } } /* * get next header offset. m will be retained. */ int ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp) { struct ip6_hdr ip6; struct ip6_ext ip6e; struct ip6_frag fh; /* just in case */ if (m == NULL) panic("ip6_nexthdr: m == NULL"); if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) return -1; switch (proto) { case IPPROTO_IPV6: if (m->m_pkthdr.len < off + sizeof(ip6)) return -1; m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); if (nxtp) *nxtp = ip6.ip6_nxt; off += sizeof(ip6); return off; case IPPROTO_FRAGMENT: /* * terminate parsing if it is not the first fragment, * it does not make sense to parse through it. */ if (m->m_pkthdr.len < off + sizeof(fh)) return -1; m_copydata(m, off, sizeof(fh), (caddr_t)&fh); /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ if (fh.ip6f_offlg & IP6F_OFF_MASK) return -1; if (nxtp) *nxtp = fh.ip6f_nxt; off += sizeof(struct ip6_frag); return off; case IPPROTO_AH: if (m->m_pkthdr.len < off + sizeof(ip6e)) return -1; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 2) << 2; return off; case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: if (m->m_pkthdr.len < off + sizeof(ip6e)) return -1; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 1) << 3; return off; case IPPROTO_NONE: case IPPROTO_ESP: case IPPROTO_IPCOMP: /* give up */ return -1; default: return -1; } /* NOTREACHED */ } /* * get offset for the last header in the chain. m will be kept untainted. */ int ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp) { int newoff; int nxt; if (!nxtp) { nxt = -1; nxtp = &nxt; } while (1) { newoff = ip6_nexthdr(m, off, proto, nxtp); if (newoff < 0) return off; else if (newoff < off) return -1; /* invalid */ else if (newoff == off) return newoff; off = newoff; proto = *nxtp; } } /* * System control for IP6 */ u_char inet6ctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, EMSGSIZE, EHOSTUNREACH, 0, 0, 0, 0, EHOSTUNREACH, 0, ENOPROTOOPT, ECONNREFUSED }; diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c deleted file mode 100644 index 926e45c65713..000000000000 --- a/sys/netinet6/ip6_ipsec.c +++ /dev/null @@ -1,293 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "opt_inet.h" -#include "opt_sctp.h" -#include "opt_ipsec.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SCTP -#include -#endif - -#include - -#include -#include -#include -#include -#ifdef IPSEC_DEBUG -#include -#else -#define KEYDEBUG(lev,arg) -#endif - -#include -#include - -extern struct protosw inet6sw[]; - -static VNET_DEFINE(int, ip6_ipsec6_filtertunnel) = 0; -#define V_ip6_ipsec6_filtertunnel VNET(ip6_ipsec6_filtertunnel) - -SYSCTL_DECL(_net_inet6_ipsec6); -SYSCTL_INT(_net_inet6_ipsec6, OID_AUTO, filtertunnel, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec6_filtertunnel), 0, - "If set filter packets from an IPsec tunnel."); - -/* - * Check if we have to jump over firewall processing for this packet. - * Called from ip6_input(). - * 1 = jump over firewall, 0 = packet goes through firewall. - */ -int -ip6_ipsec_filtertunnel(struct mbuf *m) -{ - - /* - * Bypass packet filtering for packets previously handled by IPsec. - */ - if (!V_ip6_ipsec6_filtertunnel && - m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) - return (1); - return (0); -} - -/* - * Check if this packet has an active SA and needs to be dropped instead - * of forwarded. - * Called from ip6_forward(). - * 1 = drop packet, 0 = forward packet. - */ -int -ip6_ipsec_fwd(struct mbuf *m) -{ - - return (ipsec6_in_reject(m, NULL)); -} - -/* - * Check if protocol type doesn't have a further header and do IPSEC - * decryption or reject right now. Protocols with further headers get - * their IPSEC treatment within the protocol specific processing. - * Called from ip6_input(). - * 1 = drop packet, 0 = continue processing packet. - */ -int -ip6_ipsec_input(struct mbuf *m, int nxt) -{ - - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0) - return (ipsec6_in_reject(m, NULL)); - return (0); -} - -/* - * Called from ip6_output(). - * 1 = drop packet, 0 = continue processing packet, - * -1 = packet was reinjected and stop processing packet - */ - -int -ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) -{ - struct secpolicy *sp; - - /* - * Check the security policy (SP) for the packet and, if - * required, do IPsec-related processing. There are two - * cases here; the first time a packet is sent through - * it will be untagged and handled by ipsec4_checkpolicy. - * If the packet is resubmitted to ip6_output (e.g. after - * AH, ESP, etc. processing), there will be a tag to bypass - * the lookup and related policy checking. - */ - if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { - *error = 0; - return (0); - } - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); - /* - * There are four return cases: - * sp != NULL apply IPsec policy - * sp == NULL, error == 0 no IPsec handling needed - * sp == NULL, error == -EINVAL discard packet w/o error - * sp == NULL, error != 0 discard packet, report error - */ - if (sp != NULL) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ -#ifdef INET - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(*m); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#endif - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { - in6_delayed_cksum(*m, (*m)->m_pkthdr.len - sizeof(struct ip6_hdr), - sizeof(struct ip6_hdr)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; - } -#ifdef SCTP - if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { - sctp_delayed_cksum(*m, sizeof(struct ip6_hdr)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; - } -#endif - - /* NB: callee frees mbuf */ - *error = ipsec6_process_packet(*m, sp->req); - KEY_FREESP(&sp); - if (*error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - *error = 0; - goto done; - } - - /* - * Preserve KAME behaviour: ENOENT can be returned - * when an SA acquire is in progress. Don't propagate - * this to user-level; it confuses applications. - * - * XXX this will go away when the SADB is redone. - */ - if (*error == ENOENT) - *error = 0; - goto reinjected; - } else { /* sp == NULL */ - if (*error != 0) { - /* - * Hack: -EINVAL is used to signal that a packet - * should be silently discarded. This is typically - * because we asked key management for an SA and - * it was delayed (e.g. kicked up to IKE). - */ - if (*error == -EINVAL) - *error = 0; - goto bad; - } - /* No IPsec processing for this packet. */ - } -done: - return (0); -reinjected: - return (-1); -bad: - if (sp != NULL) - KEY_FREESP(&sp); - return (1); -} - -#if 0 -/* - * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. - * Called from ip_forward(). - * Returns MTU suggestion for ICMP needfrag reply. - */ -int -ip6_ipsec_mtu(struct mbuf *m) -{ - int mtu = 0; - /* - * If the packet is routed over IPsec tunnel, tell the - * originator the tunnel MTU. - * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz - * XXX quickhack!!! - */ -#ifdef IPSEC - struct secpolicy *sp = NULL; - int ipsecerror; - int ipsechdr; - struct route *ro; - sp = ipsec_getpolicybyaddr(m, - IPSEC_DIR_OUTBOUND, - IP_FORWARDING, - &ipsecerror); - if (sp != NULL) { - /* count IPsec header size */ - ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL); - - /* - * find the correct route for outer IPv4 - * header, compute tunnel MTU. - */ - if (sp->req != NULL && - sp->req->sav != NULL && - sp->req->sav->sah != NULL) { - ro = &sp->req->sav->sah->route_cache.sa_route; - if (ro->ro_rt && ro->ro_rt->rt_ifp) { - mtu = ro->ro_rt->rt_mtu ? ro->ro_rt->rt_mtu : - ro->ro_rt->rt_ifp->if_mtu; - mtu -= ipsechdr; - } - } - KEY_FREESP(&sp); - } -#endif /* IPSEC */ - /* XXX else case missing. */ - return mtu; -} -#endif diff --git a/sys/netinet6/ip6_ipsec.h b/sys/netinet6/ip6_ipsec.h deleted file mode 100644 index e335d8501b3a..000000000000 --- a/sys/netinet6/ip6_ipsec.h +++ /dev/null @@ -1,42 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP6_IPSEC_H_ -#define _NETINET_IP6_IPSEC_H_ - -int ip6_ipsec_filtertunnel(struct mbuf *); -int ip6_ipsec_fwd(struct mbuf *); -int ip6_ipsec_input(struct mbuf *, int); -int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *); -#if 0 -int ip6_ipsec_mtu(struct mbuf *); -#endif -#endif diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index be05c8ade522..6644361369cc 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1,3139 +1,3098 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. */ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copym(m0, off, mtu, M_NOWAIT)) == NULL) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * IPSec checking which handles several cases. + * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. + */ + if (IPSEC_ENABLED(ipv6)) { + if ((error = IPSEC_OUTPUT(ipv6, m, inp)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } + } +#endif /* IPSEC */ + bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } -#ifdef IPSEC - /* - * IPSec checking which handles several cases. - * FAST IPSEC: We re-injected the packet. - * XXX: need scope argument. - */ - switch(ip6_ipsec_output(&m, inp, &error)) - { - case 1: /* Bad packet */ - goto freehdrs; - case -1: /* IPSec done */ - goto done; - case 0: /* No IPSec */ - default: - break; - } -#endif /* IPSEC */ - /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } else ro->ro_flags |= RT_LLE_CACHE; ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* * Validate route against routing table additions; * a better/more specific route might have been added. * Make sure address family is set in route. */ if (inp) { ro->ro_dst.sin6_family = AF_INET6; RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else { RO_RTFREE(ro); needfiblookup = 1; /* Redo the routing table lookup. */ if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } #ifdef RATELIMIT if (inp != NULL) { if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) in_pcboutput_txrtlmt(inp, ifp, m); /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = inp->inp_snd_tag; } else { m->m_pkthdr.snd_tag = NULL; } #endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); #ifdef RATELIMIT /* check for route change */ if (error == EAGAIN) in_pcboutput_eagain(inp); #endif goto done; } /* * try to fragment the packet. case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } #ifdef RATELIMIT if (inp != NULL) { if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) in_pcboutput_txrtlmt(inp, ifp, m); /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = inp->inp_snd_tag; } else { m->m_pkthdr.snd_tag = NULL; } #endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); #ifdef RATELIMIT /* check for route change */ if (error == EAGAIN) in_pcboutput_eagain(inp); #endif } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: if (ro == &ip6route) RO_RTFREE(ro); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == NULL) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == NULL) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } /* * Calculates IPv6 path mtu for destination @dst. * Resulting MTU is stored in @mtup. * * Returns 0 on success. */ static int ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { struct nhop6_extended nh6; struct in6_addr kdst; uint32_t scopeid; struct ifnet *ifp; u_long mtu; int error; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) return (EHOSTUNREACH); ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); } /* * Calculates IPv6 path MTU for @dst based on transmit @ifp, * and cached data in @ro_pmtu. * MTU from (successful) route lookup is saved (along with dst) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * * Stores mtu and always-frag value into @mtup and @alwaysfragp. * Returns 0 on success. */ static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; struct sockaddr_in6 *sa6_dst; u_long mtu; mtu = 0; if (do_lookup) { /* * Here ro_pmtu has final destination address, while * ro might represent immediate destination. * Use ro_pmtu destination since mtu might differ. */ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) ro_pmtu->ro_mtu = 0; if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, &nh6) == 0) ro_pmtu->ro_mtu = nh6.nh_mtu; } mtu = ro_pmtu->ro_mtu; } if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. * Stores mtu and always-frag value into @mtup and @alwaysfragp. * * Returns 0 on success. */ static int ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, u_long *mtup, int *alwaysfragp, u_int proto) { u_long mtu = 0; int alwaysfrag = 0; int error = 0; if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; ifmtu = IN6_LINKMTU(ifp); /* TCP is known to react to pmtu changes so skip hc */ if (proto != IPPROTO_TCP) mtu = tcp_hc_getmtu(&inc); if (mtu) mtu = min(mtu, rt_mtu); else mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with framgent header attached. * (fragment header is needed regardless from the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. */ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif /* * Don't use more than a quarter of mbuf clusters. N.B.: * nmbclusters is an int, but nmbclusters * MCLBYTES may overflow * on LP64 architectures, so cast to u_long to avoid undefined * behavior. ILP32 architectures cannot have nmbclusters * large enough to overflow for other reasons. */ #define IPV6_PKTOPTIONS_MBUF_LIMIT ((u_long)nmbclusters * MCLBYTES / 4) level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; case SO_MAX_PACING_RATE: #ifdef RATELIMIT INP_WLOCK(in6p); in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED; INP_WUNLOCK(in6p); error = 0; #else error = EOPNOTSUPP; #endif break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; if (optlen > IPV6_PKTOPTIONS_MBUF_LIMIT) { printf("ip6_ctloutput: mbuf limit hit\n"); error = ENOBUFS; break; } error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: - { - caddr_t req; - struct mbuf *m; - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if (IPSEC_ENABLED(ipv6)) { + error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - break; - req = mtod(m, caddr_t); - error = ipsec_set_policy(in6p, optname, req, - m->m_len, (sopt->sopt_td != NULL) ? - sopt->sopt_td->td_ucred : NULL); - m_freem(m); - break; - } + } + /* FALLTHROUGH */ #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. */ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. */ error = ip6_getpmtu_ctl(so->so_fibnum, &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: - { - caddr_t req = NULL; - size_t len = 0; - struct mbuf *m = NULL; - struct mbuf **mp = &m; - size_t ovalsize = sopt->sopt_valsize; - caddr_t oval = (caddr_t)sopt->sopt_val; - - error = soopt_getm(sopt, &m); /* XXX */ - if (error != 0) - break; - error = soopt_mcopyin(sopt, m); /* XXX */ - if (error != 0) + if (IPSEC_ENABLED(ipv6)) { + error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; - sopt->sopt_valsize = ovalsize; - sopt->sopt_val = oval; - if (m) { - req = mtod(m, caddr_t); - len = m->m_len; } - error = ipsec_get_policy(in6p, req, len, mp); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0 && m) - m_freem(m); - break; - } + /* FALLTHROUGH */ #endif /* IPSEC */ - default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542. */ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... */ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) { if (dst == NULL || src == NULL) { printf("ip6_clearpktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them by ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542. Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. */ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position that the destination options header * should be inserted; before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advacned API is ambiguous on this point. * Our approach is to determine the position based * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types. */ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. */ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index e827057c31c8..3ffadef2426b 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -1,905 +1,907 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) extern u_long rip_sendspace; extern u_long rip_recvspace; VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); VNET_PCPUSTAT_SYSINIT(rip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rip6stat); #endif /* VIMAGE */ /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip6_mrouter); /* * The various mrouter functions. */ int (*ip6_mrouter_set)(struct socket *, struct sockopt *); int (*ip6_mrouter_get)(struct socket *, struct sockopt *); int (*ip6_mrouter_done)(void); int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int (*mrt6_ioctl)(u_long, caddr_t); /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip6_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); register struct inpcb *in6p; struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; RIP6STAT_INC(rip6s_ipackets); init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p && in6p->inp_ip_p != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (jailed_without_vnet(in6p->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0) continue; } INP_RLOCK(in6p); if (in6p->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { INP_RUNLOCK(in6p); RIP6STAT_INC(rip6s_badsum); continue; } } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (in6p->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it * through unconditionally to the raw socket. * * Use the M_RTALERT_MLD flag to check for MLD * traffic without having to inspect the mbuf chain * more deeply, as all MLDv1/v2 host messages MUST * contain the Router Alert option. * * In the case of MLDv1, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. im6o_mc_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if ((m->m_flags & M_RTALERT_MLD) == 0) { struct sockaddr_in6 mcaddr; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(in6p->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); INP_RUNLOCK(in6p); continue; } } if (last != NULL) { struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ - if (n && ipsec6_in_reject(n, last)) { - m_freem(n); - /* Do not inject data into pcb. */ - } else + if (IPSEC_ENABLED(ipv6)) { + if (n != NULL && + IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { + m_freem(n); + /* Do not inject data into pcb. */ + n = NULL; + } + } #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ - if ((last != NULL) && ipsec6_in_reject(m, last)) { + if (IPSEC_ENABLED(ipv6) && last != NULL && + IPSEC_CHECK_POLICY(ipv6, m, last) != 0) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); } else #endif /* IPSEC */ if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* Strip intermediate headers. */ m_adj(m, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); INP_RUNLOCK(last); } else { RIP6STAT_INC(rip6s_nosock); if (m->m_flags & M_MCAST) RIP6STAT_INC(rip6s_nosockmcast); if (proto == IPPROTO_NONE) m_freem(m); else { char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, prvnxtp - mtod(m, char *)); } IP6STAT_DEC(ip6s_delivered); } return (IPPROTO_DONE); } void rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* * If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. Tack on options user * may have setup with control call. */ int rip6_output(struct mbuf *m, struct socket *so, ...) { struct mbuf *control; struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; int hlim = 0; struct in6_addr in6a; va_list ap; va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); in6p = sotoinpcb(so); INP_WLOCK(in6p); dst = &dstsock->sin6_addr; if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, in6p->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else optp = in6p->in6p_outputopts; /* * Check and convert scope zone ID into internal form. * * XXX: we may still need to determine the zone later. */ if (!(so->so_state & SS_ISCONNECTED)) { if (!optp || !optp->ip6po_pktinfo || !optp->ip6po_pktinfo->ipi6_ifindex) use_defzone = V_ip6_use_defzone; if (dstsock->sin6_scope_id == 0 && !use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) goto bad; } /* * For an ICMPv6 packet, we should know its type and code to update * statistics. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } ip6 = mtod(m, struct ip6_hdr *); /* * Source address selection. */ error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, scope_ambiguous, &in6a, &hlim); if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; ip6->ip6_dst = dstsock->sin6_addr; /* * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->inp_ip_p; ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* Compute checksum. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; if (plen < off + 1) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } /* * Send RA/RS messages to user land for protection, before sending * them to rtadvd/rtsol. */ if ((send_sendso_input_hook != NULL) && so->so_proto->pr_protocol == IPPROTO_ICMPV6) { switch (type) { case ND_ROUTER_ADVERT: case ND_ROUTER_SOLICIT: mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } } error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); goto freectl; bad: if (m) m_freem(m); freectl: if (control != NULL) { ip6_clearpktopts(&opt, -1); m_freem(control); } INP_WUNLOCK(in6p); return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw? */ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) { if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_SETFIB) { inp = sotoinpcb(so); INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct icmp6_filter *filter; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); if (filter == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); free(filter, M_PCB); return (error); } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->inp_ip_p = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; inp->in6p_icmp6filt = filter; ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); INP_WUNLOCK(inp); return (0); } static void rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); if (so == V_ip6_mrouter && ip6_mrouter_done) ip6_mrouter_done(); /* xxx: RSVP */ INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); free(inp->in6p_icmp6filt, M_PCB); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } /* XXXRW: This can't ever be called. */ static void rip6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); soisdisconnected(so); } static void rip6_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_close: inp == NULL")); soisdisconnected(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp->in6p_faddr = in6addr_any; rip6_abort(so); return (0); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) return (error); if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) return (EADDRNOTAVAIL); if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) return (EADDRNOTAVAIL); if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin6_family != AF_INET6) return (EAFNOSUPPORT); /* * Application should provide a proper zone ID or the use of default * zone IDs should be enabled. Unfortunately, some applications do * not behave as it should, so we need a workaround. Even if an * appropriate ID is not determined, we'll see if we can determine * the outgoing interface. If we can, determine the zone ID based on * the interface below. */ if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? */ error = in6_selectsrc_socket(addr, inp->in6p_outputopts, inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; int ret; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_send: inp == NULL")); /* Always copy sockaddr to avoid overwrites. */ /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); INP_RLOCK(inp); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); INP_RUNLOCK(inp); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } if (nam->sa_len != sizeof(struct sockaddr_in6)) { m_freem(m); return (EINVAL); } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; if (dst->sin6_family == AF_UNSPEC) { /* * XXX: we allow this case for backward * compatibility to buggy applications that * rely on old (and wrong) kernel behavior. */ log(LOG_INFO, "rip6 SEND: address family is " "unspec. Assume AF_INET6\n"); dst->sin6_family = AF_INET6; } else if (dst->sin6_family != AF_INET6) { m_freem(m); return(EAFNOSUPPORT); } } ret = rip6_output(m, so, dst, control); return (ret); } struct pr_usrreqs rip6_usrreqs = { .pru_abort = rip6_abort, .pru_attach = rip6_attach, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_control = in6_control, .pru_detach = rip6_detach, .pru_disconnect = rip6_disconnect, .pru_peeraddr = in6_getpeeraddr, .pru_send = rip6_send, .pru_shutdown = rip6_shutdown, .pru_sockaddr = in6_getsockaddr, .pru_close = rip6_close, }; diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c index 35ec1f392bf8..2f5db2c2c1d8 100644 --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -1,1195 +1,1190 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifdef INET6 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - extern struct protosw inetsw[]; int sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) { struct mbuf *m; int iphlen; uint32_t vrf_id; uint8_t ecn_bits; struct sockaddr_in6 src, dst; struct ip6_hdr *ip6; struct sctphdr *sh; struct sctp_chunkhdr *ch; int length, offset; #if !defined(SCTP_WITH_NO_CSUM) uint8_t compute_crc; #endif uint32_t mflowid; uint8_t mflowtype; uint16_t fibnum; iphlen = *offp; if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) { SCTP_RELEASE_PKT(*i_pak); return (IPPROTO_DONE); } m = SCTP_HEADER_TO_CHAIN(*i_pak); #ifdef SCTP_MBUF_LOGGING /* Log in any input mbufs */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(m, SCTP_MBUF_INPUT); } #endif #ifdef SCTP_PACKET_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { sctp_packet_log(m); } #endif SCTPDBG(SCTP_DEBUG_CRCOFFLOAD, "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%b.\n", m->m_pkthdr.len, if_name(m->m_pkthdr.rcvif), (int)m->m_pkthdr.csum_flags, CSUM_BITS); mflowid = m->m_pkthdr.flowid; mflowtype = M_HASHTYPE_GET(m); fibnum = M_GETFIB(m); SCTP_STAT_INCR(sctps_recvpackets); SCTP_STAT_INCR_COUNTER64(sctps_inpackets); /* Get IP, SCTP, and first chunk header together in the first mbuf. */ offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); ip6 = mtod(m, struct ip6_hdr *); IP6_EXTHDR_GET(sh, struct sctphdr *, m, iphlen, (int)(sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr))); if (sh == NULL) { SCTP_STAT_INCR(sctps_hdrops); return (IPPROTO_DONE); } ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr)); offset -= sizeof(struct sctp_chunkhdr); memset(&src, 0, sizeof(struct sockaddr_in6)); src.sin6_family = AF_INET6; src.sin6_len = sizeof(struct sockaddr_in6); src.sin6_port = sh->src_port; src.sin6_addr = ip6->ip6_src; if (in6_setscope(&src.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) { goto out; } memset(&dst, 0, sizeof(struct sockaddr_in6)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(struct sockaddr_in6); dst.sin6_port = sh->dest_port; dst.sin6_addr = ip6->ip6_dst; if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) { goto out; } length = ntohs(ip6->ip6_plen) + iphlen; /* Validate mbuf chain length with IP payload length. */ if (SCTP_HEADER_LEN(m) != length) { SCTPDBG(SCTP_DEBUG_INPUT1, "sctp6_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m)); SCTP_STAT_INCR(sctps_hdrops); goto out; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { goto out; } ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff); #if defined(SCTP_WITH_NO_CSUM) SCTP_STAT_INCR(sctps_recvnocrc); #else if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { SCTP_STAT_INCR(sctps_recvhwcrc); compute_crc = 0; } else { SCTP_STAT_INCR(sctps_recvswcrc); compute_crc = 1; } #endif sctp_common_input_processing(&m, iphlen, offset, length, (struct sockaddr *)&src, (struct sockaddr *)&dst, sh, ch, #if !defined(SCTP_WITH_NO_CSUM) compute_crc, #endif ecn_bits, mflowtype, mflowid, fibnum, vrf_id, port); out: if (m) { sctp_m_freem(m); } return (IPPROTO_DONE); } int sctp6_input(struct mbuf **i_pak, int *offp, int proto SCTP_UNUSED) { return (sctp6_input_with_port(i_pak, offp, 0)); } void sctp6_notify(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, uint8_t icmp6_type, uint8_t icmp6_code, uint16_t next_mtu) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif int timer_stopped; switch (icmp6_type) { case ICMP6_DST_UNREACH: if ((icmp6_code == ICMP6_DST_UNREACH_NOROUTE) || (icmp6_code == ICMP6_DST_UNREACH_ADMIN) || (icmp6_code == ICMP6_DST_UNREACH_BEYONDSCOPE) || (icmp6_code == ICMP6_DST_UNREACH_ADDR)) { /* Mark the net unreachable. */ if (net->dest_state & SCTP_ADDR_REACHABLE) { /* Ok that destination is not reachable */ net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state &= ~SCTP_ADDR_PF; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); } } SCTP_TCB_UNLOCK(stcb); break; case ICMP6_PARAM_PROB: /* Treat it like an ABORT. */ if (icmp6_code == ICMP6_PARAMPROB_NEXTHEADER) { sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) so = SCTP_INP_SO(inp); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 1); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_SOCKET_UNLOCK(so, 1); #endif } else { SCTP_TCB_UNLOCK(stcb); } break; case ICMP6_PACKET_TOO_BIG: if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { timer_stopped = 1; sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1); } else { timer_stopped = 0; } /* Update the path MTU. */ if (net->mtu > next_mtu) { net->mtu = next_mtu; if (net->port) { net->mtu -= sizeof(struct udphdr); } } /* Update the association MTU */ if (stcb->asoc.smallest_mtu > next_mtu) { sctp_pathmtu_adjustment(stcb, next_mtu); } /* Finally, start the PMTU timer if it was running before. */ if (timer_stopped) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); } SCTP_TCB_UNLOCK(stcb); break; default: SCTP_TCB_UNLOCK(stcb); break; } } void sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) { struct ip6ctlparam *ip6cp; struct sctp_inpcb *inp; struct sctp_tcb *stcb; struct sctp_nets *net; struct sctphdr sh; struct sockaddr_in6 src, dst; if (pktdst->sa_family != AF_INET6 || pktdst->sa_len != sizeof(struct sockaddr_in6)) { return; } if ((unsigned)cmd >= PRC_NCMDS) { return; } if (PRC_IS_REDIRECT(cmd)) { d = NULL; } else if (inet6ctlerrmap[cmd] == 0) { return; } /* If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; } else { ip6cp = (struct ip6ctlparam *)NULL; } if (ip6cp != NULL) { /* * XXX: We assume that when IPV6 is non NULL, M and OFF are * valid. */ if (ip6cp->ip6c_m == NULL) { return; } /* * Check if we can safely examine the ports and the * verification tag of the SCTP common header. */ if (ip6cp->ip6c_m->m_pkthdr.len < (int32_t)(ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) { return; } /* Copy out the port numbers and the verification tag. */ bzero(&sh, sizeof(sh)); m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t), (caddr_t)&sh); memset(&src, 0, sizeof(struct sockaddr_in6)); src.sin6_family = AF_INET6; src.sin6_len = sizeof(struct sockaddr_in6); src.sin6_port = sh.src_port; src.sin6_addr = ip6cp->ip6c_ip6->ip6_src; if (in6_setscope(&src.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) { return; } memset(&dst, 0, sizeof(struct sockaddr_in6)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(struct sockaddr_in6); dst.sin6_port = sh.dest_port; dst.sin6_addr = ip6cp->ip6c_ip6->ip6_dst; if (in6_setscope(&dst.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) { return; } inp = NULL; net = NULL; stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst, (struct sockaddr *)&src, &inp, &net, 1, SCTP_DEFAULT_VRFID); if ((stcb != NULL) && (net != NULL) && (inp != NULL)) { /* Check the verification tag */ if (ntohl(sh.v_tag) != 0) { /* * This must be the verification tag used * for sending out packets. We don't * consider packets reflecting the * verification tag. */ if (ntohl(sh.v_tag) != stcb->asoc.peer_vtag) { SCTP_TCB_UNLOCK(stcb); return; } } else { if (ip6cp->ip6c_m->m_pkthdr.len >= ip6cp->ip6c_off + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + offsetof(struct sctp_init, a_rwnd)) { /* * In this case we can check if we * got an INIT chunk and if the * initiate tag matches. */ uint32_t initiate_tag; uint8_t chunk_type; m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off + sizeof(struct sctphdr), sizeof(uint8_t), (caddr_t)&chunk_type); m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr), sizeof(uint32_t), (caddr_t)&initiate_tag); if ((chunk_type != SCTP_INITIATION) || (ntohl(initiate_tag) != stcb->asoc.my_vtag)) { SCTP_TCB_UNLOCK(stcb); return; } } else { SCTP_TCB_UNLOCK(stcb); return; } } sctp6_notify(inp, stcb, net, ip6cp->ip6c_icmp6->icmp6_type, ip6cp->ip6c_icmp6->icmp6_code, (uint16_t)ntohl(ip6cp->ip6c_icmp6->icmp6_mtu)); } else { if ((stcb == NULL) && (inp != NULL)) { /* reduce inp's ref-count */ SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); } if (stcb) { SCTP_TCB_UNLOCK(stcb); } } } } /* * this routine can probably be collasped into the one in sctp_userreq.c * since they do the same thing and now we lookup with a sockaddr */ static int sctp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct sctp_inpcb *inp; struct sctp_nets *net; struct sctp_tcb *stcb; int error; uint32_t vrf_id; vrf_id = SCTP_DEFAULT_VRFID; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } if (req->oldlen != sizeof(struct ucred)) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[1]), sin6tosa(&addrs[0]), &inp, &net, 1, vrf_id); if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) { if ((inp != NULL) && (stcb == NULL)) { /* reduce ref-count */ SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); goto cred_can_cont; } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); error = ENOENT; goto out; } SCTP_TCB_UNLOCK(stcb); /* * We use the write lock here, only since in the error leg we need * it. If we used RLOCK, then we would have to * wlock/decr/unlock/rlock. Which in theory could create a hole. * Better to use higher wlock. */ SCTP_INP_WLOCK(inp); cred_can_cont: error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket); if (error) { SCTP_INP_WUNLOCK(inp); goto out; } cru2x(inp->sctp_socket->so_cred, &xuc); SCTP_INP_WUNLOCK(inp); error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); out: return (error); } SYSCTL_PROC(_net_inet6_sctp6, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW, 0, 0, sctp6_getcred, "S,ucred", "Get the ucred of a SCTP6 connection"); /* This is the same as the sctp_abort() could be made common */ static void sctp6_abort(struct socket *so) { struct sctp_inpcb *inp; uint32_t flags; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return; } sctp_must_try_again: flags = inp->sctp_flags; #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 17); #endif if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) && (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 16); #endif sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT, SCTP_CALLED_AFTER_CMPSET_OFCLOSE); SOCK_LOCK(so); SCTP_SB_CLEAR(so->so_snd); /* * same for the rcv ones, they are only here for the * accounting/select. */ SCTP_SB_CLEAR(so->so_rcv); /* Now null out the reference, we are completely detached. */ so->so_pcb = NULL; SOCK_UNLOCK(so); } else { flags = inp->sctp_flags; if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) { goto sctp_must_try_again; } } return; } static int sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED) { struct in6pcb *inp6; int error; struct sctp_inpcb *inp; uint32_t vrf_id = SCTP_DEFAULT_VRFID; inp = (struct sctp_inpcb *)so->so_pcb; if (inp != NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace)); if (error) return (error); } error = sctp_inpcb_alloc(so, vrf_id); if (error) return (error); inp = (struct sctp_inpcb *)so->so_pcb; SCTP_INP_WLOCK(inp); inp->sctp_flags |= SCTP_PCB_FLAGS_BOUND_V6; /* I'm v6! */ inp6 = (struct in6pcb *)inp; inp6->inp_vflag |= INP_IPV6; inp6->in6p_hops = -1; /* use kernel default */ inp6->in6p_cksum = -1; /* just to be sure */ #ifdef INET /* * XXX: ugly!! IPv4 TTL initialization is necessary for an IPv6 * socket as well, because the socket may be bound to an IPv6 * wildcard address, which may match an IPv4-mapped IPv6 address. */ inp6->inp_ip_ttl = MODULE_GLOBAL(ip_defttl); #endif /* * Hmm what about the IPSEC stuff that is missing here but in * sctp_attach()? */ SCTP_INP_WUNLOCK(inp); return (0); } static int sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p) { struct sctp_inpcb *inp; struct in6pcb *inp6; int error; inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } if (addr) { switch (addr->sa_family) { #ifdef INET case AF_INET: if (addr->sa_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } break; #endif #ifdef INET6 case AF_INET6: if (addr->sa_len != sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } break; #endif default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } } inp6 = (struct in6pcb *)inp; inp6->inp_vflag &= ~INP_IPV4; inp6->inp_vflag |= INP_IPV6; if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp6) == 0)) { switch (addr->sa_family) { #ifdef INET case AF_INET: /* binding v4 addr to v6 socket, so reset flags */ inp6->inp_vflag |= INP_IPV4; inp6->inp_vflag &= ~INP_IPV6; break; #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)addr; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) { inp6->inp_vflag |= INP_IPV4; } #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp6->inp_vflag |= INP_IPV4; inp6->inp_vflag &= ~INP_IPV6; error = sctp_inpcb_bind(so, (struct sockaddr *)&sin, NULL, p); return (error); } #endif break; } #endif default: break; } } else if (addr != NULL) { struct sockaddr_in6 *sin6_p; /* IPV6_V6ONLY socket */ #ifdef INET if (addr->sa_family == AF_INET) { /* can't bind v4 addr to v6 only socket! */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } #endif sin6_p = (struct sockaddr_in6 *)addr; if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { /* can't bind v4-mapped addrs either! */ /* NOTE: we don't support SIIT */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } } error = sctp_inpcb_bind(so, addr, NULL, p); return (error); } static void sctp6_close(struct socket *so) { sctp_close(so); } /* This could be made common with sctp_detach() since they are identical */ static int sctp6_disconnect(struct socket *so) { return (sctp_disconnect(so)); } int sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *p); static int sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *p) { struct sctp_inpcb *inp; struct in6pcb *inp6; #ifdef INET struct sockaddr_in6 *sin6; #endif /* INET */ /* No SPL needed since sctp_output does this */ inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { if (control) { SCTP_RELEASE_PKT(control); control = NULL; } SCTP_RELEASE_PKT(m); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } inp6 = (struct in6pcb *)inp; /* * For the TCP model we may get a NULL addr, if we are a connected * socket thats ok. */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) && (addr == NULL)) { goto connected_type; } if (addr == NULL) { SCTP_RELEASE_PKT(m); if (control) { SCTP_RELEASE_PKT(control); control = NULL; } SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EDESTADDRREQ); return (EDESTADDRREQ); } #ifdef INET sin6 = (struct sockaddr_in6 *)addr; if (SCTP_IPV6_V6ONLY(inp6)) { /* * if IPV6_V6ONLY flag, we discard datagrams destined to a * v4 addr or v4-mapped addr */ if (addr->sa_family == AF_INET) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; /* convert v4-mapped into v4 addr and send */ in6_sin6_2_sin(&sin, sin6); return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin, control, p)); } #endif /* INET */ connected_type: /* now what about control */ if (control) { if (inp->control) { SCTP_PRINTF("huh? control set?\n"); SCTP_RELEASE_PKT(inp->control); inp->control = NULL; } inp->control = control; } /* Place the data */ if (inp->pkt) { SCTP_BUF_NEXT(inp->pkt_last) = m; inp->pkt_last = m; } else { inp->pkt_last = inp->pkt = m; } if ( /* FreeBSD and MacOSX uses a flag passed */ ((flags & PRUS_MORETOCOME) == 0) ) { /* * note with the current version this code will only be used * by OpenBSD, NetBSD and FreeBSD have methods for * re-defining sosend() to use sctp_sosend(). One can * optionaly switch back to this code (by changing back the * defininitions but this is not advisable. */ int ret; ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags); inp->pkt = NULL; inp->control = NULL; return (ret); } else { return (0); } } static int sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) { uint32_t vrf_id; int error = 0; struct sctp_inpcb *inp; struct sctp_tcb *stcb; #ifdef INET struct in6pcb *inp6; struct sockaddr_in6 *sin6; union sctp_sockstore store; #endif #ifdef INET inp6 = (struct in6pcb *)so->so_pcb; #endif inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); return (ECONNRESET); /* I made the same as TCP since we are * not setup? */ } if (addr == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } switch (addr->sa_family) { #ifdef INET case AF_INET: if (addr->sa_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } break; #endif #ifdef INET6 case AF_INET6: if (addr->sa_len != sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } break; #endif default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } vrf_id = inp->def_vrf_id; SCTP_ASOC_CREATE_LOCK(inp); SCTP_INP_RLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == SCTP_PCB_FLAGS_UNBOUND) { /* Bind a ephemeral port */ SCTP_INP_RUNLOCK(inp); error = sctp6_bind(so, NULL, p); if (error) { SCTP_ASOC_CREATE_UNLOCK(inp); return (error); } SCTP_INP_RLOCK(inp); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) { /* We are already connected AND the TCP model */ SCTP_INP_RUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EADDRINUSE); return (EADDRINUSE); } #ifdef INET sin6 = (struct sockaddr_in6 *)addr; if (SCTP_IPV6_V6ONLY(inp6)) { /* * if IPV6_V6ONLY flag, ignore connections destined to a v4 * addr or v4-mapped addr */ if (addr->sa_family == AF_INET) { SCTP_INP_RUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { SCTP_INP_RUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { /* convert v4-mapped into v4 addr */ in6_sin6_2_sin(&store.sin, sin6); addr = &store.sa; } #endif /* INET */ /* Now do we connect? */ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb) { SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } else { SCTP_INP_RUNLOCK(inp); SCTP_INP_WLOCK(inp); SCTP_INP_INCR_REF(inp); SCTP_INP_WUNLOCK(inp); stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL); if (stcb == NULL) { SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); } } if (stcb != NULL) { /* Already have or am bring up an association */ SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EALREADY); return (EALREADY); } /* We are GOOD to go */ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, inp->sctp_ep.port, p); SCTP_ASOC_CREATE_UNLOCK(inp); if (stcb == NULL) { /* Gak! no memory */ return (error); } if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; /* Set the connected flag so we can queue data */ soisconnecting(so); } stcb->asoc.state = SCTP_STATE_COOKIE_WAIT; (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); /* initialize authentication parameters for the assoc */ sctp_initialize_auth_params(inp, stcb); sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); return (error); } static int sctp6_getaddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in6 *sin6; struct sctp_inpcb *inp; uint32_t vrf_id; struct sctp_ifa *sctp_ifa; int error; /* * Do the malloc first in case it blocks. */ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof(*sin6)); if (sin6 == NULL) return (ENOMEM); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); return (ECONNRESET); } SCTP_INP_RLOCK(inp); sin6->sin6_port = inp->sctp_lport; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* For the bound all case you get back 0 */ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { struct sctp_tcb *stcb; struct sockaddr_in6 *sin_a6; struct sctp_nets *net; int fnd; stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); return (ENOENT); } fnd = 0; sin_a6 = NULL; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { sin_a6 = (struct sockaddr_in6 *)&net->ro._l_addr; if (sin_a6 == NULL) /* this will make coverity happy */ continue; if (sin_a6->sin6_family == AF_INET6) { fnd = 1; break; } } if ((!fnd) || (sin_a6 == NULL)) { /* punt */ SCTP_INP_RUNLOCK(inp); SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); return (ENOENT); } vrf_id = inp->def_vrf_id; sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *)&net->ro, net, 0, vrf_id); if (sctp_ifa) { sin6->sin6_addr = sctp_ifa->address.sin6.sin6_addr; } } else { /* For the bound all case you get back 0 */ memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr)); } } else { /* Take the first IPv6 address in the list */ struct sctp_laddr *laddr; int fnd = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa->address.sa.sa_family == AF_INET6) { struct sockaddr_in6 *sin_a; sin_a = &laddr->ifa->address.sin6; sin6->sin6_addr = sin_a->sin6_addr; fnd = 1; break; } } if (!fnd) { SCTP_FREE_SONAME(sin6); SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); return (ENOENT); } } SCTP_INP_RUNLOCK(inp); /* Scoping things for v6 */ if ((error = sa6_recoverscope(sin6)) != 0) { SCTP_FREE_SONAME(sin6); return (error); } (*addr) = (struct sockaddr *)sin6; return (0); } static int sctp6_peeraddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in6 *sin6; int fnd; struct sockaddr_in6 *sin_a6; struct sctp_inpcb *inp; struct sctp_tcb *stcb; struct sctp_nets *net; int error; /* Do the malloc first in case it blocks. */ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); if (sin6 == NULL) return (ENOMEM); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); inp = (struct sctp_inpcb *)so->so_pcb; if ((inp == NULL) || ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) { /* UDP type and listeners will drop out here */ SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOTCONN); return (ENOTCONN); } SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb) { SCTP_TCB_LOCK(stcb); } SCTP_INP_RUNLOCK(inp); if (stcb == NULL) { SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); return (ECONNRESET); } fnd = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { sin_a6 = (struct sockaddr_in6 *)&net->ro._l_addr; if (sin_a6->sin6_family == AF_INET6) { fnd = 1; sin6->sin6_port = stcb->rport; sin6->sin6_addr = sin_a6->sin6_addr; break; } } SCTP_TCB_UNLOCK(stcb); if (!fnd) { /* No IPv4 address */ SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); return (ENOENT); } if ((error = sa6_recoverscope(sin6)) != 0) { SCTP_FREE_SONAME(sin6); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, error); return (error); } *addr = (struct sockaddr *)sin6; return (0); } static int sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) { struct in6pcb *inp6 = sotoin6pcb(so); int error; if (inp6 == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } /* allow v6 addresses precedence */ error = sctp6_getaddr(so, nam); #ifdef INET if (error) { struct sockaddr_in6 *sin6; /* try v4 next if v6 failed */ error = sctp_ingetaddr(so, nam); if (error) { return (error); } SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); if (sin6 == NULL) { SCTP_FREE_SONAME(*nam); return (ENOMEM); } in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6); SCTP_FREE_SONAME(*nam); *nam = (struct sockaddr *)sin6; } #endif return (error); } static int sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct in6pcb *inp6 = sotoin6pcb(so); int error; if (inp6 == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } /* allow v6 addresses precedence */ error = sctp6_peeraddr(so, nam); #ifdef INET if (error) { struct sockaddr_in6 *sin6; /* try v4 next if v6 failed */ error = sctp_peeraddr(so, nam); if (error) { return (error); } SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); if (sin6 == NULL) { SCTP_FREE_SONAME(*nam); return (ENOMEM); } in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6); SCTP_FREE_SONAME(*nam); *nam = (struct sockaddr *)sin6; } #endif return (error); } struct pr_usrreqs sctp6_usrreqs = { .pru_abort = sctp6_abort, .pru_accept = sctp_accept, .pru_attach = sctp6_attach, .pru_bind = sctp6_bind, .pru_connect = sctp6_connect, .pru_control = in6_control, .pru_close = sctp6_close, .pru_detach = sctp6_close, .pru_sopoll = sopoll_generic, .pru_flush = sctp_flush, .pru_disconnect = sctp6_disconnect, .pru_listen = sctp_listen, .pru_peeraddr = sctp6_getpeeraddr, .pru_send = sctp6_send, .pru_shutdown = sctp_shutdown, .pru_sockaddr = sctp6_in6getaddr, .pru_sosend = sctp_sosend, .pru_soreceive = sctp_soreceive }; #endif diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 144989445a0d..911d1de51945 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -1,1302 +1,1301 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts; struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ - if (ipsec6_in_reject(n, inp)) { - m_freem(n); - return (0); + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_CHECK_POLICY(ipv6, n, inp) != 0) { + m_freem(n); + return (0); + } } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ifnet *ifp; struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return (IPPROTO_DONE); #endif UDPSTAT_INC(udps_ipackets); /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); nxt = proto; cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; if (nxt == IPPROTO_UDPLITE) { /* Zero means checksum over the complete packet. */ if (ulen == 0) ulen = plen; if (ulen == plen) cscov_partial = 0; if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } if (uh->uh_sum == 0) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } } else { if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (uh->uh_sum == 0) { UDPSTAT_INC(udps_nosum); goto badunlocked; } } if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } /* * Construct sockaddr format source address. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip6_moptions *imo; INP_INFO_RLOCK(pcbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet will not match laddr. To * handle this situation, matching is relaxed if the * receiving interface is the same as one specified in the * socket and if the destination multicast address matches * one of the multicast groups specified in the socket. */ /* * KAME note: traditionally we dropped udpiphdr from mbuf * here. We need udphdr for IPsec processing so we do that * later. */ pcblist = udp_get_pcblist(nxt); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src) || inp->inp_fport != uh->uh_sport) continue; } /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is (supposed to be) held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->in6p_moptions; if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct sockaddr_in6 mcaddr; int blocked; INP_RLOCK(inp); bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(imo, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IP6STAT_INC(ip6s_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); /* XXX */ continue; } INP_RUNLOCK(inp); } if (last != NULL) { struct mbuf *n; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { INP_RLOCK(last); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, n, off, &fromsa)) goto inp_lost; INP_RUNLOCK(last); } } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datgram.) */ UDPSTAT_INC(udps_noport); UDPSTAT_INC(udps_noportmcast); goto badheadlocked; } INP_RLOCK(last); INP_INFO_RUNLOCK(pcbinfo); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, m, off, &fromsa) == 0) INP_RUNLOCK(last); inp_lost: return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, uh->uh_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? htons(next_hop6->sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", ip6_sprintf(ip6bufd, &ip6->ip6_dst), ntohs(uh->uh_dport), ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (cscov_partial) { if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, &fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: INP_INFO_RUNLOCK(pcbinfo); badunlocked: if (m) m_freem(m); return (IPPROTO_DONE); } static void udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* Check if we can safely examine src and dst ports. */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); if (!PRC_IS_REDIRECT(cmd)) { /* Check to see if its tunneled */ struct inpcb *inp; inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst, uh.uh_dport, &ip6->ip6_src, uh.uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp != NULL) { struct udpcb *up; up = intoudpcb(inp); if (up->u_icmp_func) { /* Yes it is. */ INP_RUNLOCK(inp); (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src, d, up->u_tun_ctx); return; } else { /* Can't find it. */ INP_RUNLOCK(inp); } } } (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } void udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); } void udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; uint8_t nxt; uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as it should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return (error); } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. */ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { error = in6_selectsrc_socket(sin6, optp, inp, td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (inp->inp_lport == 0 && (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } #ifdef RSS { uint32_t hash_val, hash_type; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later later on connected socket values should be * cached in the inpcb and reused, rather than constantly * re-calculating it. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v6(faddr, laddr, fport, inp->inp_lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } } #endif flags = 0; #ifdef RSS /* * Don't override with the inp cached flowid. * * Until the whole UDP path is vetted, it may actually * be incorrect. */ flags |= IP_NODEFAULTFLOWID; #endif UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, &inp->inp_route6, flags, inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); } static void udp6_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); } INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; #ifdef INET else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); goto out; } #endif } error = in6_pcbbind(inp, nam, td->td_ucred); #ifdef INET out: #endif INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp6_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); /* * XXXRW: Need to clarify locking of v4/v6 flags. */ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto out; } in6_sin6_2_sin(&sin, sin6); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); goto out; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = EISCONN; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); out: INP_WUNLOCK(inp); return (error); } static void udp6_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); out: INP_WUNLOCK(inp); return (0); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error = 0; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; struct sockaddr_in6 *sin6 = NULL; if (addr == NULL) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, * this does mean there is a short window where inp's * fields are unstable. Could this lead to a * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? */ INP_WUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags, m, addr, control, td)); } } #endif #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif INP_HASH_WLOCK(pcbinfo); error = udp6_output(inp, m, addr, control, td); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); bad: INP_WUNLOCK(inp); m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { .pru_abort = udp6_abort, .pru_attach = udp6_attach, .pru_bind = udp6_bind, .pru_connect = udp6_connect, .pru_control = in6_control, .pru_detach = udp6_detach, .pru_disconnect = udp6_disconnect, .pru_peeraddr = in6_mapped_peeraddr, .pru_send = udp6_send, .pru_shutdown = udp_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp6_close }; diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index dd7be14cb5f0..e120f654aac0 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -1,1734 +1,1393 @@ /* $FreeBSD$ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #ifdef INET6 #include #endif #include #include #include /*XXX*/ #include +#include #include #include #include #include #include #include -#ifdef IPSEC_DEBUG -VNET_DEFINE(int, ipsec_debug) = 1; -#else -VNET_DEFINE(int, ipsec_debug) = 0; -#endif - /* NB: name changed so netstat doesn't use it. */ VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec4stat); VNET_PCPUSTAT_SYSINIT(ipsec4stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipsec4stat); #endif /* VIMAGE */ VNET_DEFINE(int, ip4_ah_offsetmask) = 0; /* maybe IP_DF? */ /* DF bit on encap. 0: clear 1: set 2: copy */ VNET_DEFINE(int, ip4_ipsec_dfbit) = 0; VNET_DEFINE(int, ip4_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ VNET_DEFINE(int, ip4_ipsec_ecn) = 0; VNET_DEFINE(int, ip4_esp_randpad) = -1; -static VNET_DEFINE(struct secpolicy, def_policy); +static VNET_DEFINE(int, ip4_filtertunnel) = 0; +#define V_ip4_filtertunnel VNET(ip4_filtertunnel) +static VNET_DEFINE(int, check_policy_history) = 0; +#define V_check_policy_history VNET(check_policy_history) +static VNET_DEFINE(struct secpolicy *, def_policy) = NULL; #define V_def_policy VNET(def_policy) +static int +sysctl_def_policy(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = V_def_policy->policy; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error == 0) { + if (value != IPSEC_POLICY_DISCARD && + value != IPSEC_POLICY_NONE) + return (EINVAL); + V_def_policy->policy = value; + } + return (error); +} + /* * Crypto support requirements: * * 1 require hardware support * -1 require software support * 0 take anything */ VNET_DEFINE(int, crypto_support) = CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE; +/* + * TCP/UDP checksum handling policy for transport mode NAT-T (RFC3948) + * + * 0 - auto: incrementally recompute, when checksum delta is known; + * if checksum delta isn't known, reset checksum to zero for UDP, + * and mark csum_flags as valid for TCP. + * 1 - fully recompute TCP/UDP checksum. + */ +VNET_DEFINE(int, natt_cksum_policy) = 0; FEATURE(ipsec, "Internet Protocol Security (IPsec)"); -#ifdef IPSEC_NAT_T FEATURE(ipsec_natt, "UDP Encapsulation of IPsec ESP Packets ('NAT-T')"); -#endif SYSCTL_DECL(_net_inet_ipsec); /* net.inet.ipsec */ -SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0, +SYSCTL_PROC(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0, "Default ESP transport mode level"); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_net_deflev), 0, "Default ESP tunnel mode level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_trans_deflev), 0, "AH transfer mode default level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_net_deflev), 0, "AH tunnel mode default level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, - "If set clear type-of-service field when doing AH computation."); + "If set, clear type-of-service field when doing AH computation."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_offsetmask), 0, - "If not set clear offset field mask when doing AH computation."); + "If not set, clear offset field mask when doing AH computation."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0, "Do not fragment bit on encap."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_ecn), 0, "Explicit Congestion Notification handling."); -SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, - "Enable IPsec debugging output when set."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, crypto_support, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(crypto_support), 0, "Crypto driver selection."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, check_policy_history, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(check_policy_history), 0, + "Use strict check of inbound packets to security policy compliance."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, natt_cksum_policy, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(natt_cksum_policy), 0, + "Method to fix TCP/UDP checksum for transport mode IPsec after NAT."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_filtertunnel), 0, + "If set, filter packets from an IPsec tunnel."); SYSCTL_VNET_PCPUSTAT(_net_inet_ipsec, OID_AUTO, ipsecstats, struct ipsecstat, ipsec4stat, "IPsec IPv4 statistics."); #ifdef REGRESSION /* * When set to 1, IPsec will send packets with the same sequence number. * This allows to verify if the other side has proper replay attacks detection. */ VNET_DEFINE(int, ipsec_replay) = 0; SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_replay, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_replay), 0, "Emulate replay attack"); /* * When set 1, IPsec will send packets with corrupted HMAC. * This allows to verify if the other side properly detects modified packets. */ VNET_DEFINE(int, ipsec_integrity) = 0; SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_integrity, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_integrity), 0, "Emulate man-in-the-middle attack"); #endif #ifdef INET6 VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec6stat); VNET_PCPUSTAT_SYSINIT(ipsec6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipsec6stat); #endif /* VIMAGE */ VNET_DEFINE(int, ip6_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ah_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ipsec_ecn) = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ +static VNET_DEFINE(int, ip6_filtertunnel) = 0; +#define V_ip6_filtertunnel VNET(ip6_filtertunnel) + SYSCTL_DECL(_net_inet6_ipsec6); /* net.inet6.ipsec6 */ -SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0, +SYSCTL_PROC(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0, "Default ESP transport mode level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_net_deflev), 0, "Default ESP tunnel mode level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_trans_deflev), 0, "AH transfer mode default level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_net_deflev), 0, "AH tunnel mode default level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec_ecn), 0, "Explicit Congestion Notification handling."); -SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, - "Enable IPsec debugging output when set."); +SYSCTL_INT(_net_inet6_ipsec6, OID_AUTO, filtertunnel, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_filtertunnel), 0, + "If set, filter packets from an IPsec tunnel."); SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats, struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics."); #endif /* INET6 */ -static int ipsec_in_reject(struct secpolicy *, const struct mbuf *); -static int ipsec_setspidx_inpcb(const struct mbuf *, struct inpcb *, u_int); -static int ipsec_setspidx(const struct mbuf *, struct secpolicyindex *, int); -static void ipsec4_get_ulp(const struct mbuf *m, struct secpolicyindex *, int); -static int ipsec4_setspidx_ipaddr(const struct mbuf *, struct secpolicyindex *); +static int ipsec_in_reject(struct secpolicy *, struct inpcb *, + const struct mbuf *); + +#ifdef INET +static void ipsec4_get_ulp(const struct mbuf *, struct secpolicyindex *, int); +static void ipsec4_setspidx_ipaddr(const struct mbuf *, + struct secpolicyindex *); +#endif #ifdef INET6 static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *, int); -static int ipsec6_setspidx_ipaddr(const struct mbuf *, struct secpolicyindex *); +static void ipsec6_setspidx_ipaddr(const struct mbuf *, + struct secpolicyindex *); #endif -static void ipsec_delpcbpolicy(struct inpcbpolicy *); -static struct secpolicy *ipsec_deepcopy_policy(struct secpolicy *src); - -MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); /* * Return a held reference to the default SP. */ static struct secpolicy * -key_allocsp_default(const char* where, int tag) +key_allocsp_default(void) { - struct secpolicy *sp; - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp_default from %s:%u\n", where, tag)); - sp = &V_def_policy; - if (sp->policy != IPSEC_POLICY_DISCARD && - sp->policy != IPSEC_POLICY_NONE) { - ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", - sp->policy, IPSEC_POLICY_NONE)); - sp->policy = IPSEC_POLICY_NONE; - } - key_addref(sp); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp_default returns SP:%p (%u)\n", - sp, sp->refcnt)); - return (sp); + key_addref(V_def_policy); + return (V_def_policy); } -#define KEY_ALLOCSP_DEFAULT() \ - key_allocsp_default(__FILE__, __LINE__) -/* - * For OUTBOUND packet having a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occurred. - * others: a pointer to SP - * - * NOTE: IPv6 mapped adddress concern is implemented here. - */ -struct secpolicy * -ipsec_getpolicy(struct tdb_ident *tdbi, u_int dir) +static void +ipsec_invalidate_cache(struct inpcb *inp, u_int dir) { struct secpolicy *sp; - IPSEC_ASSERT(tdbi != NULL, ("null tdbi")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); - - sp = KEY_ALLOCSP2(tdbi->spi, &tdbi->dst, tdbi->proto, dir); - if (sp == NULL) /*XXX????*/ - sp = KEY_ALLOCSP_DEFAULT(); - IPSEC_ASSERT(sp != NULL, ("null SP")); - return (sp); + INP_WLOCK_ASSERT(inp); + if (dir == IPSEC_DIR_OUTBOUND) { + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + return; + sp = inp->inp_sp->sp_in; + inp->inp_sp->sp_in = NULL; + } else { + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + return; + sp = inp->inp_sp->sp_out; + inp->inp_sp->sp_out = NULL; + } + if (sp != NULL) + key_freesp(&sp); /* release extra reference */ } -/* - * For OUTBOUND packet having a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occurred. - * others: a pointer to SP - * - * NOTE: IPv6 mapped adddress concern is implemented here. - */ -static struct secpolicy * -ipsec_getpolicybysock(const struct mbuf *m, u_int dir, struct inpcb *inp, - int *error) +static void +ipsec_cachepolicy(struct inpcb *inp, struct secpolicy *sp, u_int dir) { - struct inpcbpolicy *pcbsp; - struct secpolicy *currsp = NULL; /* Policy on socket. */ - struct secpolicy *sp; + uint32_t genid; + int downgrade; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(inp != NULL, ("null inpcb")); - IPSEC_ASSERT(error != NULL, ("null error")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + INP_LOCK_ASSERT(inp); - if (!key_havesp(dir)) { - /* No SP found, use system default. */ - sp = KEY_ALLOCSP_DEFAULT(); - return (sp); + if (dir == IPSEC_DIR_OUTBOUND) { + /* Do we have configured PCB policy? */ + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + return; + /* Another thread has already set cached policy */ + if (inp->inp_sp->sp_out != NULL) + return; + /* + * Do not cache OUTBOUND policy if PCB isn't connected, + * i.e. foreign address is INADDR_ANY/UNSPECIFIED. + */ +#ifdef INET + if ((inp->inp_vflag & INP_IPV4) != 0 && + inp->inp_faddr.s_addr == INADDR_ANY) + return; +#endif +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0 && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + return; +#endif + } else { + /* Do we have configured PCB policy? */ + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + return; + /* Another thread has already set cached policy */ + if (inp->inp_sp->sp_in != NULL) + return; + /* + * Do not cache INBOUND policy for listen socket, + * that is bound to INADDR_ANY/UNSPECIFIED address. + */ +#ifdef INET + if ((inp->inp_vflag & INP_IPV4) != 0 && + inp->inp_faddr.s_addr == INADDR_ANY) + return; +#endif +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0 && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + return; +#endif } - - /* Set spidx in pcb. */ - *error = ipsec_setspidx_inpcb(m, inp, dir); - if (*error) - return (NULL); - - pcbsp = inp->inp_sp; - IPSEC_ASSERT(pcbsp != NULL, ("null pcbsp")); - switch (dir) { - case IPSEC_DIR_INBOUND: - currsp = pcbsp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - currsp = pcbsp->sp_out; - break; + downgrade = 0; + if (!INP_WLOCKED(inp)) { + if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) + return; } - IPSEC_ASSERT(currsp != NULL, ("null currsp")); - - if (pcbsp->priv) { /* When privilieged socket. */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_IPSEC: - key_addref(currsp); - sp = currsp; - break; - - case IPSEC_POLICY_ENTRUST: - /* Look for a policy in SPD. */ - sp = KEY_ALLOCSP(&currsp->spidx, dir); - if (sp == NULL) /* No SP found. */ - sp = KEY_ALLOCSP_DEFAULT(); - break; - - default: - ipseclog((LOG_ERR, "%s: Invalid policy for PCB %d\n", - __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - } - } else { /* Unpriv, SPD has policy. */ - sp = KEY_ALLOCSP(&currsp->spidx, dir); - if (sp == NULL) { /* No SP found. */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - ipseclog((LOG_ERR, "%s: Illegal policy for " - "non-priviliged defined %d\n", - __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - - case IPSEC_POLICY_ENTRUST: - sp = KEY_ALLOCSP_DEFAULT(); - break; - - case IPSEC_POLICY_IPSEC: - key_addref(currsp); - sp = currsp; - break; - - default: - ipseclog((LOG_ERR, "%s: Invalid policy for " - "PCB %d\n", __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - } - } + if (dir == IPSEC_DIR_OUTBOUND) + inp->inp_sp->sp_out = sp; + else + inp->inp_sp->sp_in = sp; + /* + * SP is already referenced by the lookup code. + * We take extra reference here to avoid race in the + * ipsec_getpcbpolicy() function - SP will not be freed in the + * time between we take SP pointer from the cache and key_addref() + * call. + */ + key_addref(sp); + genid = key_getspgen(); + if (genid != inp->inp_sp->genid) { + ipsec_invalidate_cache(inp, dir); + inp->inp_sp->genid = genid; } - IPSEC_ASSERT(sp != NULL, - ("null SP (priv %u policy %u", pcbsp->priv, currsp->policy)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s (priv %u policy %u) allocate SP:%p (refcnt %u)\n", - __func__, pcbsp->priv, currsp->policy, sp, sp->refcnt)); - return (sp); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): cached %s SP(%p)\n", + __func__, inp, dir == IPSEC_DIR_OUTBOUND ? "OUTBOUND": + "INBOUND", sp)); + if (downgrade != 0) + INP_DOWNGRADE(inp); } -/* - * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: positive: a pointer to the entry for security policy leaf matched. - * NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occurred. - */ -struct secpolicy * -ipsec_getpolicybyaddr(const struct mbuf *m, u_int dir, int *error) +static struct secpolicy * +ipsec_checkpolicy(struct secpolicy *sp, struct inpcb *inp, int *error) { - struct secpolicyindex spidx; - struct secpolicy *sp; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(error != NULL, ("null error")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + /* Save found OUTBOUND policy into PCB SP cache. */ + if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_out == NULL) + ipsec_cachepolicy(inp, sp, IPSEC_DIR_OUTBOUND); - sp = NULL; - *error = 0; - if (key_havesp(dir)) { - /* Make an index to look for a policy. */ - *error = ipsec_setspidx(m, &spidx, 0); - if (*error != 0) { - DPRINTF(("%s: setpidx failed, dir %u\n", - __func__, dir)); - return (NULL); - } - spidx.dir = dir; - sp = KEY_ALLOCSP(&spidx, dir); - } - if (sp == NULL) /* No SP found, use system default. */ - sp = KEY_ALLOCSP_DEFAULT(); - IPSEC_ASSERT(sp != NULL, ("null SP")); - return (sp); -} - -struct secpolicy * -ipsec4_checkpolicy(const struct mbuf *m, u_int dir, int *error, - struct inpcb *inp) -{ - struct secpolicy *sp; - - *error = 0; - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, error); - else - sp = ipsec_getpolicybysock(m, dir, inp, error); - if (sp == NULL) { - IPSEC_ASSERT(*error != 0, ("getpolicy failed w/o error")); - IPSECSTAT_INC(ips_out_inval); - return (NULL); - } - IPSEC_ASSERT(*error == 0, ("sp w/ error set to %u", *error)); switch (sp->policy) { - case IPSEC_POLICY_ENTRUST: default: printf("%s: invalid policy %u\n", __func__, sp->policy); /* FALLTHROUGH */ case IPSEC_POLICY_DISCARD: - IPSECSTAT_INC(ips_out_polvio); *error = -EINVAL; /* Packet is discarded by caller. */ - break; + /* FALLTHROUGH */ case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: - KEY_FREESP(&sp); + key_freesp(&sp); sp = NULL; /* NB: force NULL result. */ break; case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) /* Acquire a SA. */ - *error = key_spdacquire(sp); + /* XXXAE: handle LARVAL SP */ break; } - if (*error != 0) { - KEY_FREESP(&sp); - sp = NULL; - } + KEYDBG(IPSEC_DUMP, + printf("%s: get SP(%p), error %d\n", __func__, sp, *error)); return (sp); } -static int -ipsec_setspidx_inpcb(const struct mbuf *m, struct inpcb *inp, u_int dir) +static struct secpolicy * +ipsec_getpcbpolicy(struct inpcb *inp, u_int dir) { - struct secpolicyindex *spidx; - int error; + struct secpolicy *sp; + int flags, downgrade; - IPSEC_ASSERT(inp != NULL, ("null inp")); - IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp")); - IPSEC_ASSERT(inp->inp_sp->sp_out != NULL && inp->inp_sp->sp_in != NULL, - ("null sp_in || sp_out")); + if (inp == NULL || inp->inp_sp == NULL) + return (NULL); - if (dir == IPSEC_DIR_INBOUND) - spidx = &inp->inp_sp->sp_in->spidx; - else - spidx = &inp->inp_sp->sp_out->spidx; - error = ipsec_setspidx(m, spidx, 1); - if (error == 0) { - spidx->dir = dir; + INP_LOCK_ASSERT(inp); + + flags = inp->inp_sp->flags; + if (dir == IPSEC_DIR_OUTBOUND) { + sp = inp->inp_sp->sp_out; + flags &= INP_OUTBOUND_POLICY; } else { - bzero(&inp->inp_sp->sp_in->spidx, - sizeof (inp->inp_sp->sp_in->spidx)); - bzero(&inp->inp_sp->sp_out->spidx, - sizeof (inp->inp_sp->sp_in->spidx)); + sp = inp->inp_sp->sp_in; + flags &= INP_INBOUND_POLICY; } - return (error); -} - -/* - * Configure security policy index (src/dst/proto/sport/dport) - * by looking at the content of mbuf. - * The caller is responsible for error recovery (like clearing up spidx). - */ -static int -ipsec_setspidx(const struct mbuf *m, struct secpolicyindex *spidx, - int needport) -{ - struct ip ipbuf; - const struct ip *ip = NULL; - const struct mbuf *n; - u_int v; - int len; - int error; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - /* - * Validate m->m_pkthdr.len. We see incorrect length if we - * mistakenly call this function with inconsistent mbuf chain - * (like 4.4BSD tcp/udp processing). XXX Should we panic here? + * Check flags. If we have PCB SP, just return it. + * Otherwise we need to check that cached SP entry isn't stale. */ - len = 0; - for (n = m; n; n = n->m_next) - len += n->m_len; - if (m->m_pkthdr.len != len) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) mismatch (%d), ignored.\n", - __func__, len, m->m_pkthdr.len)); - return (EINVAL); - } - - if (m->m_pkthdr.len < sizeof(struct ip)) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) too small (v4), ignored.\n", - __func__, m->m_pkthdr.len)); - return (EINVAL); - } - - if (m->m_len >= sizeof(*ip)) - ip = mtod(m, const struct ip *); - else { - m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); - ip = &ipbuf; - } - v = ip->ip_v; - switch (v) { - case 4: - error = ipsec4_setspidx_ipaddr(m, spidx); - if (error) - return (error); - ipsec4_get_ulp(m, spidx, needport); - return (0); -#ifdef INET6 - case 6: - if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) too small (v6), " - "ignored\n", __func__, m->m_pkthdr.len)); - return (EINVAL); + if (flags == 0) { + if (sp == NULL) + return (NULL); + if (inp->inp_sp->genid != key_getspgen()) { + /* Invalidate the cache. */ + downgrade = 0; + if (!INP_WLOCKED(inp)) { + if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) + return (NULL); + } + ipsec_invalidate_cache(inp, IPSEC_DIR_OUTBOUND); + ipsec_invalidate_cache(inp, IPSEC_DIR_INBOUND); + if (downgrade != 0) + INP_DOWNGRADE(inp); + return (NULL); } - error = ipsec6_setspidx_ipaddr(m, spidx); - if (error) - return (error); - ipsec6_get_ulp(m, spidx, needport); - return (0); -#endif - default: - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: " "unknown IP version %u, ignored.\n", - __func__, v)); - return (EINVAL); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): cache hit SP(%p)\n", + __func__, inp, sp)); + /* Return referenced cached policy */ } + key_addref(sp); + return (sp); } +#ifdef INET static void ipsec4_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, int needport) { - u_int8_t nxt; + uint8_t nxt; int off; /* Sanity check. */ - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip),("packet too short")); + IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip), + ("packet too short")); if (m->m_len >= sizeof (struct ip)) { const struct ip *ip = mtod(m, const struct ip *); if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) goto done; off = ip->ip_hl << 2; nxt = ip->ip_p; } else { struct ip ih; m_copydata(m, 0, sizeof (struct ip), (caddr_t) &ih); if (ih.ip_off & htons(IP_MF | IP_OFFMASK)) goto done; off = ih.ip_hl << 2; nxt = ih.ip_p; } while (off < m->m_pkthdr.len) { struct ip6_ext ip6e; struct tcphdr th; struct udphdr uh; switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) goto done_proto; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) goto done; m_copydata(m, off, sizeof (th), (caddr_t) &th); spidx->src.sin.sin_port = th.th_sport; spidx->dst.sin.sin_port = th.th_dport; return; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) goto done_proto; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) goto done; m_copydata(m, off, sizeof (uh), (caddr_t) &uh); spidx->src.sin.sin_port = uh.uh_sport; spidx->dst.sin.sin_port = uh.uh_dport; return; case IPPROTO_AH: if (off + sizeof(ip6e) > m->m_pkthdr.len) goto done; /* XXX Sigh, this works but is totally bogus. */ m_copydata(m, off, sizeof(ip6e), (caddr_t) &ip6e); off += (ip6e.ip6e_len + 2) << 2; nxt = ip6e.ip6e_nxt; break; case IPPROTO_ICMP: default: /* XXX Intermediate headers??? */ spidx->ul_proto = nxt; goto done_proto; } } done: spidx->ul_proto = IPSEC_ULPROTO_ANY; done_proto: spidx->src.sin.sin_port = IPSEC_PORT_ANY; spidx->dst.sin.sin_port = IPSEC_PORT_ANY; + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } -/* Assumes that m is sane. */ -static int +static void ipsec4_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { - static const struct sockaddr_in template = { - sizeof (struct sockaddr_in), - AF_INET, - 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } - }; - spidx->src.sin = template; - spidx->dst.sin = template; + ipsec4_setsockaddrs(m, &spidx->src, &spidx->dst); + spidx->prefs = sizeof(struct in_addr) << 3; + spidx->prefd = sizeof(struct in_addr) << 3; +} - if (m->m_len < sizeof (struct ip)) { - m_copydata(m, offsetof(struct ip, ip_src), - sizeof (struct in_addr), - (caddr_t) &spidx->src.sin.sin_addr); - m_copydata(m, offsetof(struct ip, ip_dst), - sizeof (struct in_addr), - (caddr_t) &spidx->dst.sin.sin_addr); - } else { - const struct ip *ip = mtod(m, const struct ip *); - spidx->src.sin.sin_addr = ip->ip_src; - spidx->dst.sin.sin_addr = ip->ip_dst; +static struct secpolicy * +ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) +{ + struct secpolicyindex spidx; + struct secpolicy *sp; + + sp = ipsec_getpcbpolicy(inp, dir); + if (sp == NULL && key_havesp(dir)) { + /* Make an index to look for a policy. */ + ipsec4_setspidx_ipaddr(m, &spidx); + /* Fill ports in spidx if we have inpcb. */ + ipsec4_get_ulp(m, &spidx, inp != NULL); + spidx.dir = dir; + sp = key_allocsp(&spidx, dir); } + if (sp == NULL) /* No SP found, use system default. */ + sp = key_allocsp_default(); + return (sp); +} - spidx->prefs = sizeof(struct in_addr) << 3; - spidx->prefd = sizeof(struct in_addr) << 3; +/* + * Check security policy for *OUTBOUND* IPv4 packet. + */ +struct secpolicy * +ipsec4_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) +{ + struct secpolicy *sp; - return (0); + *error = 0; + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + if (sp != NULL) + sp = ipsec_checkpolicy(sp, inp, error); + if (sp == NULL) { + switch (*error) { + case 0: /* No IPsec required: BYPASS or NONE */ + break; + case -EINVAL: + IPSECSTAT_INC(ips_out_polvio); + break; + default: + IPSECSTAT_INC(ips_out_inval); + } + } + KEYDBG(IPSEC_STAMP, + printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); + if (sp != NULL) + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + return (sp); } +/* + * Check IPv4 packet against *INBOUND* security policy. + * This function is called from tcp_input(), udp_input(), + * rip_input() and sctp_input(). + */ +int +ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) +{ + struct secpolicy *sp; + int result; + + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND); + result = ipsec_in_reject(sp, inp, m); + key_freesp(&sp); + if (result != 0) + IPSECSTAT_INC(ips_in_polvio); + return (result); +} + +/* + * IPSEC_CAP() method implementation for IPv4. + */ +int +ipsec4_capability(struct mbuf *m, u_int cap) +{ + + switch (cap) { + case IPSEC_CAP_BYPASS_FILTER: + /* + * Bypass packet filtering for packets previously handled + * by IPsec. + */ + if (!V_ip4_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return (1); + return (0); + case IPSEC_CAP_OPERABLE: + /* Do we have active security policies? */ + if (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0) + return (1); + return (0); + }; + return (EOPNOTSUPP); +} + +#endif /* INET */ + #ifdef INET6 static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, int needport) { - int off, nxt; struct tcphdr th; struct udphdr uh; struct icmp6_hdr ih; + int off, nxt; - /* Sanity check. */ - if (m == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s:\n", __func__); kdebug_mbuf(m)); + IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip6_hdr), + ("packet too short")); /* Set default. */ spidx->ul_proto = IPSEC_ULPROTO_ANY; - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY; + spidx->src.sin6.sin6_port = IPSEC_PORT_ANY; + spidx->dst.sin6.sin6_port = IPSEC_PORT_ANY; nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off < 0 || m->m_pkthdr.len < off) return; switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(th), (caddr_t)&th); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport; + spidx->src.sin6.sin6_port = th.th_sport; + spidx->dst.sin6.sin6_port = th.th_dport; break; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport; + spidx->src.sin6.sin6_port = uh.uh_sport; + spidx->dst.sin6.sin6_port = uh.uh_dport; break; case IPPROTO_ICMPV6: spidx->ul_proto = nxt; if (off + sizeof(struct icmp6_hdr) > m->m_pkthdr.len) break; - m_copydata(m, off, sizeof(ih), (caddr_t)&ih); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = - htons((uint16_t)ih.icmp6_type); - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = - htons((uint16_t)ih.icmp6_code); - break; - default: - /* XXX Intermediate headers??? */ - spidx->ul_proto = nxt; - break; - } -} - -/* Assumes that m is sane. */ -static int -ipsec6_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) -{ - struct ip6_hdr ip6buf; - const struct ip6_hdr *ip6 = NULL; - struct sockaddr_in6 *sin6; - - if (m->m_len >= sizeof(*ip6)) - ip6 = mtod(m, const struct ip6_hdr *); - else { - m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); - ip6 = &ip6buf; - } - - sin6 = (struct sockaddr_in6 *)&spidx->src; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); - } - spidx->prefs = sizeof(struct in6_addr) << 3; - - sin6 = (struct sockaddr_in6 *)&spidx->dst; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); - } - spidx->prefd = sizeof(struct in6_addr) << 3; - - return (0); -} -#endif - -int -ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int type) -{ - int idx; - - switch (ctx->af) { -#ifdef INET - case AF_INET: - idx = HHOOK_IPSEC_INET; - break; -#endif -#ifdef INET6 - case AF_INET6: - idx = HHOOK_IPSEC_INET6; - break; -#endif - default: - return (EPFNOSUPPORT); - } - if (type == HHOOK_TYPE_IPSEC_IN) - HHOOKS_RUN_IF(V_ipsec_hhh_in[idx], ctx, NULL); - else - HHOOKS_RUN_IF(V_ipsec_hhh_out[idx], ctx, NULL); - if (*ctx->mp == NULL) - return (EACCES); - return (0); -} - -static void -ipsec_delpcbpolicy(struct inpcbpolicy *p) -{ - - free(p, M_IPSEC_INPCB); -} - -/* Initialize policy in PCB. */ -int -ipsec_init_policy(struct socket *so, struct inpcbpolicy **pcb_sp) -{ - struct inpcbpolicy *new; - - /* Sanity check. */ - if (so == NULL || pcb_sp == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - new = (struct inpcbpolicy *) malloc(sizeof(struct inpcbpolicy), - M_IPSEC_INPCB, M_NOWAIT|M_ZERO); - if (new == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return (ENOBUFS); - } - - new->priv = IPSEC_IS_PRIVILEGED_SO(so); - - if ((new->sp_in = KEY_NEWSP()) == NULL) { - ipsec_delpcbpolicy(new); - return (ENOBUFS); - } - new->sp_in->policy = IPSEC_POLICY_ENTRUST; - if ((new->sp_out = KEY_NEWSP()) == NULL) { - KEY_FREESP(&new->sp_in); - ipsec_delpcbpolicy(new); - return (ENOBUFS); - } - new->sp_out->policy = IPSEC_POLICY_ENTRUST; - *pcb_sp = new; - - return (0); -} - -/* Copy old IPsec policy into new. */ -int -ipsec_copy_policy(struct inpcbpolicy *old, struct inpcbpolicy *new) -{ - struct secpolicy *sp; - - sp = ipsec_deepcopy_policy(old->sp_in); - if (sp) { - KEY_FREESP(&new->sp_in); - new->sp_in = sp; - } else - return (ENOBUFS); - - sp = ipsec_deepcopy_policy(old->sp_out); - if (sp) { - KEY_FREESP(&new->sp_out); - new->sp_out = sp; - } else - return (ENOBUFS); - - new->priv = old->priv; - - return (0); -} - -struct ipsecrequest * -ipsec_newisr(void) -{ - struct ipsecrequest *p; - - p = malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, M_NOWAIT|M_ZERO); - if (p != NULL) - IPSECREQUEST_LOCK_INIT(p); - return (p); + m_copydata(m, off, sizeof(ih), (caddr_t)&ih); + spidx->src.sin6.sin6_port = htons((uint16_t)ih.icmp6_type); + spidx->dst.sin6.sin6_port = htons((uint16_t)ih.icmp6_code); + break; + default: + /* XXX Intermediate headers??? */ + spidx->ul_proto = nxt; + break; + } + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } -void -ipsec_delisr(struct ipsecrequest *p) +static void +ipsec6_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { - IPSECREQUEST_LOCK_DESTROY(p); - free(p, M_IPSEC_SR); + ipsec6_setsockaddrs(m, &spidx->src, &spidx->dst); + spidx->prefs = sizeof(struct in6_addr) << 3; + spidx->prefd = sizeof(struct in6_addr) << 3; } -/* Deep-copy a policy in PCB. */ static struct secpolicy * -ipsec_deepcopy_policy(struct secpolicy *src) +ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) { - struct ipsecrequest *newchain = NULL; - struct ipsecrequest *p; - struct ipsecrequest **q; - struct ipsecrequest *r; - struct secpolicy *dst; - - if (src == NULL) - return (NULL); - dst = KEY_NEWSP(); - if (dst == NULL) - return (NULL); - - /* - * Deep-copy IPsec request chain. This is required since struct - * ipsecrequest is not reference counted. - */ - q = &newchain; - for (p = src->req; p; p = p->next) { - *q = ipsec_newisr(); - if (*q == NULL) - goto fail; - (*q)->saidx.proto = p->saidx.proto; - (*q)->saidx.mode = p->saidx.mode; - (*q)->level = p->level; - (*q)->saidx.reqid = p->saidx.reqid; - - bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src)); - bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst)); - - (*q)->sp = dst; - - q = &((*q)->next); - } - - dst->req = newchain; - dst->policy = src->policy; - /* Do not touch the refcnt fields. */ - - return (dst); + struct secpolicyindex spidx; + struct secpolicy *sp; -fail: - for (p = newchain; p; p = r) { - r = p->next; - ipsec_delisr(p); - p = NULL; + sp = ipsec_getpcbpolicy(inp, dir); + if (sp == NULL && key_havesp(dir)) { + /* Make an index to look for a policy. */ + ipsec6_setspidx_ipaddr(m, &spidx); + /* Fill ports in spidx if we have inpcb. */ + ipsec6_get_ulp(m, &spidx, inp != NULL); + spidx.dir = dir; + sp = key_allocsp(&spidx, dir); } - KEY_FREESP(&dst); - return (NULL); + if (sp == NULL) /* No SP found, use system default. */ + sp = key_allocsp_default(); + return (sp); } -/* Set policy and IPsec request if present. */ -static int -ipsec_set_policy_internal(struct secpolicy **pcb_sp, int optname, - caddr_t request, size_t len, struct ucred *cred) +/* + * Check security policy for *OUTBOUND* IPv6 packet. + */ +struct secpolicy * +ipsec6_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) { - struct sadb_x_policy *xpl; - struct secpolicy *newsp = NULL; - int error; + struct secpolicy *sp; - /* Sanity check. */ - if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) - return (EINVAL); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: passed policy\n", __func__); - kdebug_sadb_x_policy((struct sadb_ext *)xpl)); - - /* Check policy type. */ - /* ipsec_set_policy_internal() accepts IPSEC, ENTRUST and BYPASS. */ - if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD - || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE) - return (EINVAL); - - /* Check privileged socket. */ - if (cred != NULL && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { - error = priv_check_cred(cred, PRIV_NETINET_IPSEC, 0); - if (error) - return (EACCES); + *error = 0; + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + if (sp != NULL) + sp = ipsec_checkpolicy(sp, inp, error); + if (sp == NULL) { + switch (*error) { + case 0: /* No IPsec required: BYPASS or NONE */ + break; + case -EINVAL: + IPSEC6STAT_INC(ips_out_polvio); + break; + default: + IPSEC6STAT_INC(ips_out_inval); + } } + KEYDBG(IPSEC_STAMP, + printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); + if (sp != NULL) + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + return (sp); +} - /* Allocating new SP entry. */ - if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) - return (error); - - /* Clear old SP and set new SP. */ - KEY_FREESP(pcb_sp); - *pcb_sp = newsp; - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: new policy\n", __func__); - kdebug_secpolicy(newsp)); +/* + * Check IPv6 packet against inbound security policy. + * This function is called from tcp6_input(), udp6_input(), + * rip6_input() and sctp_input(). + */ +int +ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) +{ + struct secpolicy *sp; + int result; - return (0); + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND); + result = ipsec_in_reject(sp, inp, m); + key_freesp(&sp); + if (result) + IPSEC6STAT_INC(ips_in_polvio); + return (result); } +/* + * IPSEC_CAP() method implementation for IPv6. + */ int -ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request, - size_t len, struct ucred *cred) +ipsec6_capability(struct mbuf *m, u_int cap) { - struct sadb_x_policy *xpl; - struct secpolicy **pcb_sp; - - /* Sanity check. */ - if (inp == NULL || request == NULL) - return (EINVAL); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - /* Select direction. */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = &inp->inp_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = &inp->inp_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, - xpl->sadb_x_policy_dir)); - return (EINVAL); - } - return (ipsec_set_policy_internal(pcb_sp, optname, request, len, cred)); + switch (cap) { + case IPSEC_CAP_BYPASS_FILTER: + /* + * Bypass packet filtering for packets previously handled + * by IPsec. + */ + if (!V_ip6_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return (1); + return (0); + case IPSEC_CAP_OPERABLE: + /* Do we have active security policies? */ + if (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0) + return (1); + return (0); + }; + return (EOPNOTSUPP); } +#endif /* INET6 */ int -ipsec_get_policy(struct inpcb *inp, caddr_t request, size_t len, - struct mbuf **mp) +ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int type) { - struct sadb_x_policy *xpl; - struct secpolicy *pcb_sp; + int idx; - /* Sanity check. */ - if (inp == NULL || request == NULL || mp == NULL) - return (EINVAL); - IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp")); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - /* Select direction. */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = inp->inp_sp->sp_in; + switch (ctx->af) { +#ifdef INET + case AF_INET: + idx = HHOOK_IPSEC_INET; break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = inp->inp_sp->sp_out; +#endif +#ifdef INET6 + case AF_INET6: + idx = HHOOK_IPSEC_INET6; break; +#endif default: - ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, - xpl->sadb_x_policy_dir)); - return (EINVAL); - } - - /* Sanity check. Should be an IPSEC_ASSERT. */ - if (pcb_sp == NULL) - return (EINVAL); - - *mp = key_sp2msg(pcb_sp); - if (!*mp) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return (ENOBUFS); + return (EPFNOSUPPORT); } - - (*mp)->m_type = MT_DATA; - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s:\n", __func__); kdebug_mbuf(*mp)); - - return (0); -} - -/* Delete policy in PCB. */ -int -ipsec_delete_pcbpolicy(struct inpcb *inp) -{ - IPSEC_ASSERT(inp != NULL, ("null inp")); - - if (inp->inp_sp == NULL) - return (0); - - if (inp->inp_sp->sp_in != NULL) - KEY_FREESP(&inp->inp_sp->sp_in); - - if (inp->inp_sp->sp_out != NULL) - KEY_FREESP(&inp->inp_sp->sp_out); - - ipsec_delpcbpolicy(inp->inp_sp); - inp->inp_sp = NULL; - + if (type == HHOOK_TYPE_IPSEC_IN) + HHOOKS_RUN_IF(V_ipsec_hhh_in[idx], ctx, NULL); + else + HHOOKS_RUN_IF(V_ipsec_hhh_out[idx], ctx, NULL); + if (*ctx->mp == NULL) + return (EACCES); return (0); } /* * Return current level. * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned. */ u_int -ipsec_get_reqlevel(struct ipsecrequest *isr) +ipsec_get_reqlevel(struct secpolicy *sp, u_int idx) { - u_int level = 0; + struct ipsecrequest *isr; u_int esp_trans_deflev, esp_net_deflev; u_int ah_trans_deflev, ah_net_deflev; + u_int level = 0; - IPSEC_ASSERT(isr != NULL && isr->sp != NULL, ("null argument")); - IPSEC_ASSERT(isr->sp->spidx.src.sa.sa_family == isr->sp->spidx.dst.sa.sa_family, - ("af family mismatch, src %u, dst %u", - isr->sp->spidx.src.sa.sa_family, - isr->sp->spidx.dst.sa.sa_family)); - + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); /* XXX Note that we have ipseclog() expanded here - code sync issue. */ #define IPSEC_CHECK_DEFAULT(lev) \ - (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE \ - && (lev) != IPSEC_LEVEL_UNIQUE) \ - ? (V_ipsec_debug \ - ? log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ - (lev), IPSEC_LEVEL_REQUIRE) \ - : 0), \ - (lev) = IPSEC_LEVEL_REQUIRE, \ - (lev) \ - : (lev)) + (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE && \ + (lev) != IPSEC_LEVEL_UNIQUE) \ + ? (V_ipsec_debug ? \ + log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ + (lev), IPSEC_LEVEL_REQUIRE) : 0), \ + (lev) = IPSEC_LEVEL_REQUIRE, (lev) : (lev)) + + /* + * IPsec VTI uses unique security policy with fake spidx filled + * with zeroes. Just return IPSEC_LEVEL_REQUIRE instead of doing + * full level lookup for such policies. + */ + if (sp->state == IPSEC_SPSTATE_IFNET) { + IPSEC_ASSERT(sp->req[idx]->level == IPSEC_LEVEL_UNIQUE, + ("Wrong IPsec request level %d", sp->req[idx]->level)); + return (IPSEC_LEVEL_REQUIRE); + } /* Set default level. */ - switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) { + switch (sp->spidx.src.sa.sa_family) { #ifdef INET case AF_INET: esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(V_ip4_ah_net_deflev); break; #endif #ifdef INET6 case AF_INET6: esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip6_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(V_ip6_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip6_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(V_ip6_ah_net_deflev); break; #endif /* INET6 */ default: panic("%s: unknown af %u", - __func__, isr->sp->spidx.src.sa.sa_family); + __func__, sp->spidx.src.sa.sa_family); } #undef IPSEC_CHECK_DEFAULT + isr = sp->req[idx]; /* Set level. */ switch (isr->level) { case IPSEC_LEVEL_DEFAULT: switch (isr->saidx.proto) { case IPPROTO_ESP: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = esp_net_deflev; else level = esp_trans_deflev; break; case IPPROTO_AH: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = ah_net_deflev; else level = ah_trans_deflev; break; case IPPROTO_IPCOMP: /* * We don't really care, as IPcomp document says that * we shouldn't compress small packets. */ level = IPSEC_LEVEL_USE; break; default: panic("%s: Illegal protocol defined %u\n", __func__, isr->saidx.proto); } break; case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: level = isr->level; break; case IPSEC_LEVEL_UNIQUE: level = IPSEC_LEVEL_REQUIRE; break; default: panic("%s: Illegal IPsec level %u\n", __func__, isr->level); } return (level); } +static int +ipsec_check_history(const struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + struct xform_history *xh; + struct m_tag *mtag; + + mtag = NULL; + while ((mtag = m_tag_find(__DECONST(struct mbuf *, m), + PACKET_TAG_IPSEC_IN_DONE, mtag)) != NULL) { + xh = (struct xform_history *)(mtag + 1); + KEYDBG(IPSEC_DATA, + char buf[IPSEC_ADDRSTRLEN]; + printf("%s: mode %s proto %u dst %s\n", __func__, + kdebug_secasindex_mode(xh->mode), xh->proto, + ipsec_address(&xh->dst, buf, sizeof(buf)))); + if (xh->proto != sp->req[idx]->saidx.proto) + continue; + /* If SA had IPSEC_MODE_ANY, consider this as match. */ + if (xh->mode != sp->req[idx]->saidx.mode && + xh->mode != IPSEC_MODE_ANY) + continue; + /* + * For transport mode IPsec request doesn't contain + * addresses. We need to use address from spidx. + */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TRANSPORT) { + if (key_sockaddrcmp_withmask(&xh->dst.sa, + &sp->spidx.dst.sa, sp->spidx.prefd) != 0) + continue; + } else { + if (key_sockaddrcmp(&xh->dst.sa, + &sp->req[idx]->saidx.dst.sa, 0) != 0) + continue; + } + return (0); /* matched */ + } + return (1); +} + /* * Check security policy requirements against the actual * packet contents. Return one if the packet should be * reject as "invalid"; otherwiser return zero to have the * packet treated as "valid". * * OUT: * 0: valid * 1: invalid */ static int -ipsec_in_reject(struct secpolicy *sp, const struct mbuf *m) +ipsec_in_reject(struct secpolicy *sp, struct inpcb *inp, const struct mbuf *m) { - struct ipsecrequest *isr; - int need_auth; + int i; + + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): using SP(%p)\n", __func__, inp, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: using SP\n", __func__); kdebug_secpolicy(sp)); + if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_in == NULL) + ipsec_cachepolicy(inp, sp, IPSEC_DIR_INBOUND); /* Check policy. */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: return (1); case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return (0); } IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); - /* XXX Should compare policy against IPsec header history. */ - - need_auth = 0; - for (isr = sp->req; isr != NULL; isr = isr->next) { - if (ipsec_get_reqlevel(isr) != IPSEC_LEVEL_REQUIRE) + /* + * ipsec[46]_common_input_cb after each transform adds + * PACKET_TAG_IPSEC_IN_DONE mbuf tag. It contains SPI, proto, mode + * and destination address from saidx. We can compare info from + * these tags with requirements in SP. + */ + for (i = 0; i < sp->tcount; i++) { + /* + * Do not check IPcomp, since IPcomp document + * says that we shouldn't compress small packets. + * IPComp policy should always be treated as being + * in "use" level. + */ + if (sp->req[i]->saidx.proto == IPPROTO_IPCOMP || + ipsec_get_reqlevel(sp, i) != IPSEC_LEVEL_REQUIRE) continue; - switch (isr->saidx.proto) { + if (V_check_policy_history != 0 && + ipsec_check_history(m, sp, i) != 0) + return (1); + else switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: if ((m->m_flags & M_DECRYPTED) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, + KEYDBG(IPSEC_DUMP, printf("%s: ESP m_flags:%x\n", __func__, m->m_flags)); return (1); } - - if (!need_auth && - isr->sav != NULL && - isr->sav->tdb_authalgxform != NULL && - (m->m_flags & M_AUTHIPDGM) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: ESP/AH m_flags:%x\n", __func__, - m->m_flags)); - return (1); - } break; case IPPROTO_AH: - need_auth = 1; if ((m->m_flags & M_AUTHIPHDR) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, + KEYDBG(IPSEC_DUMP, printf("%s: AH m_flags:%x\n", __func__, m->m_flags)); return (1); } break; - case IPPROTO_IPCOMP: - /* - * We don't really care, as IPcomp document - * says that we shouldn't compress small - * packets. IPComp policy should always be - * treated as being in "use" level. - */ - break; } } return (0); /* Valid. */ } -/* - * Non zero return value means security policy DISCARD or policy violation. - */ -static int -ipsec46_in_reject(const struct mbuf *m, struct inpcb *inp) -{ - struct secpolicy *sp; - int error; - int result; - - if (!key_havesp(IPSEC_DIR_INBOUND)) - return 0; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - - /* Get SP for this packet. */ - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, &error); - else - sp = ipsec_getpolicybysock(m, IPSEC_DIR_INBOUND, inp, &error); - - if (sp != NULL) { - result = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - result = 1; /* treat errors as policy violation */ - } - return (result); -} - -/* - * Check AH/ESP integrity. - * This function is called from tcp_input(), udp_input(), - * and {ah,esp}4_input for tunnel mode. - */ -int -ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) -{ - int result; - - result = ipsec46_in_reject(m, inp); - if (result) - IPSECSTAT_INC(ips_in_polvio); - - return (result); -} - -#ifdef INET6 -/* - * Check AH/ESP integrity. - * This function is called from tcp6_input(), udp6_input(), - * and {ah,esp}6_input for tunnel mode. - */ -int -ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) -{ - int result; - - result = ipsec46_in_reject(m, inp); - if (result) - IPSEC6STAT_INC(ips_in_polvio); - - return (result); -} -#endif - /* * Compute the byte size to be occupied by IPsec header. * In case it is tunnelled, it includes the size of outer IP header. - * NOTE: SP passed is freed in this function. */ static size_t ipsec_hdrsiz_internal(struct secpolicy *sp) { - struct ipsecrequest *isr; size_t size; + int i; - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: using SP\n", __func__); kdebug_secpolicy(sp)); + KEYDBG(IPSEC_STAMP, printf("%s: using SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); switch (sp->policy) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return (0); } IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); + /* + * XXX: for each transform we need to lookup suitable SA + * and use info from SA to calculate headers size. + * XXX: for NAT-T we need to cosider UDP header size. + */ size = 0; - for (isr = sp->req; isr != NULL; isr = isr->next) { - size_t clen = 0; - - switch (isr->saidx.proto) { + for (i = 0; i < sp->tcount; i++) { + switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: - clen = esp_hdrsiz(isr->sav); + size += esp_hdrsiz(NULL); break; case IPPROTO_AH: - clen = ah_hdrsiz(isr->sav); + size += ah_hdrsiz(NULL); break; case IPPROTO_IPCOMP: - clen = sizeof(struct ipcomp); + size += sizeof(struct ipcomp); break; } - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - switch (isr->saidx.dst.sa.sa_family) { + if (sp->req[i]->saidx.mode == IPSEC_MODE_TUNNEL) { + switch (sp->req[i]->saidx.dst.sa.sa_family) { +#ifdef INET case AF_INET: - clen += sizeof(struct ip); + size += sizeof(struct ip); break; +#endif #ifdef INET6 case AF_INET6: - clen += sizeof(struct ip6_hdr); + size += sizeof(struct ip6_hdr); break; #endif default: ipseclog((LOG_ERR, "%s: unknown AF %d in " "IPsec tunnel SA\n", __func__, - ((struct sockaddr *)&isr->saidx.dst)->sa_family)); + sp->req[i]->saidx.dst.sa.sa_family)); break; } } - size += clen; } - return (size); } -/* - * This function is called from ipsec_hdrsiz_tcp(), ip_ipsec_mtu(), - * disabled ip6_ipsec_mtu() and ip6_forward(). +/* + * Compute ESP/AH header size for protocols with PCB, including + * outer IP header. Currently only tcp_output() uses it. */ size_t -ipsec_hdrsiz(const struct mbuf *m, u_int dir, struct inpcb *inp) +ipsec_hdrsiz_inpcb(struct inpcb *inp) { + struct secpolicyindex spidx; struct secpolicy *sp; - int error; - size_t size; - - if (!key_havesp(dir)) - return 0; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - - /* Get SP for this packet. */ - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, &error); - else - sp = ipsec_getpolicybysock(m, dir, inp, &error); + size_t sz; - if (sp != NULL) { - size = ipsec_hdrsiz_internal(sp); - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: size:%lu.\n", __func__, - (unsigned long)size)); - - KEY_FREESP(&sp); - } else { - size = 0; /* XXX Should be panic? - * -> No, we are called w/o knowing if - * IPsec processing is needed. */ + sp = ipsec_getpcbpolicy(inp, IPSEC_DIR_OUTBOUND); + if (sp == NULL && key_havesp(IPSEC_DIR_OUTBOUND)) { + ipsec_setspidx_inpcb(inp, &spidx, IPSEC_DIR_OUTBOUND); + sp = key_allocsp(&spidx, IPSEC_DIR_OUTBOUND); } - return (size); + if (sp == NULL) + sp = key_allocsp_default(); + sz = ipsec_hdrsiz_internal(sp); + key_freesp(&sp); + return (sz); } /* * Check the variable replay window. * ipsec_chkreplay() performs replay check before ICV verification. * ipsec_updatereplay() updates replay bitmap. This must be called after * ICV verification (it also performs replay check, which is usually done * beforehand). * 0 (zero) is returned if packet disallowed, 1 if packet permitted. * * Based on RFC 6479. Blocks are 32 bits unsigned integers */ #define IPSEC_BITMAP_INDEX_MASK(w) (w - 1) #define IPSEC_REDUNDANT_BIT_SHIFTS 5 #define IPSEC_REDUNDANT_BITS (1 << IPSEC_REDUNDANT_BIT_SHIFTS) #define IPSEC_BITMAP_LOC_MASK (IPSEC_REDUNDANT_BITS - 1) int -ipsec_chkreplay(u_int32_t seq, struct secasvar *sav) +ipsec_chkreplay(uint32_t seq, struct secasvar *sav) { const struct secreplay *replay; - u_int32_t wsizeb; /* Constant: window size. */ - int ret, index, bit_location; + uint32_t wsizeb; /* Constant: window size. */ + int index, bit_location; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); - SECASVAR_LOCK(sav); - - ret = 0; replay = sav->replay; /* No need to check replay if disabled. */ if (replay->wsize == 0) - goto allowed; + return (1); /* Constant. */ wsizeb = replay->wsize << 3; /* Sequence number of 0 is invalid. */ if (seq == 0) - goto end; + return (0); /* First time is always okay. */ if (replay->count == 0) - goto allowed; + return (1); /* Larger sequences are okay. */ if (seq > replay->lastseq) - goto allowed; + return (1); /* Over range to check, i.e. too old or wrapped. */ if (replay->lastseq - seq >= wsizeb) - goto end; + return (0); /* The sequence is inside the sliding window * now check the bit in the bitmap * bit location only depends on the sequence number */ bit_location = seq & IPSEC_BITMAP_LOC_MASK; index = (seq >> IPSEC_REDUNDANT_BIT_SHIFTS) & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size); /* This packet already seen? */ if ((replay->bitmap)[index] & (1 << bit_location)) - goto end; - -allowed: - ret = 1; -end: - SECASVAR_UNLOCK(sav); - - return (ret); + return (0); + return (1); } /* * Check replay counter whether to update or not. * OUT: 0: OK * 1: NG */ int -ipsec_updatereplay(u_int32_t seq, struct secasvar *sav) +ipsec_updatereplay(uint32_t seq, struct secasvar *sav) { char buf[128]; struct secreplay *replay; - u_int32_t wsizeb; /* Constant: window size. */ - int ret, diff, index, bit_location; + uint32_t wsizeb; /* Constant: window size. */ + int diff, index, bit_location; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); - SECASVAR_LOCK(sav); - - ret = 1; replay = sav->replay; if (replay->wsize == 0) goto ok; /* No need to check replay. */ /* Constant. */ wsizeb = replay->wsize << 3; /* Sequence number of 0 is invalid. */ if (seq == 0) - goto end; + return (1); /* The packet is too old, no need to update */ if (wsizeb + seq < replay->lastseq) goto ok; /* Now update the bit */ index = (seq >> IPSEC_REDUNDANT_BIT_SHIFTS); /* First check if the sequence number is in the range */ if (seq > replay->lastseq) { int id; int index_cur = replay->lastseq >> IPSEC_REDUNDANT_BIT_SHIFTS; diff = index - index_cur; if (diff > replay->bitmap_size) { /* something unusual in this case */ diff = replay->bitmap_size; } for (id = 0; id < diff; ++id) { replay->bitmap[(id + index_cur + 1) & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size)] = 0; } replay->lastseq = seq; } index &= IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size); bit_location = seq & IPSEC_BITMAP_LOC_MASK; /* this packet has already been received */ if (replay->bitmap[index] & (1 << bit_location)) - goto end; + return (1); replay->bitmap[index] |= (1 << bit_location); ok: if (replay->count == ~0) { /* Set overflow flag. */ replay->overflow++; /* Don't increment, no more packets accepted. */ - if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) - goto end; + if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) { + if (sav->sah->saidx.proto == IPPROTO_AH) + AHSTAT_INC(ahs_wrap); + else if (sav->sah->saidx.proto == IPPROTO_ESP) + ESPSTAT_INC(esps_wrap); + return (1); + } ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n", __func__, replay->overflow, - ipsec_logsastr(sav, buf, sizeof(buf)))); + ipsec_sa2str(sav, buf, sizeof(buf)))); } - - ret = 0; - -end: - SECASVAR_UNLOCK(sav); - return (ret); + return (0); } -/* Return a printable string for the address. */ -char* -ipsec_address(union sockaddr_union* sa, char *buf, socklen_t size) +int +ipsec_updateid(struct secasvar *sav, uint64_t *new, uint64_t *old) { + uint64_t tmp; - switch (sa->sa.sa_family) { -#ifdef INET - case AF_INET: - return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size)); -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, buf, size)); -#endif /* INET6 */ - default: - return ("(unknown address family)"); + /* + * tdb_cryptoid is initialized by xform_init(). + * Then it can be changed only when some crypto error occurred or + * when SA is deleted. We stored used cryptoid in the xform_data + * structure. In case when crypto error occurred and crypto + * subsystem has reinited the session, it returns new cryptoid + * and EAGAIN error code. + * + * This function will be called when we got EAGAIN from crypto + * subsystem. + * *new is cryptoid that was returned by crypto subsystem in + * the crp_sid. + * *old is the original cryptoid that we stored in xform_data. + * + * For first failed request *old == sav->tdb_cryptoid, then + * we update sav->tdb_cryptoid and redo crypto_dispatch(). + * For next failed request *old != sav->tdb_cryptoid, then + * we store cryptoid from first request into the *new variable + * and crp_sid from this second session will be returned via + * *old pointer, so caller can release second session. + * + * XXXAE: check this more carefully. + */ + KEYDBG(IPSEC_STAMP, + printf("%s: SA(%p) moves cryptoid %jd -> %jd\n", + __func__, sav, (uintmax_t)(*old), (uintmax_t)(*new))); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + SECASVAR_LOCK(sav); + if (sav->tdb_cryptoid != *old) { + /* cryptoid was already updated */ + tmp = *new; + *new = sav->tdb_cryptoid; + *old = tmp; + SECASVAR_UNLOCK(sav); + return (1); } + sav->tdb_cryptoid = *new; + SECASVAR_UNLOCK(sav); + return (0); } -char * -ipsec_logsastr(struct secasvar *sav, char *buf, size_t size) -{ - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - - IPSEC_ASSERT(sav->sah->saidx.src.sa.sa_family == - sav->sah->saidx.dst.sa.sa_family, ("address family mismatch")); - - snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)", - (u_long)ntohl(sav->spi), - ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf))); - return (buf); -} - -void -ipsec_dumpmbuf(const struct mbuf *m) +int +ipsec_initialized(void) { - const u_char *p; - int totlen; - int i; - totlen = 0; - printf("---\n"); - while (m) { - p = mtod(m, const u_char *); - for (i = 0; i < m->m_len; i++) { - printf("%02x ", p[i]); - totlen++; - if (totlen % 16 == 0) - printf("\n"); - } - m = m->m_next; - } - if (totlen % 16 != 0) - printf("\n"); - printf("---\n"); + return (V_def_policy != NULL); } static void def_policy_init(const void *unused __unused) { - bzero(&V_def_policy, sizeof(struct secpolicy)); - V_def_policy.policy = IPSEC_POLICY_NONE; - V_def_policy.refcnt = 1; + V_def_policy = key_newsp(); + if (V_def_policy != NULL) { + V_def_policy->policy = IPSEC_POLICY_NONE; + /* Force INPCB SP cache invalidation */ + key_bumpspgen(); + } else + printf("%s: failed to initialize default policy\n", __func__); } -VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, - def_policy_init, NULL); - -/* XXX This stuff doesn't belong here... */ -static struct xformsw* xforms = NULL; - -/* - * Register a transform; typically at system startup. - */ -void -xform_register(struct xformsw* xsp) +static void +def_policy_uninit(const void *unused __unused) { - xsp->xf_next = xforms; - xforms = xsp; + if (V_def_policy != NULL) { + key_freesp(&V_def_policy); + key_bumpspgen(); + } } -/* - * Initialize transform support in an sav. - */ -int -xform_init(struct secasvar *sav, int xftype) -{ - struct xformsw *xsp; - - if (sav->tdb_xform != NULL) /* Previously initialized. */ - return (0); - for (xsp = xforms; xsp; xsp = xsp->xf_next) - if (xsp->xf_type == xftype) - return ((*xsp->xf_init)(sav, xsp)); - return (EINVAL); -} +VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + def_policy_init, NULL); +VNET_SYSUNINIT(def_policy_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + def_policy_uninit, NULL); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 0e427e3ced25..64b9e0a7df1b 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -1,373 +1,355 @@ /* $FreeBSD$ */ /* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #ifndef _NETIPSEC_IPSEC_H_ #define _NETIPSEC_IPSEC_H_ #if defined(_KERNEL) && !defined(_LKM) && !defined(KLD_MODULE) #include "opt_inet.h" #include "opt_ipsec.h" #endif #include #include #ifdef _KERNEL #include #include #include #define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) -#define IPSEC_IS_PRIVILEGED_SO(_so) \ - ((_so)->so_cred != NULL && \ - priv_check_cred((_so)->so_cred, PRIV_NETINET_IPSEC, 0) \ - == 0) - /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. * When the value of the ul_proto is ICMPv6, the port field in "src" * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. */ struct secpolicyindex { - u_int8_t dir; /* direction of packet flow, see below */ union sockaddr_union src; /* IP src address for SP */ union sockaddr_union dst; /* IP dst address for SP */ - u_int8_t prefs; /* prefix length in bits for src */ - u_int8_t prefd; /* prefix length in bits for dst */ - u_int16_t ul_proto; /* upper layer Protocol */ -#ifdef notyet - uid_t uids; - uid_t uidd; - gid_t gids; - gid_t gidd; -#endif + uint8_t ul_proto; /* upper layer Protocol */ + uint8_t dir; /* direction of packet flow */ + uint8_t prefs; /* prefix length in bits for src */ + uint8_t prefd; /* prefix length in bits for dst */ +}; + +/* Request for IPsec */ +struct ipsecrequest { + struct secasindex saidx;/* hint for search proper SA */ + /* if __ss_len == 0 then no address specified.*/ + u_int level; /* IPsec level defined below. */ }; /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; + LIST_ENTRY(secpolicy) idhash; + LIST_ENTRY(secpolicy) drainq; struct secpolicyindex spidx; /* selector */ - struct ipsecrequest *req; - /* pointer to the ipsec request tree, */ - /* if policy == IPSEC else this value == NULL.*/ - u_int refcnt; /* reference count */ +#define IPSEC_MAXREQ 4 + struct ipsecrequest *req[IPSEC_MAXREQ]; + u_int tcount; /* IPsec transforms count */ + volatile u_int refcnt; /* reference count */ u_int policy; /* policy_type per pfkeyv2.h */ u_int state; #define IPSEC_SPSTATE_DEAD 0 -#define IPSEC_SPSTATE_ALIVE 1 - u_int32_t priority; /* priority of this policy */ - u_int32_t id; /* It's unique number on the system. */ +#define IPSEC_SPSTATE_LARVAL 1 +#define IPSEC_SPSTATE_ALIVE 2 +#define IPSEC_SPSTATE_PCB 3 +#define IPSEC_SPSTATE_IFNET 4 + uint32_t priority; /* priority of this policy */ + uint32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and * validtime are zero. * "lifetime" is passed by sadb_lifetime.sadb_lifetime_addtime. * "validtime" is passed by sadb_lifetime.sadb_lifetime_usetime. */ time_t created; /* time created the policy */ time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ }; -/* Request for IPsec */ -struct ipsecrequest { - struct ipsecrequest *next; - /* pointer to next structure */ - /* If NULL, it means the end of chain. */ - struct secasindex saidx;/* hint for search proper SA */ - /* if __ss_len == 0 then no address specified.*/ - u_int level; /* IPsec level defined below. */ - - struct secasvar *sav; /* place holder of SA for use */ - struct secpolicy *sp; /* back pointer to SP */ - struct rwlock lock; /* to interlock updates */ -}; - /* - * Need recursion for when crypto callbacks happen directly, - * as in the case of software crypto. Need to look at how - * hard it is to remove this... + * PCB security policies. + * Application can setup private security policies for socket. + * Such policies can have IPSEC, BYPASS and ENTRUST type. + * By default, policies are set to NULL. This means that they have ENTRUST type. + * When application sets BYPASS or IPSEC type policy, the flags field + * is also updated. When flags is not set, the system could store + * used security policy into the sp_in/sp_out pointer to speed up further + * lookups. */ -#define IPSECREQUEST_LOCK_INIT(_isr) \ - rw_init_flags(&(_isr)->lock, "ipsec request", RW_RECURSE) -#define IPSECREQUEST_LOCK(_isr) rw_rlock(&(_isr)->lock) -#define IPSECREQUEST_UNLOCK(_isr) rw_runlock(&(_isr)->lock) -#define IPSECREQUEST_WLOCK(_isr) rw_wlock(&(_isr)->lock) -#define IPSECREQUEST_WUNLOCK(_isr) rw_wunlock(&(_isr)->lock) -#define IPSECREQUEST_UPGRADE(_isr) rw_try_upgrade(&(_isr)->lock) -#define IPSECREQUEST_DOWNGRADE(_isr) rw_downgrade(&(_isr)->lock) -#define IPSECREQUEST_LOCK_DESTROY(_isr) rw_destroy(&(_isr)->lock) -#define IPSECREQUEST_LOCK_ASSERT(_isr) rw_assert(&(_isr)->lock, RA_LOCKED) - -/* security policy in PCB */ struct inpcbpolicy { - struct secpolicy *sp_in; - struct secpolicy *sp_out; - int priv; /* privileged socket ? */ + struct secpolicy *sp_in; + struct secpolicy *sp_out; + + uint32_t genid; + uint16_t flags; +#define INP_INBOUND_POLICY 0x0001 +#define INP_OUTBOUND_POLICY 0x0002 + uint16_t hdrsz; }; /* SP acquiring list table. */ struct secspacq { LIST_ENTRY(secspacq) chain; struct secpolicyindex spidx; time_t created; /* for lifetime */ int count; /* for lifetime */ /* XXX: here is mbuf place holder to be sent ? */ }; #endif /* _KERNEL */ +/* buffer size for formatted output of ipsec address */ +#define IPSEC_ADDRSTRLEN (INET6_ADDRSTRLEN + 11) + /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 #define IPSEC_ULPROTO_ANY 255 #define IPSEC_PROTO_ANY 255 /* mode of security protocol */ /* NOTE: DON'T use IPSEC_MODE_ANY at SPD. It's only use in SAD */ #define IPSEC_MODE_ANY 0 /* i.e. wildcard. */ #define IPSEC_MODE_TRANSPORT 1 #define IPSEC_MODE_TUNNEL 2 #define IPSEC_MODE_TCPMD5 3 /* TCP MD5 mode */ /* * Direction of security policy. * NOTE: Since INVALID is used just as flag. * The other are used for loop counter too. */ #define IPSEC_DIR_ANY 0 #define IPSEC_DIR_INBOUND 1 #define IPSEC_DIR_OUTBOUND 2 #define IPSEC_DIR_MAX 3 #define IPSEC_DIR_INVALID 4 /* Policy level */ /* * IPSEC, ENTRUST and BYPASS are allowed for setsockopt() in PCB, * DISCARD, IPSEC and NONE are allowed for setkey() in SPD. * DISCARD and NONE are allowed for system default. */ #define IPSEC_POLICY_DISCARD 0 /* discarding packet */ #define IPSEC_POLICY_NONE 1 /* through IPsec engine */ #define IPSEC_POLICY_IPSEC 2 /* do IPsec */ #define IPSEC_POLICY_ENTRUST 3 /* consulting SPD if present. */ #define IPSEC_POLICY_BYPASS 4 /* only for privileged socket. */ /* Security protocol level */ #define IPSEC_LEVEL_DEFAULT 0 /* reference to system default */ #define IPSEC_LEVEL_USE 1 /* use SA if present. */ #define IPSEC_LEVEL_REQUIRE 2 /* require SA. */ #define IPSEC_LEVEL_UNIQUE 3 /* unique SA. */ #define IPSEC_MANUAL_REQID_MAX 0x3fff /* * if security policy level == unique, this id * indicate to a relative SA for use, else is * zero. * 1 - 0x3fff are reserved for manual keying. * 0 are reserved for above reason. Others is * for kernel use. * Note that this id doesn't identify SA * by only itself. */ #define IPSEC_REPLAYWSIZE 32 /* statistics for ipsec processing */ struct ipsecstat { uint64_t ips_in_polvio; /* input: sec policy violation */ uint64_t ips_in_nomem; /* input: no memory available */ uint64_t ips_in_inval; /* input: generic error */ uint64_t ips_out_polvio; /* output: sec policy violation */ uint64_t ips_out_nosa; /* output: SA unavailable */ uint64_t ips_out_nomem; /* output: no memory available */ uint64_t ips_out_noroute; /* output: no route available */ uint64_t ips_out_inval; /* output: generic error */ uint64_t ips_out_bundlesa; /* output: bundled SA processed */ uint64_t ips_mbcoalesced; /* mbufs coalesced during clone */ uint64_t ips_clcoalesced; /* clusters coalesced during clone */ uint64_t ips_clcopied; /* clusters copied during clone */ uint64_t ips_mbinserted; /* mbufs inserted during makespace */ /* * Temporary statistics for performance analysis. */ /* See where ESP/AH/IPCOMP header land in mbuf on input */ uint64_t ips_input_front; uint64_t ips_input_middle; uint64_t ips_input_end; }; /* * Definitions for IPsec & Key sysctl operations. */ #define IPSECCTL_STATS 1 /* stats */ #define IPSECCTL_DEF_POLICY 2 #define IPSECCTL_DEF_ESP_TRANSLEV 3 /* int; ESP transport mode */ #define IPSECCTL_DEF_ESP_NETLEV 4 /* int; ESP tunnel mode */ #define IPSECCTL_DEF_AH_TRANSLEV 5 /* int; AH transport mode */ #define IPSECCTL_DEF_AH_NETLEV 6 /* int; AH tunnel mode */ #if 0 /* obsolete, do not reuse */ #define IPSECCTL_INBOUND_CALL_IKE 7 #endif #define IPSECCTL_AH_CLEARTOS 8 #define IPSECCTL_AH_OFFSETMASK 9 #define IPSECCTL_DFBIT 10 #define IPSECCTL_ECN 11 #define IPSECCTL_DEBUG 12 #define IPSECCTL_ESP_RANDPAD 13 #ifdef _KERNEL #include struct ipsec_ctx_data; #define IPSEC_INIT_CTX(_ctx, _mp, _sav, _af, _enc) do { \ (_ctx)->mp = (_mp); \ (_ctx)->sav = (_sav); \ (_ctx)->af = (_af); \ (_ctx)->enc = (_enc); \ } while(0) int ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int direction); VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) #ifdef REGRESSION VNET_DECLARE(int, ipsec_replay); VNET_DECLARE(int, ipsec_integrity); #define V_ipsec_replay VNET(ipsec_replay) #define V_ipsec_integrity VNET(ipsec_integrity) #endif VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec4stat); VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); VNET_DECLARE(int, ip4_ah_net_deflev); VNET_DECLARE(int, ip4_ah_offsetmask); VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); +VNET_DECLARE(int, natt_cksum_policy); #define IPSECSTAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) #define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) #define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) #define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) +#define V_natt_cksum_policy VNET(natt_cksum_policy) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) -extern struct ipsecrequest *ipsec_newisr(void); -extern void ipsec_delisr(struct ipsecrequest *); - -struct tdb_ident; -extern struct secpolicy *ipsec_getpolicy(struct tdb_ident*, u_int); struct inpcb; -extern struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, u_int, - int *, struct inpcb *); -extern struct secpolicy * ipsec_getpolicybyaddr(const struct mbuf *, u_int, - int *); - -struct inpcb; -extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **); -extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); -extern u_int ipsec_get_reqlevel(struct ipsecrequest *); - -extern int ipsec_set_policy(struct inpcb *inp, int optname, - caddr_t request, size_t len, struct ucred *cred); -extern int ipsec_get_policy(struct inpcb *inpcb, caddr_t request, - size_t len, struct mbuf **mp); -extern int ipsec_delete_pcbpolicy(struct inpcb *); -extern int ipsec4_in_reject(const struct mbuf *, struct inpcb *); - -struct secas; -struct tcpcb; -extern int ipsec_chkreplay(u_int32_t, struct secasvar *); -extern int ipsec_updatereplay(u_int32_t, struct secasvar *); - -extern size_t ipsec_hdrsiz(const struct mbuf *, u_int, struct inpcb *); -extern size_t ipsec_hdrsiz_tcp(struct tcpcb *); - +struct m_tag; +struct secasvar; +struct sockopt; +struct tcphdr; union sockaddr_union; -extern char *ipsec_address(union sockaddr_union *, char *, socklen_t); -extern char *ipsec_logsastr(struct secasvar *, char *, size_t); -extern void ipsec_dumpmbuf(const struct mbuf *); +int ipsec_if_input(struct mbuf *, struct secasvar *, uint32_t); -struct m_tag; -extern int ah4_input(struct mbuf **mp, int *offp, int proto); -extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); -extern int esp4_input(struct mbuf **mp, int *offp, int proto); -extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); -extern int ipcomp4_input(struct mbuf **mp, int *offp, int proto); -extern int ipsec_common_input(struct mbuf *m, int, int, int, int); -extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff); -extern int ipsec4_process_packet(struct mbuf *, struct ipsecrequest *); -extern int ipsec_process_done(struct mbuf *, struct ipsecrequest *); +struct ipsecrequest *ipsec_newisr(void); +void ipsec_delisr(struct ipsecrequest *); +struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, struct inpcb *, + int *); + +u_int ipsec_get_reqlevel(struct secpolicy *, u_int); + +void udp_ipsec_adjust_cksum(struct mbuf *, struct secasvar *, int, int); +int udp_ipsec_output(struct mbuf *, struct secasvar *); +int udp_ipsec_input(struct mbuf *, int, int); +int udp_ipsec_pcbctl(struct inpcb *, struct sockopt *); + +int ipsec_chkreplay(uint32_t, struct secasvar *); +int ipsec_updatereplay(uint32_t, struct secasvar *); +int ipsec_updateid(struct secasvar *, uint64_t *, uint64_t *); +int ipsec_initialized(void); + +void ipsec_setspidx_inpcb(struct inpcb *, struct secpolicyindex *, u_int); + +void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, + union sockaddr_union *); +int ipsec4_in_reject(const struct mbuf *, struct inpcb *); +int ipsec4_input(struct mbuf *, int, int); +int ipsec4_forward(struct mbuf *); +int ipsec4_pcbctl(struct inpcb *, struct sockopt *); +int ipsec4_output(struct mbuf *, struct inpcb *); +int ipsec4_capability(struct mbuf *, u_int); +int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, + u_int); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); extern caddr_t m_pad(struct mbuf *m, int n); extern int m_striphdr(struct mbuf *m, int skip, int hlen); #endif /* _KERNEL */ #ifndef _KERNEL extern caddr_t ipsec_set_policy(char *, int); extern int ipsec_get_policylen(caddr_t); extern char *ipsec_dump_policy(caddr_t, char *); extern const char *ipsec_strerror(void); #endif /* ! KERNEL */ #endif /* _NETIPSEC_IPSEC_H_ */ diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index 02cf02552693..a5fae4d13378 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -1,72 +1,80 @@ /* $FreeBSD$ */ /* $KAME: ipsec.h,v 1.44 2001/03/23 08:08:47 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #ifndef _NETIPSEC_IPSEC6_H_ #define _NETIPSEC_IPSEC6_H_ #include #include #ifdef _KERNEL #include VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec6stat); VNET_DECLARE(int, ip6_esp_trans_deflev); VNET_DECLARE(int, ip6_esp_net_deflev); VNET_DECLARE(int, ip6_ah_trans_deflev); VNET_DECLARE(int, ip6_ah_net_deflev); VNET_DECLARE(int, ip6_ipsec_ecn); #define IPSEC6STAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec6stat, name, 1) #define V_ip6_esp_trans_deflev VNET(ip6_esp_trans_deflev) #define V_ip6_esp_net_deflev VNET(ip6_esp_net_deflev) #define V_ip6_ah_trans_deflev VNET(ip6_ah_trans_deflev) #define V_ip6_ah_net_deflev VNET(ip6_ah_net_deflev) #define V_ip6_ipsec_ecn VNET(ip6_ipsec_ecn) struct inpcb; -extern int ipsec6_in_reject(const struct mbuf *, struct inpcb *); +struct secpolicy *ipsec6_checkpolicy(const struct mbuf *, + struct inpcb *, int *); -struct m_tag; -extern int ipsec6_common_input(struct mbuf **mp, int *offp, int proto); -extern int ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff); -extern void esp6_ctlinput(int, struct sockaddr *, void *); -extern int ipsec6_process_packet(struct mbuf *, struct ipsecrequest *); +void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, + union sockaddr_union *); +int ipsec6_input(struct mbuf *, int, int); +int ipsec6_in_reject(const struct mbuf *, struct inpcb *); +int ipsec6_forward(struct mbuf *); +int ipsec6_pcbctl(struct inpcb *, struct sockopt *); +int ipsec6_output(struct mbuf *, struct inpcb *); +int ipsec6_capability(struct mbuf *, u_int); +int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec6_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); + +int ip6_ipsec_filtertunnel(struct mbuf *); +int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); #endif /*_KERNEL*/ #endif /*_NETIPSEC_IPSEC6_H_*/ diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 0ec07d8424ac..c4acafecc243 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -1,908 +1,680 @@ -/* $FreeBSD$ */ /* $OpenBSD: ipsec_input.c,v 1.63 2003/02/20 18:35:43 deraadt Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001, Angelos D. Keromytis. + * Copyright (c) 2016 Andrey V. Elsukov * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ /* * IPsec input processing. */ +#include +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #include +#include #include #include #include #include #define IPSEC_ISTAT(proto, name) do { \ if ((proto) == IPPROTO_ESP) \ ESPSTAT_INC(esps_##name); \ else if ((proto) == IPPROTO_AH) \ AHSTAT_INC(ahs_##name); \ else \ IPCOMPSTAT_INC(ipcomps_##name); \ } while (0) -#ifdef INET -static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int); -#endif - /* * ipsec_common_input gets called when an IPsec-protected packet * is received by IPv4 or IPv6. Its job is to find the right SA * and call the appropriate transform. The transform callback * takes care of further processing (like ingress filtering). */ -int +static int ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; union sockaddr_union dst_address; struct secasvar *sav; - u_int32_t spi; + uint32_t spi; int error; -#ifdef INET -#ifdef IPSEC_NAT_T - struct m_tag *tag; -#endif -#endif IPSEC_ISTAT(sproto, input); IPSEC_ASSERT(m != NULL, ("null packet")); IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH || sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); if ((sproto == IPPROTO_ESP && !V_esp_enable) || (sproto == IPPROTO_AH && !V_ah_enable) || (sproto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { m_freem(m); IPSEC_ISTAT(sproto, pdrops); return EOPNOTSUPP; } if (m->m_pkthdr.len - skip < 2 * sizeof (u_int32_t)) { m_freem(m); IPSEC_ISTAT(sproto, hdrops); DPRINTF(("%s: packet too small\n", __func__)); return EINVAL; } /* Retrieve the SPI from the relevant IPsec header */ if (sproto == IPPROTO_ESP) m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi); else if (sproto == IPPROTO_AH) m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t), (caddr_t) &spi); else if (sproto == IPPROTO_IPCOMP) { u_int16_t cpi; m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t), (caddr_t) &cpi); spi = ntohl(htons(cpi)); } /* * Find the SA and (indirectly) call the appropriate * kernel crypto routine. The resulting mbuf chain is a valid * IP packet ready to go through input processing. */ bzero(&dst_address, sizeof (dst_address)); dst_address.sa.sa_family = af; switch (af) { #ifdef INET case AF_INET: dst_address.sin.sin_len = sizeof(struct sockaddr_in); m_copydata(m, offsetof(struct ip, ip_dst), sizeof(struct in_addr), (caddr_t) &dst_address.sin.sin_addr); -#ifdef IPSEC_NAT_T - /* Find the source port for NAT-T; see udp*_espdecap. */ - tag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL); - if (tag != NULL) - dst_address.sin.sin_port = ((u_int16_t *)(tag + 1))[1]; -#endif /* IPSEC_NAT_T */ break; #endif /* INET */ #ifdef INET6 case AF_INET6: dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6); m_copydata(m, offsetof(struct ip6_hdr, ip6_dst), sizeof(struct in6_addr), (caddr_t) &dst_address.sin6.sin6_addr); /* We keep addresses in SADB without embedded scope id */ if (IN6_IS_SCOPE_LINKLOCAL(&dst_address.sin6.sin6_addr)) { /* XXX: sa6_recoverscope() */ dst_address.sin6.sin6_scope_id = ntohs(dst_address.sin6.sin6_addr.s6_addr16[1]); dst_address.sin6.sin6_addr.s6_addr16[1] = 0; } break; #endif /* INET6 */ default: DPRINTF(("%s: unsupported protocol family %u\n", __func__, af)); m_freem(m); IPSEC_ISTAT(sproto, nopf); return EPFNOSUPPORT; } /* NB: only pass dst since key_allocsa follows RFC2401 */ - sav = KEY_ALLOCSA(&dst_address, sproto, spi); + sav = key_allocsa(&dst_address, sproto, spi); if (sav == NULL) { DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n", __func__, ipsec_address(&dst_address, buf, sizeof(buf)), (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, notdb); m_freem(m); return ENOENT; } if (sav->tdb_xform == NULL) { DPRINTF(("%s: attempted to use uninitialized SA %s/%08lx/%u\n", __func__, ipsec_address(&dst_address, buf, sizeof(buf)), (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, noxform); - KEY_FREESAV(&sav); + key_freesav(&sav); m_freem(m); return ENXIO; } /* * Call appropriate transform and return -- callback takes care of * everything else. */ error = (*sav->tdb_xform->xf_input)(m, sav, skip, protoff); - KEY_FREESAV(&sav); - return error; + if (error != 0) + key_freesav(&sav); + return (error); } #ifdef INET -int -ah4_input(struct mbuf **mp, int *offp, int proto) -{ - struct mbuf *m; - int off; - - m = *mp; - off = *offp; - *mp = NULL; - - ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, IPPROTO_AH); - return (IPPROTO_DONE); -} -void -ah4_ctlinput(int cmd, struct sockaddr *sa, void *v) -{ - if (sa->sa_family == AF_INET && - sa->sa_len == sizeof(struct sockaddr_in)) - ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_AH); -} - -int -esp4_input(struct mbuf **mp, int *offp, int proto) -{ - struct mbuf *m; - int off; - - m = *mp; - off = *offp; - mp = NULL; - - ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, IPPROTO_ESP); - return (IPPROTO_DONE); -} - -void -esp4_ctlinput(int cmd, struct sockaddr *sa, void *v) -{ - if (sa->sa_family == AF_INET && - sa->sa_len == sizeof(struct sockaddr_in)) - ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_ESP); -} +extern struct protosw inetsw[]; +/* + * IPSEC_INPUT() method implementation for IPv4. + * 0 - Permitted by inbound security policy for further processing. + * EACCES - Forbidden by inbound security policy. + * EINPROGRESS - consumed by IPsec. + */ int -ipcomp4_input(struct mbuf **mp, int *offp, int proto) +ipsec4_input(struct mbuf *m, int offset, int proto) { - struct mbuf *m; - int off; - m = *mp; - off = *offp; - mp = NULL; - - ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, IPPROTO_IPCOMP); - return (IPPROTO_DONE); + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* Do inbound IPsec processing for AH/ESP/IPCOMP */ + ipsec_common_input(m, offset, + offsetof(struct ip, ip_p), AF_INET, proto); + return (EINPROGRESS); /* mbuf consumed by IPsec */ + default: + /* + * Protocols with further headers get their IPsec treatment + * within the protocol specific processing. + */ + if ((inetsw[ip_protox[proto]].pr_flags & PR_LASTHDR) == 0) + return (0); + /* FALLTHROUGH */ + }; + /* + * Enforce IPsec policy checking if we are seeing last header. + */ + if (ipsec4_in_reject(m, NULL) != 0) { + /* Forbidden by inbound security policy */ + m_freem(m); + return (EACCES); + } + return (0); } /* * IPsec input callback for INET protocols. * This routine is called as the transform callback. * Takes care of filtering and other sanity checks on * the processed packet. */ int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; struct ipsec_ctx_data ctx; - int prot, af, sproto, isr_prot; - struct ip *ip; - struct m_tag *mtag; - struct tdb_ident *tdbi; + struct xform_history *xh; struct secasindex *saidx; - int error; -#ifdef INET6 -#ifdef notyet - char ip6buf[INET6_ADDRSTRLEN]; -#endif -#endif + struct m_tag *mtag; + struct ip *ip; + int error, prot, af, sproto, isr_prot; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; af = saidx->dst.sa.sa_family; IPSEC_ASSERT(af == AF_INET, ("unexpected af %u", af)); sproto = saidx->proto; IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH || sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); if (skip != 0) { /* * Fix IPv4 header - * XXXGL: do we need this entire block? */ if (m->m_len < skip && (m = m_pullup(m, skip)) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_sum = 0; ip->ip_sum = in_cksum(m, ip->ip_hl << 2); } else { ip = mtod(m, struct ip *); } prot = ip->ip_p; + /* + * Check that we have NAT-T enabled and apply transport mode + * decapsulation NAT procedure (RFC3948). + * Do this before invoking into the PFIL. + */ + if (sav->natt != NULL && + (prot == IPPROTO_UDP || prot == IPPROTO_TCP)) + udp_ipsec_adjust_cksum(m, sav, prot, skip); IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; - ip = mtod(m, struct ip *); + ip = mtod(m, struct ip *); /* update pointer */ /* IP-in-IP encapsulation */ if (prot == IPPROTO_IPIP && saidx->mode != IPSEC_MODE_TRANSPORT) { - if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* enc0: strip outer IPv4 header */ m_striphdr(m, 0, ip->ip_hl << 2); - -#ifdef notyet - /* XXX PROXY address isn't recorded in SAH */ - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET && - saidx->proxy.sin.sin_addr.s_addr != - INADDR_ANY && - ipn.ip_src.s_addr != - saidx->proxy.sin.sin_addr.s_addr) || - (saidx->proxy.sa.sa_family != AF_INET && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - inet_ntoa4(ipn.ip_src), - ipsp_address(saidx->proxy), - ipsp_address(saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #ifdef INET6 /* IPv6-in-IP encapsulation. */ else if (prot == IPPROTO_IPV6 && saidx->mode != IPSEC_MODE_TRANSPORT) { - if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* enc0: strip IPv4 header, keep IPv6 header only */ m_striphdr(m, 0, ip->ip_hl << 2); -#ifdef notyet - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET6 && - !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src, - &saidx->proxy.sin6.sin6_addr)) || - (saidx->proxy.sa.sa_family != AF_INET6 && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - ip6_sprintf(ip6buf, &ip6n.ip6_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #endif /* INET6 */ else if (prot != IPPROTO_IPV6 && saidx->mode == IPSEC_MODE_ANY) { /* * When mode is wildcard, inner protocol is IPv6 and * we have no INET6 support - drop this packet a bit later. - * In other cases we assume transport mode and outer - * header was already stripped in xform_xxx_cb. + * In other cases we assume transport mode. Set prot to + * correctly choose netisr. */ prot = IPPROTO_IPIP; } /* * Record what we've done to the packet (under what SA it was * processed). */ if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); error = ENOMEM; goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - bcopy(&saidx->dst, &tdbi->dst, saidx->dst.sa.sa_len); - tdbi->proto = sproto; - tdbi->spi = sav->spi; - /* Cache those two for enc(4) in xform_ipip. */ - tdbi->alg_auth = sav->alg_auth; - tdbi->alg_enc = sav->alg_enc; - + xh = (struct xform_history *)(mtag + 1); + bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); + xh->spi = sav->spi; + xh->proto = sproto; + xh->mode = saidx->mode; m_tag_prepend(m, mtag); } key_sa_recordxfer(sav, m); /* record data transfer */ /* * In transport mode requeue decrypted mbuf back to IPv4 protocol * handler. This is necessary to correctly expose rcvif. */ if (saidx->mode == IPSEC_MODE_TRANSPORT) prot = IPPROTO_IPIP; /* * Re-dispatch via software interrupt. */ switch (prot) { case IPPROTO_IPIP: isr_prot = NETISR_IP; af = AF_INET; break; #ifdef INET6 case IPPROTO_IPV6: isr_prot = NETISR_IPV6; af = AF_INET6; break; #endif default: DPRINTF(("%s: cannot handle inner ip proto %d\n", __func__, prot)); IPSEC_ISTAT(sproto, nopf); error = EPFNOSUPPORT; goto bad; } IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; - error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); - if (error) { - IPSEC_ISTAT(sproto, qfull); - DPRINTF(("%s: queue full; proto %u packet dropped\n", - __func__, sproto)); - return error; + + /* Handle virtual tunneling interfaces */ + if (saidx->mode == IPSEC_MODE_TUNNEL) + error = ipsec_if_input(m, sav, af); + if (error == 0) { + error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet dropped\n", + __func__, sproto)); + } } - return 0; + key_freesav(&sav); + return (error); bad: - m_freem(m); - return error; -} - -void -ipsec4_common_ctlinput(int cmd, struct sockaddr *sa, void *v, int proto) -{ - /* XXX nothing just yet */ + key_freesav(&sav); + if (m != NULL) + m_freem(m); + return (error); } #endif /* INET */ #ifdef INET6 -/* IPv6 AH wrapper. */ +/* + * IPSEC_INPUT() method implementation for IPv6. + * 0 - Permitted by inbound security policy for further processing. + * EACCES - Forbidden by inbound security policy. + * EINPROGRESS - consumed by IPsec. + */ int -ipsec6_common_input(struct mbuf **mp, int *offp, int proto) +ipsec6_input(struct mbuf *m, int offset, int proto) { - int l = 0; - int protoff; - struct ip6_ext ip6e; - - if (*offp < sizeof(struct ip6_hdr)) { - DPRINTF(("%s: bad offset %u\n", __func__, *offp)); - return IPPROTO_DONE; - } else if (*offp == sizeof(struct ip6_hdr)) { - protoff = offsetof(struct ip6_hdr, ip6_nxt); - } else { - /* Chase down the header chain... */ - protoff = sizeof(struct ip6_hdr); - - do { - protoff += l; - m_copydata(*mp, protoff, sizeof(ip6e), - (caddr_t) &ip6e); - - if (ip6e.ip6e_nxt == IPPROTO_AH) - l = (ip6e.ip6e_len + 2) << 2; - else - l = (ip6e.ip6e_len + 1) << 3; - IPSEC_ASSERT(l > 0, ("l went zero or negative")); - } while (protoff + l < *offp); - - /* Malformed packet check */ - if (protoff + l != *offp) { - DPRINTF(("%s: bad packet header chain, protoff %u, " - "l %u, off %u\n", __func__, protoff, l, *offp)); - IPSEC_ISTAT(proto, hdrops); - m_freem(*mp); - *mp = NULL; - return IPPROTO_DONE; - } - protoff += offsetof(struct ip6_ext, ip6e_nxt); + + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* Do inbound IPsec processing for AH/ESP/IPCOMP */ + ipsec_common_input(m, offset, + offsetof(struct ip6_hdr, ip6_nxt), AF_INET6, proto); + return (EINPROGRESS); /* mbuf consumed by IPsec */ + default: + /* + * Protocols with further headers get their IPsec treatment + * within the protocol specific processing. + */ + if ((inet6sw[ip6_protox[proto]].pr_flags & PR_LASTHDR) == 0) + return (0); + /* FALLTHROUGH */ + }; + /* + * Enforce IPsec policy checking if we are seeing last header. + */ + if (ipsec6_in_reject(m, NULL) != 0) { + /* Forbidden by inbound security policy */ + m_freem(m); + return (EACCES); } - (void) ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto); - return IPPROTO_DONE; + return (0); } /* * IPsec input callback, called by the transform callback. Takes care of * filtering and other sanity checks on the processed packet. */ int ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; struct ipsec_ctx_data ctx; - int prot, af, sproto; + struct xform_history *xh; + struct secasindex *saidx; struct ip6_hdr *ip6; struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secasindex *saidx; + int prot, af, sproto; int nxt, isr_prot; - u_int8_t nxt8; int error, nest; -#ifdef notyet - char ip6buf[INET6_ADDRSTRLEN]; -#endif + uint8_t nxt8; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; af = saidx->dst.sa.sa_family; IPSEC_ASSERT(af == AF_INET6, ("unexpected af %u", af)); sproto = saidx->proto; IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH || sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); /* Fix IPv6 header */ if (m->m_len < sizeof(struct ip6_hdr) && (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = EACCES; goto bad; } - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); - IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + /* Save protocol */ m_copydata(m, protoff, 1, &nxt8); prot = nxt8; /* IPv6-in-IP encapsulation */ if (prot == IPPROTO_IPV6 && saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* ip6n will now contain the inner IPv6 header. */ m_striphdr(m, 0, skip); skip = 0; -#ifdef notyet - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET6 && - !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src, - &saidx->proxy.sin6.sin6_addr)) || - (saidx->proxy.sa.sa_family != AF_INET6 && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - ip6_sprintf(ip6buf, &ip6n.ip6_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #ifdef INET /* IP-in-IP encapsulation */ else if (prot == IPPROTO_IPIP && saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* ipn will now contain the inner IPv4 header */ - m_striphdr(m, 0, skip); + m_striphdr(m, 0, skip); skip = 0; -#ifdef notyet - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET && - saidx->proxy.sin.sin_addr.s_addr != INADDR_ANY && - ipn.ip_src.s_addr != saidx->proxy.sin.sin_addr.s_addr) || - (saidx->proxy.sa.sa_family != AF_INET && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - inet_ntoa4(ipn.ip_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #endif /* INET */ else { prot = IPPROTO_IPV6; /* for correct BPF processing */ } /* * Record what we've done to the packet (under what SA it was * processed). */ if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); error = ENOMEM; goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - bcopy(&saidx->dst, &tdbi->dst, sizeof(union sockaddr_union)); - tdbi->proto = sproto; - tdbi->spi = sav->spi; - /* Cache those two for enc(4) in xform_ipip. */ - tdbi->alg_auth = sav->alg_auth; - tdbi->alg_enc = sav->alg_enc; - + xh = (struct xform_history *)(mtag + 1); + bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); + xh->spi = sav->spi; + xh->proto = sproto; + xh->mode = saidx->mode; m_tag_prepend(m, mtag); } key_sa_recordxfer(sav, m); - #ifdef INET if (prot == IPPROTO_IPIP) af = AF_INET; else #endif af = AF_INET6; IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; if (skip == 0) { /* * We stripped outer IPv6 header. * Now we should requeue decrypted packet via netisr. */ switch (prot) { #ifdef INET case IPPROTO_IPIP: isr_prot = NETISR_IP; break; #endif case IPPROTO_IPV6: isr_prot = NETISR_IPV6; break; default: DPRINTF(("%s: cannot handle inner ip proto %d\n", __func__, prot)); IPSEC_ISTAT(sproto, nopf); error = EPFNOSUPPORT; goto bad; } - error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); - if (error) { - IPSEC_ISTAT(sproto, qfull); - DPRINTF(("%s: queue full; proto %u packet dropped\n", - __func__, sproto)); + /* Handle virtual tunneling interfaces */ + if (saidx->mode == IPSEC_MODE_TUNNEL) + error = ipsec_if_input(m, sav, af); + if (error == 0) { + error = netisr_queue_src(isr_prot, + (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet" + " dropped\n", __func__, sproto)); + } } + key_freesav(&sav); return (error); } /* * See the end of ip6_input for this logic. * IPPROTO_IPV[46] case will be processed just like other ones */ nest = 0; nxt = nxt8; while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); error = EINVAL; goto bad; } /* * Protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < skip) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); error = EINVAL; goto bad; } /* * Enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && ipsec6_in_reject(m, NULL)) { error = EINVAL; goto bad; } nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &skip, nxt); } - return 0; + key_freesav(&sav); + return (0); bad: + key_freesav(&sav); if (m) m_freem(m); - return error; -} - -void -esp6_ctlinput(int cmd, struct sockaddr *sa, void *d) -{ - struct ip6ctlparam *ip6cp = NULL; - struct mbuf *m = NULL; - struct ip6_hdr *ip6; - int off; - - if (sa->sa_family != AF_INET6 || - sa->sa_len != sizeof(struct sockaddr_in6)) - return; - if ((unsigned)cmd >= PRC_NCMDS) - return; - - /* if the parameter is from icmp6, decode it. */ - if (d != NULL) { - ip6cp = (struct ip6ctlparam *)d; - m = ip6cp->ip6c_m; - ip6 = ip6cp->ip6c_ip6; - off = ip6cp->ip6c_off; - } else { - m = NULL; - ip6 = NULL; - off = 0; /* calm gcc */ - } - - if (ip6 != NULL) { - - struct ip6ctlparam ip6cp1; - - /* - * Notify the error to all possible sockets via pfctlinput2. - * Since the upper layer information (such as protocol type, - * source and destination ports) is embedded in the encrypted - * data and might have been cut, we can't directly call - * an upper layer ctlinput function. However, the pcbnotify - * function will consider source and destination addresses - * as well as the flow info value, and may be able to find - * some PCB that should be notified. - * Although pfctlinput2 will call esp6_ctlinput(), there is - * no possibility of an infinite loop of function calls, - * because we don't pass the inner IPv6 header. - */ - bzero(&ip6cp1, sizeof(ip6cp1)); - ip6cp1.ip6c_src = ip6cp->ip6c_src; - pfctlinput2(cmd, sa, (void *)&ip6cp1); - - /* - * Then go to special cases that need ESP header information. - * XXX: We assume that when ip6 is non NULL, - * M and OFF are valid. - */ - - if (cmd == PRC_MSGSIZE) { - struct secasvar *sav; - u_int32_t spi; - int valid; - - /* check header length before using m_copydata */ - if (m->m_pkthdr.len < off + sizeof (struct esp)) - return; - m_copydata(m, off + offsetof(struct esp, esp_spi), - sizeof(u_int32_t), (caddr_t) &spi); - /* - * Check to see if we have a valid SA corresponding to - * the address in the ICMP message payload. - */ - sav = KEY_ALLOCSA((union sockaddr_union *)sa, - IPPROTO_ESP, spi); - valid = (sav != NULL); - if (sav) - KEY_FREESAV(&sav); - - /* XXX Further validation? */ - - /* - * Depending on whether the SA is "valid" and - * routing table size (mtudisc_{hi,lo}wat), we will: - * - recalcurate the new MTU and create the - * corresponding routing entry, or - * - ignore the MTU change notification. - */ - icmp6_mtudisc_update(ip6cp, valid); - } - } else { - /* we normally notify any pcb here */ - } + return (error); } #endif /* INET6 */ diff --git a/sys/netipsec/ipsec_mbuf.c b/sys/netipsec/ipsec_mbuf.c index 7111210d1159..6617d48af016 100644 --- a/sys/netipsec/ipsec_mbuf.c +++ b/sys/netipsec/ipsec_mbuf.c @@ -1,325 +1,337 @@ /*- * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * IPsec-specific mbuf routines. */ -#include "opt_param.h" - #include #include #include #include #include #include #include #include /* * Make space for a new header of length hlen at skip bytes * into the packet. When doing this we allocate new mbufs only * when absolutely necessary. The mbuf where the new header * is to go is returned together with an offset into the mbuf. * If NULL is returned then the mbuf chain may have been modified; * the caller is assumed to always free the chain. */ struct mbuf * m_makespace(struct mbuf *m0, int skip, int hlen, int *off) { struct mbuf *m; unsigned remain; IPSEC_ASSERT(m0 != NULL, ("null mbuf")); IPSEC_ASSERT(hlen < MHLEN, ("hlen too big: %u", hlen)); for (m = m0; m && skip > m->m_len; m = m->m_next) skip -= m->m_len; if (m == NULL) return (NULL); /* * At this point skip is the offset into the mbuf m * where the new header should be placed. Figure out * if there's space to insert the new header. If so, * and copying the remainder makes sense then do so. * Otherwise insert a new mbuf in the chain, splitting * the contents of m as needed. */ remain = m->m_len - skip; /* data to move */ - if (hlen > M_TRAILINGSPACE(m)) { + if (remain > skip && + hlen + max_linkhdr < M_LEADINGSPACE(m)) { + /* + * mbuf has enough free space at the beginning. + * XXX: which operation is the most heavy - copying of + * possible several hundred of bytes or allocation + * of new mbuf? We can remove max_linkhdr check + * here, but it is possible that this will lead + * to allocation of new mbuf in Layer 2 code. + */ + m->m_data -= hlen; + bcopy(mtodo(m, hlen), mtod(m, caddr_t), skip); + m->m_len += hlen; + *off = skip; + } else if (hlen > M_TRAILINGSPACE(m)) { struct mbuf *n0, *n, **np; int todo, len, done, alloc; n0 = NULL; np = &n0; alloc = 0; done = 0; todo = remain; while (todo > 0) { if (todo > MHLEN) { n = m_getcl(M_NOWAIT, m->m_type, 0); len = MCLBYTES; } else { n = m_get(M_NOWAIT, m->m_type); len = MHLEN; } if (n == NULL) { m_freem(n0); return NULL; } *np = n; np = &n->m_next; alloc++; len = min(todo, len); memcpy(n->m_data, mtod(m, char *) + skip + done, len); n->m_len = len; done += len; todo -= len; } if (hlen <= M_TRAILINGSPACE(m) + remain) { m->m_len = skip + hlen; *off = skip; if (n0 != NULL) { *np = m->m_next; m->m_next = n0; } } else { n = m_get(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(n0); return NULL; } alloc++; if ((n->m_next = n0) == NULL) np = &n->m_next; n0 = n; *np = m->m_next; m->m_next = n0; n->m_len = hlen; m->m_len = skip; m = n; /* header is at front ... */ *off = 0; /* ... of new mbuf */ } IPSECSTAT_INC(ips_mbinserted); } else { /* * Copy the remainder to the back of the mbuf * so there's space to write the new header. */ bcopy(mtod(m, caddr_t) + skip, - mtod(m, caddr_t) + skip + hlen, remain); + mtod(m, caddr_t) + skip + hlen, remain); m->m_len += hlen; *off = skip; } m0->m_pkthdr.len += hlen; /* adjust packet length */ return m; } /* * m_pad(m, n) pads with bytes at the end. The packet header * length is updated, and a pointer to the first byte of the padding * (which is guaranteed to be all in one mbuf) is returned. */ caddr_t m_pad(struct mbuf *m, int n) { register struct mbuf *m0, *m1; register int len, pad; caddr_t retval; if (n <= 0) { /* No stupid arguments. */ DPRINTF(("%s: pad length invalid (%d)\n", __func__, n)); m_freem(m); return NULL; } len = m->m_pkthdr.len; pad = n; m0 = m; while (m0->m_len < len) { len -= m0->m_len; m0 = m0->m_next; } if (m0->m_len != len) { DPRINTF(("%s: length mismatch (should be %d instead of %d)\n", __func__, m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); m_freem(m); return NULL; } /* Check for zero-length trailing mbufs, and find the last one. */ for (m1 = m0; m1->m_next; m1 = m1->m_next) { if (m1->m_next->m_len != 0) { DPRINTF(("%s: length mismatch (should be %d instead " "of %d)\n", __func__, m->m_pkthdr.len, m->m_pkthdr.len + m1->m_next->m_len)); m_freem(m); return NULL; } m0 = m1->m_next; } if (pad > M_TRAILINGSPACE(m0)) { /* Add an mbuf to the chain. */ MGET(m1, M_NOWAIT, MT_DATA); if (m1 == NULL) { m_freem(m0); DPRINTF(("%s: unable to get extra mbuf\n", __func__)); return NULL; } m0->m_next = m1; m0 = m1; m0->m_len = 0; } retval = m0->m_data + m0->m_len; m0->m_len += pad; m->m_pkthdr.len += pad; return retval; } /* * Remove hlen data at offset skip in the packet. This is used by * the protocols strip protocol headers and associated data (e.g. IV, * authenticator) on input. */ int m_striphdr(struct mbuf *m, int skip, int hlen) { struct mbuf *m1; int roff; /* Find beginning of header */ m1 = m_getptr(m, skip, &roff); if (m1 == NULL) return (EINVAL); /* Remove the header and associated data from the mbuf. */ if (roff == 0) { /* The header was at the beginning of the mbuf */ IPSECSTAT_INC(ips_input_front); m_adj(m1, hlen); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= hlen; } else if (roff + hlen >= m1->m_len) { struct mbuf *mo; /* * Part or all of the header is at the end of this mbuf, * so first let's remove the remainder of the header from * the beginning of the remainder of the mbuf chain, if any. */ IPSECSTAT_INC(ips_input_end); if (roff + hlen > m1->m_len) { /* Adjust the next mbuf by the remainder */ m_adj(m1->m_next, roff + hlen - m1->m_len); /* The second mbuf is guaranteed not to have a pkthdr... */ m->m_pkthdr.len -= (roff + hlen - m1->m_len); } /* Now, let's unlink the mbuf chain for a second...*/ mo = m1->m_next; m1->m_next = NULL; /* ...and trim the end of the first part of the chain...sick */ m_adj(m1, -(m1->m_len - roff)); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= (m1->m_len - roff); /* Finally, let's relink */ m1->m_next = mo; } else { /* * The header lies in the "middle" of the mbuf; copy * the remainder of the mbuf down over the header. */ IPSECSTAT_INC(ips_input_middle); bcopy(mtod(m1, u_char *) + roff + hlen, mtod(m1, u_char *) + roff, m1->m_len - (roff + hlen)); m1->m_len -= hlen; m->m_pkthdr.len -= hlen; } return (0); } /* * Diagnostic routine to check mbuf alignment as required by the * crypto device drivers (that use DMA). */ void m_checkalignment(const char* where, struct mbuf *m0, int off, int len) { int roff; struct mbuf *m = m_getptr(m0, off, &roff); caddr_t addr; if (m == NULL) return; printf("%s (off %u len %u): ", where, off, len); addr = mtod(m, caddr_t) + roff; do { int mlen; if (((uintptr_t) addr) & 3) { printf("addr misaligned %p,", addr); break; } mlen = m->m_len; if (mlen > len) mlen = len; len -= mlen; if (len && (mlen & 3)) { printf("len mismatch %u,", mlen); break; } m = m->m_next; addr = m ? mtod(m, caddr_t) : NULL; } while (m && len > 0); for (m = m0; m; m = m->m_next) printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); printf("\n"); } diff --git a/sys/netipsec/ipsec_mod.c b/sys/netipsec/ipsec_mod.c new file mode 100644 index 000000000000..06523337f9fb --- /dev/null +++ b/sys/netipsec/ipsec_mod.c @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef INET +static const struct ipsec_methods ipv4_methods = { + .input = ipsec4_input, + .forward = ipsec4_forward, + .output = ipsec4_output, + .pcbctl = ipsec4_pcbctl, + .capability = ipsec4_capability, + .check_policy = ipsec4_in_reject, + .hdrsize = ipsec_hdrsiz_inpcb, + .udp_input = udp_ipsec_input, + .udp_pcbctl = udp_ipsec_pcbctl, +}; +#ifndef KLD_MODULE +static const struct ipsec_support ipv4_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &ipv4_methods +}; +const struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; +#endif /* !KLD_MODULE */ +#endif /* INET */ + +#ifdef INET6 +static const struct ipsec_methods ipv6_methods = { + .input = ipsec6_input, + .forward = ipsec6_forward, + .output = ipsec6_output, + .pcbctl = ipsec6_pcbctl, + .capability = ipsec6_capability, + .check_policy = ipsec6_in_reject, + .hdrsize = ipsec_hdrsiz_inpcb, +}; +#ifndef KLD_MODULE +static const struct ipsec_support ipv6_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &ipv6_methods +}; +const struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; +#endif /* !KLD_MODULE */ +#endif /* INET6 */ + +/* + * Always register ipsec module. + * Even when IPsec is build in the kernel, we need to have + * module registered. This will prevent to load ipsec.ko. + */ +static int +ipsec_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + /* All xforms are registered via SYSINIT */ + if (!ipsec_initialized()) + return (ENOMEM); +#ifdef KLD_MODULE +#ifdef INET + ipsec_support_enable(ipv4_ipsec_support, &ipv4_methods); +#endif +#ifdef INET6 + ipsec_support_enable(ipv6_ipsec_support, &ipv6_methods); +#endif +#endif /* KLD_MODULE */ + break; + case MOD_UNLOAD: + /* All xforms are unregistered via SYSUNINIT */ +#ifdef KLD_MODULE +#ifdef INET + ipsec_support_disable(ipv4_ipsec_support); +#endif +#ifdef INET6 + ipsec_support_disable(ipv6_ipsec_support); +#endif +#endif /* KLD_MODULE */ + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t ipsec_mod = { + "ipsec", + ipsec_modevent, + 0 +}; + +DECLARE_MODULE(ipsec, ipsec_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(ipsec, 1); +#ifdef KLD_MODULE +MODULE_DEPEND(ipsec, ipsec_support, 1, 1, 1); +#endif diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 24d8f4e571f3..392008e91978 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -1,735 +1,971 @@ /*- * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting + * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * IPsec output processing. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #include #endif #include #ifdef INET6 #include #endif +#ifdef SCTP +#include +#endif +#include +#include +#include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include -#ifdef IPSEC_NAT_T -#include +#define IPSEC_OSTAT_INC(proto, name) do { \ + if ((proto) == IPPROTO_ESP) \ + ESPSTAT_INC(esps_##name); \ + else if ((proto) == IPPROTO_AH)\ + AHSTAT_INC(ahs_##name); \ + else \ + IPCOMPSTAT_INC(ipcomps_##name); \ +} while (0) + +static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx); + +#ifdef INET +static struct secasvar * +ipsec4_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +{ + struct secasindex *saidx, tmpsaidx; + struct ipsecrequest *isr; + struct sockaddr_in *sin; + struct secasvar *sav; + struct ip *ip; + + /* + * Check system global policy controls. + */ +next: + isr = sp->req[*pidx]; + if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || + (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || + (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { + DPRINTF(("%s: IPsec outbound packet dropped due" + " to policy (check your sysctls)\n", __func__)); + IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); + *error = EHOSTUNREACH; + return (NULL); + } + /* + * Craft SA index to search for proper SA. Note that + * we only initialize unspecified SA peers for transport + * mode; for tunnel mode they must already be filled in. + */ + if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + saidx = &tmpsaidx; + *saidx = isr->saidx; + ip = mtod(m, struct ip *); + if (saidx->src.sa.sa_len == 0) { + sin = &saidx->src.sin; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + sin->sin_addr = ip->ip_src; + } + if (saidx->dst.sa.sa_len == 0) { + sin = &saidx->dst.sin; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + sin->sin_addr = ip->ip_dst; + } + } else + saidx = &sp->req[*pidx]->saidx; + /* + * Lookup SA and validate it. + */ + sav = key_allocsa_policy(sp, saidx, error); + if (sav == NULL) { + IPSECSTAT_INC(ips_out_nosa); + if (*error != 0) + return (NULL); + if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { + /* + * We have no SA and policy that doesn't require + * this IPsec transform, thus we can continue w/o + * IPsec processing, i.e. return EJUSTRETURN. + * But first check if there is some bundled transform. + */ + if (sp->tcount > ++(*pidx)) + goto next; + *error = EJUSTRETURN; + } + return (NULL); + } + IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + return (sav); +} + +/* + * IPsec output logic for IPv4. + */ +static int +ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + union sockaddr_union *dst; + struct secasvar *sav; + struct ip *ip; + int error, i, off; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); + + /* + * We hold the reference to SP. Content of SP couldn't be changed. + * Craft secasindex and do lookup for suitable SA. + * Then do encapsulation if needed and call xform's output. + * We need to store SP in the xform callback parameters. + * In xform callback we will extract SP and it can be used to + * determine next transform. At the end of transform we can + * release reference to SP. + */ + sav = ipsec4_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; + } + /* + * XXXAE: most likely ip_sum at this point is wrong. + */ + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + ip = mtod(m, struct ip *); + dst = &sav->sah->saidx.dst; + /* Do the appropriate encapsulation, if necessary */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET || /* PF mismatch */ + (dst->sa.sa_family == AF_INET && /* Proxy */ + dst->sin.sin_addr.s_addr != INADDR_ANY && + dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { + /* Fix IPv4 header checksum and length */ + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ + goto bad; + } + } + + IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + /* + * Dispatch to the appropriate IPsec transform logic. The + * packet will be returned for transmission after crypto + * processing, etc. are completed. + * + * NB: m & sav are ``passed to caller'' who's responsible for + * reclaiming their resources. + */ + switch(dst->sa.sa_family) { + case AF_INET: + ip = mtod(m, struct ip *); + i = ip->ip_hl << 2; + off = offsetof(struct ip, ip_p); + break; +#ifdef INET6 + case AF_INET6: + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; +#endif /* INET6 */ + default: + DPRINTF(("%s: unsupported protocol family %u\n", + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf); + goto bad; + } + error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); + if (error != 0) { + key_freesav(&sav); + key_freesp(&sp); + } + return (error); +bad: + IPSECSTAT_INC(ips_out_inval); + if (m != NULL) + m_freem(m); + if (sav != NULL) + key_freesav(&sav); + key_freesp(&sp); + return (error); +} + +int +ipsec4_process_packet(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp) +{ + + return (ipsec4_perform_request(m, sp, 0)); +} + +static int +ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +{ + struct secpolicy *sp; + int error; + + /* Lookup for the corresponding outbound security policy */ + sp = ipsec4_checkpolicy(m, inp, &error); + if (sp == NULL) { + if (error == -EINVAL) { + /* Discarded by policy. */ + m_freem(m); + return (EACCES); + } + return (0); /* No IPsec required. */ + } + + /* + * Usually we have to have tunnel mode IPsec security policy + * when we are forwarding a packet. Otherwise we could not handle + * encrypted replies, because they are not destined for us. But + * some users are doing source address translation for forwarded + * packets, and thus, even if they are forwarded, the replies will + * return back to us. + */ + if (!forwarding) { + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) { + struct ip *ip = mtod(m, struct ip *); + + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + } #endif + } + /* NB: callee frees mbuf and releases reference to SP */ + error = ipsec4_process_packet(m, sp, inp); + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing and return without error. + */ + return (0); + } + if (error == 0) + return (EINPROGRESS); /* consumed by IPsec */ + return (error); +} + +/* + * IPSEC_OUTPUT() method implementation for IPv4. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec4_output(struct mbuf *m, struct inpcb *inp) +{ + + /* + * If the packet is resubmitted to ip_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) + return (0); + + return (ipsec4_common_output(m, inp, 0)); +} + +/* + * IPSEC_FORWARD() method implementation for IPv4. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec4_forward(struct mbuf *m) +{ + + /* + * Check if this packet has an active inbound SP and needs to be + * dropped instead of forwarded. + */ + if (ipsec4_in_reject(m, NULL) != 0) { + m_freem(m); + return (EACCES); + } + return (ipsec4_common_output(m, NULL, 1)); +} +#endif + +#ifdef INET6 +static int +in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, + const struct in6_addr *ia) +{ + struct in6_addr ia2; + + if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) { + memcpy(&ia2, &sa->sin6_addr, sizeof(ia2)); + ia2.s6_addr16[1] = htons(sa->sin6_scope_id); + return (IN6_ARE_ADDR_EQUAL(ia, &ia2)); + } + return (IN6_ARE_ADDR_EQUAL(&sa->sin6_addr, ia)); +} + +static struct secasvar * +ipsec6_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +{ + struct secasindex *saidx, tmpsaidx; + struct ipsecrequest *isr; + struct sockaddr_in6 *sin6; + struct secasvar *sav; + struct ip6_hdr *ip6; + + /* + * Check system global policy controls. + */ +next: + isr = sp->req[*pidx]; + if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || + (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || + (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { + DPRINTF(("%s: IPsec outbound packet dropped due" + " to policy (check your sysctls)\n", __func__)); + IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); + *error = EHOSTUNREACH; + return (NULL); + } + /* + * Craft SA index to search for proper SA. Note that + * we only fillin unspecified SA peers for transport + * mode; for tunnel mode they must already be filled in. + */ + if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + saidx = &tmpsaidx; + *saidx = isr->saidx; + ip6 = mtod(m, struct ip6_hdr *); + if (saidx->src.sin6.sin6_len == 0) { + sin6 = (struct sockaddr_in6 *)&saidx->src; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = IPSEC_PORT_ANY; + sin6->sin6_addr = ip6->ip6_src; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + /* fix scope id for comparing SPD */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = + ntohs(ip6->ip6_src.s6_addr16[1]); + } + } + if (saidx->dst.sin6.sin6_len == 0) { + sin6 = (struct sockaddr_in6 *)&saidx->dst; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = IPSEC_PORT_ANY; + sin6->sin6_addr = ip6->ip6_dst; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + /* fix scope id for comparing SPD */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = + ntohs(ip6->ip6_dst.s6_addr16[1]); + } + } + } else + saidx = &sp->req[*pidx]->saidx; + /* + * Lookup SA and validate it. + */ + sav = key_allocsa_policy(sp, saidx, error); + if (sav == NULL) { + IPSEC6STAT_INC(ips_out_nosa); + if (*error != 0) + return (NULL); + if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { + /* + * We have no SA and policy that doesn't require + * this IPsec transform, thus we can continue w/o + * IPsec processing, i.e. return EJUSTRETURN. + * But first check if there is some bundled transform. + */ + if (sp->tcount > ++(*pidx)) + goto next; + *error = EJUSTRETURN; + } + return (NULL); + } + IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + return (sav); +} + +/* + * IPsec output logic for IPv6. + */ +static int +ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + union sockaddr_union *dst; + struct secasvar *sav; + struct ip6_hdr *ip6; + int error, i, off; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); + + sav = ipsec6_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; + } + + /* Fix IP length in case if it is not set yet. */ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); + + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET6, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ + dst = &sav->sah->saidx.dst; + + /* Do the appropriate encapsulation, if necessary */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET6 || /* PF mismatch */ + ((dst->sa.sa_family == AF_INET6) && + (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) && + (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) { + if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) { + /* No jumbogram support. */ + error = ENXIO; /*XXX*/ + goto bad; + } + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ + goto bad; + } + } + + IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + switch(dst->sa.sa_family) { +#ifdef INET + case AF_INET: + { + struct ip *ip; + ip = mtod(m, struct ip *); + i = ip->ip_hl << 2; + off = offsetof(struct ip, ip_p); + } + break; +#endif /* AF_INET */ + case AF_INET6: + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; + default: + DPRINTF(("%s: unsupported protocol family %u\n", + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf); + goto bad; + } + error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); + if (error != 0) { + key_freesav(&sav); + key_freesp(&sp); + } + return (error); +bad: + IPSEC6STAT_INC(ips_out_inval); + if (m != NULL) + m_freem(m); + if (sav != NULL) + key_freesav(&sav); + key_freesp(&sp); + return (error); +} + +int +ipsec6_process_packet(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp) +{ + + return (ipsec6_perform_request(m, sp, 0)); +} + +static int +ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +{ + struct secpolicy *sp; + int error; + + /* Lookup for the corresponding outbound security policy */ + sp = ipsec6_checkpolicy(m, inp, &error); + if (sp == NULL) { + if (error == -EINVAL) { + /* Discarded by policy. */ + m_freem(m); + return (EACCES); + } + return (0); /* No IPsec required. */ + } + + if (!forwarding) { + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { + in6_delayed_cksum(m, m->m_pkthdr.len - + sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + } +#endif + } + /* NB: callee frees mbuf and releases reference to SP */ + error = ipsec6_process_packet(m, sp, inp); + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing and return without error. + */ + return (0); + } + if (error == 0) + return (EINPROGRESS); /* consumed by IPsec */ + return (error); +} + +/* + * IPSEC_OUTPUT() method implementation for IPv6. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec6_output(struct mbuf *m, struct inpcb *inp) +{ + + /* + * If the packet is resubmitted to ip_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) + return (0); + + return (ipsec6_common_output(m, inp, 0)); +} + +/* + * IPSEC_FORWARD() method implementation for IPv6. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec6_forward(struct mbuf *m) +{ + + /* + * Check if this packet has an active inbound SP and needs to be + * dropped instead of forwarded. + */ + if (ipsec6_in_reject(m, NULL) != 0) { + m_freem(m); + return (EACCES); + } + return (ipsec6_common_output(m, NULL, 1)); +} +#endif /* INET6 */ int -ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) +ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx) { - struct tdb_ident *tdbi; - struct m_tag *mtag; - struct secasvar *sav; + struct xform_history *xh; struct secasindex *saidx; + struct m_tag *mtag; int error; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(isr != NULL, ("null ISR")); - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - sav = isr->sav; - IPSEC_ASSERT(sav != NULL, ("null SA")); - IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); - saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: /* Fix the header length, for AH processing. */ mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len); break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* Fix the header length, for AH processing. */ if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) { error = ENXIO; goto bad; } if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) { /* No jumbogram support. */ error = ENXIO; /*?*/ goto bad; } mtod(m, struct ip6_hdr *)->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); break; #endif /* INET6 */ default: DPRINTF(("%s: unknown protocol family %u\n", __func__, saidx->dst.sa.sa_family)); error = ENXIO; goto bad; } /* - * Add a record of what we've done or what needs to be done to the - * packet. + * Add a record of what we've done to the packet. */ - mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(*xh), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: could not get packet tag\n", __func__)); error = ENOMEM; goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - tdbi->dst = saidx->dst; - tdbi->proto = saidx->proto; - tdbi->spi = sav->spi; + xh = (struct xform_history *)(mtag + 1); + xh->dst = saidx->dst; + xh->proto = saidx->proto; + xh->mode = saidx->mode; + xh->spi = sav->spi; m_tag_prepend(m, mtag); key_sa_recordxfer(sav, m); /* record data transfer */ /* * If there's another (bundled) SA to apply, do so. * Note that this puts a burden on the kernel stack size. * If this is a problem we'll need to introduce a queue * to set the packet on so we can unwind the stack before * doing further processing. */ - if (isr->next) { - /* XXX-BZ currently only support same AF bundles. */ + if (++idx < sp->tcount) { switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: + key_freesav(&sav); IPSECSTAT_INC(ips_out_bundlesa); - return (ipsec4_process_packet(m, isr->next)); + return (ipsec4_perform_request(m, sp, idx)); /* NOTREACHED */ #endif -#ifdef notyet #ifdef INET6 case AF_INET6: - /* XXX */ + key_freesav(&sav); IPSEC6STAT_INC(ips_out_bundlesa); - return (ipsec6_process_packet(m, isr->next)); + return (ipsec6_perform_request(m, sp, idx)); /* NOTREACHED */ #endif /* INET6 */ -#endif default: DPRINTF(("%s: unknown protocol family %u\n", __func__, saidx->dst.sa.sa_family)); - error = ENXIO; + error = EPFNOSUPPORT; goto bad; } } + key_freesp(&sp), sp = NULL; /* Release reference to SP */ +#ifdef INET + /* + * Do UDP encapsulation if SA requires it. + */ + if (sav->natt != NULL) { + error = udp_ipsec_output(m, sav); + if (error != 0) + goto bad; + } +#endif /* INET */ /* * We're done with IPsec processing, transmit the packet using the * appropriate network protocol (IP or IPv6). */ switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: -#ifdef IPSEC_NAT_T - /* - * If NAT-T is enabled, now that all IPsec processing is done - * insert UDP encapsulation header after IP header. - */ - if (sav->natt_type) { - struct ip *ip = mtod(m, struct ip *); - const int hlen = (ip->ip_hl << 2); - int size, off; - struct mbuf *mi; - struct udphdr *udp; - - size = sizeof(struct udphdr); - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - /* - * draft-ietf-ipsec-nat-t-ike-0[01].txt and - * draft-ietf-ipsec-udp-encaps-(00/)01.txt, - * ignoring possible AH mode - * non-IKE marker + non-ESP marker - * from draft-ietf-ipsec-udp-encaps-00.txt. - */ - size += sizeof(u_int64_t); - } - mi = m_makespace(m, hlen, size, &off); - if (mi == NULL) { - DPRINTF(("%s: m_makespace for udphdr failed\n", - __func__)); - error = ENOBUFS; - goto bad; - } - - udp = (struct udphdr *)(mtod(mi, caddr_t) + off); - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) - udp->uh_sport = htons(UDP_ENCAP_ESPINUDP_PORT); - else - udp->uh_sport = - KEY_PORTFROMSADDR(&sav->sah->saidx.src); - udp->uh_dport = KEY_PORTFROMSADDR(&sav->sah->saidx.dst); - udp->uh_sum = 0; - udp->uh_ulen = htons(m->m_pkthdr.len - hlen); - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_p = IPPROTO_UDP; - - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) - *(u_int64_t *)(udp + 1) = 0; - } -#endif /* IPSEC_NAT_T */ - + key_freesav(&sav); return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL); #endif /* INET */ #ifdef INET6 case AF_INET6: - /* - * We don't need massage, IPv6 header fields are always in - * net endian. - */ + key_freesav(&sav); return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif /* INET6 */ } panic("ipsec_process_done"); bad: m_freem(m); + key_freesav(&sav); + if (sp != NULL) + key_freesp(&sp); return (error); } -static struct ipsecrequest * -ipsec_nextisr( - struct mbuf *m, - struct ipsecrequest *isr, - int af, - struct secasindex *saidx, - int *error -) +/* + * ipsec_prepend() is optimized version of M_PREPEND(). + * ipsec_encap() is called by IPsec output routine for tunnel mode SA. + * It is expected that after IP encapsulation some IPsec transform will + * be performed. Each IPsec transform inserts its variable length header + * just after outer IP header using m_makespace(). If given mbuf has not + * enough free space at the beginning, we allocate new mbuf and reserve + * some space at the beginning and at the end. + * This helps avoid allocating of new mbuf and data copying in m_makespace(), + * we place outer header in the middle of mbuf's data with reserved leading + * and trailing space: + * [ LEADINGSPACE ][ Outer IP header ][ TRAILINGSPACE ] + * LEADINGSPACE will be used to add ethernet header, TRAILINGSPACE will + * be used to inject AH/ESP/IPCOMP header. + */ +#define IPSEC_TRAILINGSPACE (sizeof(struct udphdr) +/* NAT-T */ \ + max(sizeof(struct newesp) + EALG_MAX_BLOCK_LEN, /* ESP + IV */ \ + sizeof(struct newah) + HASH_MAX_LEN /* AH + ICV */)) +static struct mbuf * +ipsec_prepend(struct mbuf *m, int len, int how) { -#define IPSEC_OSTAT(name) do { \ - if (isr->saidx.proto == IPPROTO_ESP) \ - ESPSTAT_INC(esps_##name); \ - else if (isr->saidx.proto == IPPROTO_AH)\ - AHSTAT_INC(ahs_##name); \ - else \ - IPCOMPSTAT_INC(ipcomps_##name); \ -} while (0) - struct secasvar *sav; - - IPSECREQUEST_LOCK_ASSERT(isr); - - IPSEC_ASSERT(af == AF_INET || af == AF_INET6, - ("invalid address family %u", af)); -again: - /* - * Craft SA index to search for proper SA. Note that - * we only fillin unspecified SA peers for transport - * mode; for tunnel mode they must already be filled in. - */ - *saidx = isr->saidx; - if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { - /* Fillin unspecified SA peers only for transport mode */ - if (af == AF_INET) { - struct sockaddr_in *sin; - struct ip *ip = mtod(m, struct ip *); - - if (saidx->src.sa.sa_len == 0) { - sin = &saidx->src.sin; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - sin->sin_addr = ip->ip_src; - } - if (saidx->dst.sa.sa_len == 0) { - sin = &saidx->dst.sin; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - sin->sin_addr = ip->ip_dst; - } - } else { - struct sockaddr_in6 *sin6; - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - - if (saidx->src.sin6.sin6_len == 0) { - sin6 = (struct sockaddr_in6 *)&saidx->src; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = IPSEC_PORT_ANY; - sin6->sin6_addr = ip6->ip6_src; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - /* fix scope id for comparing SPD */ - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - ntohs(ip6->ip6_src.s6_addr16[1]); - } - } - if (saidx->dst.sin6.sin6_len == 0) { - sin6 = (struct sockaddr_in6 *)&saidx->dst; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = IPSEC_PORT_ANY; - sin6->sin6_addr = ip6->ip6_dst; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { - /* fix scope id for comparing SPD */ - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - ntohs(ip6->ip6_dst.s6_addr16[1]); - } - } - } - } - - /* - * Lookup SA and validate it. - */ - *error = key_checkrequest(isr, saidx); - if (*error != 0) { - /* - * IPsec processing is required, but no SA found. - * I assume that key_acquire() had been called - * to get/establish the SA. Here I discard - * this packet because it is responsibility for - * upper layer to retransmit the packet. - */ - switch(af) { - case AF_INET: - IPSECSTAT_INC(ips_out_nosa); - break; -#ifdef INET6 - case AF_INET6: - IPSEC6STAT_INC(ips_out_nosa); - break; -#endif - } - goto bad; - } - sav = isr->sav; - if (sav == NULL) { - IPSEC_ASSERT(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE, - ("no SA found, but required; level %u", - ipsec_get_reqlevel(isr))); - IPSECREQUEST_UNLOCK(isr); - isr = isr->next; - /* - * If isr is NULL, we found a 'use' policy w/o SA. - * Return w/o error and w/o isr so we can drop out - * and continue w/o IPsec processing. - */ - if (isr == NULL) - return isr; - IPSECREQUEST_LOCK(isr); - goto again; - } - - /* - * Check system global policy controls. - */ - if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || - (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || - (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { - DPRINTF(("%s: IPsec outbound packet dropped due" - " to policy (check your sysctls)\n", __func__)); - IPSEC_OSTAT(pdrops); - *error = EHOSTUNREACH; - goto bad; + struct mbuf *n; + + M_ASSERTPKTHDR(m); + IPSEC_ASSERT(len < MHLEN, ("wrong length")); + if (M_LEADINGSPACE(m) >= len) { + /* No need to allocate new mbuf. */ + m->m_data -= len; + m->m_len += len; + m->m_pkthdr.len += len; + return (m); } - - /* - * Sanity check the SA contents for the caller - * before they invoke the xform output method. - */ - if (sav->tdb_xform == NULL) { - DPRINTF(("%s: no transform for SA\n", __func__)); - IPSEC_OSTAT(noxform); - *error = EHOSTUNREACH; - goto bad; + n = m_gethdr(how, m->m_type); + if (n == NULL) { + m_freem(m); + return (NULL); } - return isr; -bad: - IPSEC_ASSERT(*error != 0, ("error return w/ no error code")); - IPSECREQUEST_UNLOCK(isr); - return NULL; -#undef IPSEC_OSTAT + m_move_pkthdr(n, m); + n->m_next = m; + if (len + IPSEC_TRAILINGSPACE < M_SIZE(n)) + m_align(n, len + IPSEC_TRAILINGSPACE); + n->m_len = len; + n->m_pkthdr.len += len; + return (n); } static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx) { #ifdef INET6 struct ip6_hdr *ip6; #endif struct ip *ip; int setdf; uint8_t itos, proto; ip = mtod(*mp, struct ip *); switch (ip->ip_v) { #ifdef INET case IPVERSION: proto = IPPROTO_IPIP; /* * Collect IP_DF state from the inner header * and honor system-wide control of how to handle it. */ switch (V_ip4_ipsec_dfbit) { case 0: /* clear in outer header */ case 1: /* set in outer header */ setdf = V_ip4_ipsec_dfbit; break; default:/* propagate to outer header */ setdf = (ip->ip_off & htons(IP_DF)) != 0; } itos = ip->ip_tos; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): proto = IPPROTO_IPV6; ip6 = mtod(*mp, struct ip6_hdr *); itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; setdf = V_ip4_ipsec_dfbit ? 1: 0; /* scoped address handling */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); break; #endif default: return (EAFNOSUPPORT); } switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: if (saidx->src.sa.sa_family != AF_INET || saidx->src.sin.sin_addr.s_addr == INADDR_ANY || saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) return (EINVAL); - M_PREPEND(*mp, sizeof(struct ip), M_NOWAIT); + *mp = ipsec_prepend(*mp, sizeof(struct ip), M_NOWAIT); if (*mp == NULL) return (ENOBUFS); ip = mtod(*mp, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_p = proto; ip->ip_len = htons((*mp)->m_pkthdr.len); ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; ip->ip_off = setdf ? htons(IP_DF): 0; ip->ip_src = saidx->src.sin.sin_addr; ip->ip_dst = saidx->dst.sin.sin_addr; ip_ecn_ingress(V_ip4_ipsec_ecn, &ip->ip_tos, &itos); ip_fillid(ip); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (saidx->src.sa.sa_family != AF_INET6 || IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr)) return (EINVAL); - M_PREPEND(*mp, sizeof(struct ip6_hdr), M_NOWAIT); + *mp = ipsec_prepend(*mp, sizeof(struct ip6_hdr), M_NOWAIT); if (*mp == NULL) return (ENOBUFS); ip6 = mtod(*mp, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_hlim = V_ip6_defhlim; ip6->ip6_nxt = proto; ip6->ip6_dst = saidx->dst.sin6.sin6_addr; /* For link-local address embed scope zone id */ if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = htons(saidx->dst.sin6.sin6_scope_id & 0xffff); ip6->ip6_src = saidx->src.sin6.sin6_addr; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = htons(saidx->src.sin6.sin6_scope_id & 0xffff); ip6->ip6_plen = htons((*mp)->m_pkthdr.len - sizeof(*ip6)); ip_ecn_ingress(V_ip6_ipsec_ecn, &proto, &itos); ip6->ip6_flow |= htonl((uint32_t)proto << 20); break; #endif /* INET6 */ default: return (EAFNOSUPPORT); } + (*mp)->m_flags &= ~(M_BCAST | M_MCAST); return (0); } -#ifdef INET -/* - * IPsec output logic for IPv4. - */ -int -ipsec4_process_packet(struct mbuf *m, struct ipsecrequest *isr) -{ - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - struct ipsec_ctx_data ctx; - union sockaddr_union *dst; - struct secasindex saidx; - struct secasvar *sav; - struct ip *ip; - int error, i, off; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(isr != NULL, ("null isr")); - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ - - isr = ipsec_nextisr(m, isr, AF_INET, &saidx, &error); - if (isr == NULL) { - if (error != 0) - goto bad; - return EJUSTRETURN; - } - - sav = isr->sav; - if (m->m_len < sizeof(struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - ip = mtod(m, struct ip *); - dst = &sav->sah->saidx.dst; - /* Do the appropriate encapsulation, if necessary */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ - dst->sa.sa_family != AF_INET || /* PF mismatch */ - (dst->sa.sa_family == AF_INET && /* Proxy */ - dst->sin.sin_addr.s_addr != INADDR_ANY && - dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { - /* Fix IPv4 header checksum and length */ - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, ip->ip_hl << 2); - error = ipsec_encap(&m, &sav->sah->saidx); - if (error != 0) { - DPRINTF(("%s: encapsulation for SA %s->%s " - "SPI 0x%08x failed with error %d\n", __func__, - ipsec_address(&sav->sah->saidx.src, sbuf, - sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, - sizeof(dbuf)), ntohl(sav->spi), error)); - goto bad; - } - } - - IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - /* - * Dispatch to the appropriate IPsec transform logic. The - * packet will be returned for transmission after crypto - * processing, etc. are completed. - * - * NB: m & sav are ``passed to caller'' who's responsible for - * for reclaiming their resources. - */ - switch(dst->sa.sa_family) { - case AF_INET: - ip = mtod(m, struct ip *); - i = ip->ip_hl << 2; - off = offsetof(struct ip, ip_p); - break; -#ifdef INET6 - case AF_INET6: - i = sizeof(struct ip6_hdr); - off = offsetof(struct ip6_hdr, ip6_nxt); - break; -#endif /* INET6 */ - default: - DPRINTF(("%s: unsupported protocol family %u\n", - __func__, dst->sa.sa_family)); - error = EPFNOSUPPORT; - IPSECSTAT_INC(ips_out_inval); - goto bad; - } - error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); - IPSECREQUEST_UNLOCK(isr); - return (error); -bad: - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - return error; -} -#endif - - -#ifdef INET6 -static int -in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, const struct in6_addr *ia) -{ - struct in6_addr ia2; - - memcpy(&ia2, &sa->sin6_addr, sizeof(ia2)); - if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) - ia2.s6_addr16[1] = htons(sa->sin6_scope_id); - - return IN6_ARE_ADDR_EQUAL(ia, &ia2); -} - -/* - * IPsec output logic for IPv6. - */ -int -ipsec6_process_packet(struct mbuf *m, struct ipsecrequest *isr) -{ - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - struct ipsec_ctx_data ctx; - struct secasindex saidx; - struct secasvar *sav; - struct ip6_hdr *ip6; - int error, i, off; - union sockaddr_union *dst; - - IPSEC_ASSERT(m != NULL, ("ipsec6_process_packet: null mbuf")); - IPSEC_ASSERT(isr != NULL, ("ipsec6_process_packet: null isr")); - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ - - isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); - if (isr == NULL) { - if (error != 0) - goto bad; - return EJUSTRETURN; - } - sav = isr->sav; - dst = &sav->sah->saidx.dst; - - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET6, IPSEC_ENC_BEFORE); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - - /* Do the appropriate encapsulation, if necessary */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ - dst->sa.sa_family != AF_INET6 || /* PF mismatch */ - ((dst->sa.sa_family == AF_INET6) && - (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) && - (!in6_sa_equal_addrwithscope(&dst->sin6, - &ip6->ip6_dst)))) { - if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) { - /* No jumbogram support. */ - error = ENXIO; /*XXX*/ - goto bad; - } - error = ipsec_encap(&m, &sav->sah->saidx); - if (error != 0) { - DPRINTF(("%s: encapsulation for SA %s->%s " - "SPI 0x%08x failed with error %d\n", __func__, - ipsec_address(&sav->sah->saidx.src, sbuf, - sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, - sizeof(dbuf)), ntohl(sav->spi), error)); - goto bad; - } - } - - IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - switch(dst->sa.sa_family) { -#ifdef INET - case AF_INET: - { - struct ip *ip; - ip = mtod(m, struct ip *); - i = ip->ip_hl << 2; - off = offsetof(struct ip, ip_p); - } - break; -#endif /* AF_INET */ - case AF_INET6: - i = sizeof(struct ip6_hdr); - off = offsetof(struct ip6_hdr, ip6_nxt); - break; - default: - DPRINTF(("%s: unsupported protocol family %u\n", - __func__, dst->sa.sa_family)); - error = EPFNOSUPPORT; - goto bad; - } - error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); - IPSECREQUEST_UNLOCK(isr); - return error; -bad: - IPSEC6STAT_INC(ips_out_inval); - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - return error; -} -#endif /*INET6*/ diff --git a/sys/netipsec/ipsec_pcb.c b/sys/netipsec/ipsec_pcb.c new file mode 100644 index 000000000000..b6ed5f3e783b --- /dev/null +++ b/sys/netipsec/ipsec_pcb.c @@ -0,0 +1,479 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); + +static void +ipsec_setsockaddrs_inpcb(struct inpcb *inp, union sockaddr_union *src, + union sockaddr_union *dst, u_int dir) +{ + +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + struct sockaddr_in6 *sin6; + + bzero(&src->sin6, sizeof(src->sin6)); + bzero(&dst->sin6, sizeof(dst->sin6)); + src->sin6.sin6_family = AF_INET6; + src->sin6.sin6_len = sizeof(struct sockaddr_in6); + dst->sin6.sin6_family = AF_INET6; + dst->sin6.sin6_len = sizeof(struct sockaddr_in6); + + if (dir == IPSEC_DIR_OUTBOUND) + sin6 = &src->sin6; + else + sin6 = &dst->sin6; + sin6->sin6_addr = inp->in6p_laddr; + sin6->sin6_port = inp->inp_lport; + if (IN6_IS_SCOPE_LINKLOCAL(&inp->in6p_laddr)) { + /* XXXAE: use in6p_zoneid */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs( + inp->in6p_laddr.s6_addr16[1]); + } + + if (dir == IPSEC_DIR_OUTBOUND) + sin6 = &dst->sin6; + else + sin6 = &src->sin6; + sin6->sin6_addr = inp->in6p_faddr; + sin6->sin6_port = inp->inp_fport; + if (IN6_IS_SCOPE_LINKLOCAL(&inp->in6p_faddr)) { + /* XXXAE: use in6p_zoneid */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs( + inp->in6p_faddr.s6_addr16[1]); + } + } +#endif +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + struct sockaddr_in *sin; + + bzero(&src->sin, sizeof(src->sin)); + bzero(&dst->sin, sizeof(dst->sin)); + src->sin.sin_family = AF_INET; + src->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + + if (dir == IPSEC_DIR_OUTBOUND) + sin = &src->sin; + else + sin = &dst->sin; + sin->sin_addr = inp->inp_laddr; + sin->sin_port = inp->inp_lport; + + if (dir == IPSEC_DIR_OUTBOUND) + sin = &dst->sin; + else + sin = &src->sin; + sin->sin_addr = inp->inp_faddr; + sin->sin_port = inp->inp_fport; + } +#endif +} + +void +ipsec_setspidx_inpcb(struct inpcb *inp, struct secpolicyindex *spidx, + u_int dir) +{ + + ipsec_setsockaddrs_inpcb(inp, &spidx->src, &spidx->dst, dir); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + spidx->prefs = sizeof(struct in6_addr) << 3; + spidx->prefd = sizeof(struct in6_addr) << 3; + } +#endif +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + spidx->prefs = sizeof(struct in_addr) << 3; + spidx->prefd = sizeof(struct in_addr) << 3; + } +#endif + spidx->ul_proto = IPPROTO_TCP; /* XXX: currently only TCP uses this */ + spidx->dir = dir; + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); +} + +/* Initialize PCB policy. */ +int +ipsec_init_pcbpolicy(struct inpcb *inp) +{ + + IPSEC_ASSERT(inp != NULL, ("null inp")); + IPSEC_ASSERT(inp->inp_sp == NULL, ("inp_sp already initialized")); + + inp->inp_sp = malloc(sizeof(struct inpcbpolicy), M_IPSEC_INPCB, + M_NOWAIT | M_ZERO); + if (inp->inp_sp == NULL) + return (ENOBUFS); + return (0); +} + +/* Delete PCB policy. */ +int +ipsec_delete_pcbpolicy(struct inpcb *inp) +{ + + if (inp->inp_sp == NULL) + return (0); + + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + key_freesp(&inp->inp_sp->sp_in); + + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + key_freesp(&inp->inp_sp->sp_out); + + free(inp->inp_sp, M_IPSEC_INPCB); + inp->inp_sp = NULL; + return (0); +} + +/* Deep-copy a policy in PCB. */ +static struct secpolicy * +ipsec_deepcopy_pcbpolicy(struct secpolicy *src) +{ + struct secpolicy *dst; + int i; + + if (src == NULL) + return (NULL); + + IPSEC_ASSERT(src->state == IPSEC_SPSTATE_PCB, ("SP isn't PCB")); + + dst = key_newsp(); + if (dst == NULL) + return (NULL); + + /* spidx is not copied here */ + dst->policy = src->policy; + dst->state = src->state; + dst->priority = src->priority; + /* Do not touch the refcnt field. */ + + /* Copy IPsec request chain. */ + for (i = 0; i < src->tcount; i++) { + dst->req[i] = ipsec_newisr(); + if (dst->req[i] == NULL) { + key_freesp(&dst); + return (NULL); + } + bcopy(src->req[i], dst->req[i], sizeof(struct ipsecrequest)); + dst->tcount++; + } + KEYDBG(IPSEC_DUMP, + printf("%s: copied SP(%p) -> SP(%p)\n", __func__, src, dst); + kdebug_secpolicy(dst)); + return (dst); +} + +/* + * Copy IPsec policy from old INPCB into new. + * It is expected that new INPCB has not configured policies. + */ +int +ipsec_copy_pcbpolicy(struct inpcb *old, struct inpcb *new) +{ + struct secpolicy *sp; + + /* + * old->inp_sp can be NULL if PCB was created when an IPsec + * support was unavailable. This is not an error, we don't have + * policies in this PCB, so nothing to copy. + */ + if (old->inp_sp == NULL) + return (0); + + IPSEC_ASSERT(new->inp_sp != NULL, ("new inp_sp is NULL")); + IPSEC_ASSERT((new->inp_sp->flags & ( + INP_INBOUND_POLICY | INP_OUTBOUND_POLICY)) == 0, + ("new PCB already has configured policies")); + INP_WLOCK_ASSERT(new); + INP_LOCK_ASSERT(old); + + if (old->inp_sp->flags & INP_INBOUND_POLICY) { + sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_in); + if (sp == NULL) + return (ENOBUFS); + ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); + new->inp_sp->sp_in = sp; + new->inp_sp->flags |= INP_INBOUND_POLICY; + } + if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { + sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); + if (sp == NULL) + return (ENOBUFS); + ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); + new->inp_sp->sp_out = sp; + new->inp_sp->flags |= INP_OUTBOUND_POLICY; + } + return (0); +} + +static int +ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, + void *request, size_t len) +{ + struct sadb_x_policy *xpl; + struct secpolicy **spp, *newsp; + int error, flags; + + xpl = (struct sadb_x_policy *)request; + /* Select direction. */ + switch (xpl->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, + xpl->sadb_x_policy_dir)); + return (EINVAL); + } + /* + * Privileged sockets are allowed to set own security policy + * and configure IPsec bypass. Unprivileged sockets only can + * have ENTRUST policy. + */ + switch (xpl->sadb_x_policy_type) { + case IPSEC_POLICY_IPSEC: + case IPSEC_POLICY_BYPASS: + if (cred != NULL && + priv_check_cred(cred, PRIV_NETINET_IPSEC, 0) != 0) + return (EACCES); + /* Allocate new SP entry. */ + newsp = key_msg2sp(xpl, len, &error); + if (newsp == NULL) + return (error); + newsp->state = IPSEC_SPSTATE_PCB; + newsp->spidx.ul_proto = IPSEC_ULPROTO_ANY; +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + newsp->spidx.src.sin.sin_family = + newsp->spidx.dst.sin.sin_family = AF_INET; + newsp->spidx.src.sin.sin_len = + newsp->spidx.dst.sin.sin_len = + sizeof(struct sockaddr_in); + } +#endif +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + newsp->spidx.src.sin6.sin6_family = + newsp->spidx.dst.sin6.sin6_family = AF_INET6; + newsp->spidx.src.sin6.sin6_len = + newsp->spidx.dst.sin6.sin6_len = + sizeof(struct sockaddr_in6); + } +#endif + break; + case IPSEC_POLICY_ENTRUST: + /* We just use NULL pointer for ENTRUST policy */ + newsp = NULL; + break; + default: + /* Other security policy types aren't allowed for PCB */ + return (EINVAL); + } + + INP_WLOCK(inp); + if (xpl->sadb_x_policy_dir == IPSEC_DIR_INBOUND) { + spp = &inp->inp_sp->sp_in; + flags = INP_INBOUND_POLICY; + } else { + spp = &inp->inp_sp->sp_out; + flags = INP_OUTBOUND_POLICY; + } + /* Clear old SP and set new SP. */ + if (*spp != NULL) + key_freesp(spp); + *spp = newsp; + KEYDBG(IPSEC_DUMP, + printf("%s: new SP(%p)\n", __func__, newsp)); + if (newsp == NULL) + inp->inp_sp->flags &= ~flags; + else { + inp->inp_sp->flags |= flags; + KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); + } + INP_WUNLOCK(inp); + return (0); +} + +static int +ipsec_get_pcbpolicy(struct inpcb *inp, void *request, size_t *len) +{ + struct sadb_x_policy *xpl; + struct secpolicy *sp; + int error, flags; + + xpl = (struct sadb_x_policy *)request; + + INP_RLOCK(inp); + flags = inp->inp_sp->flags; + /* Select direction. */ + switch (xpl->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + sp = inp->inp_sp->sp_in; + flags &= INP_INBOUND_POLICY; + break; + case IPSEC_DIR_OUTBOUND: + sp = inp->inp_sp->sp_out; + flags &= INP_OUTBOUND_POLICY; + break; + default: + INP_RUNLOCK(inp); + ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, + xpl->sadb_x_policy_dir)); + return (EINVAL); + } + + if (flags == 0) { + /* Return ENTRUST policy */ + INP_RUNLOCK(inp); + xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; + xpl->sadb_x_policy_type = IPSEC_POLICY_ENTRUST; + xpl->sadb_x_policy_id = 0; + xpl->sadb_x_policy_priority = 0; + xpl->sadb_x_policy_len = PFKEY_UNIT64(sizeof(*xpl)); + *len = sizeof(*xpl); + return (0); + } + + IPSEC_ASSERT(sp != NULL, + ("sp is NULL, but flags is 0x%04x", inp->inp_sp->flags)); + + key_addref(sp); + INP_RUNLOCK(inp); + error = key_sp2msg(sp, request, len); + key_freesp(&sp); + if (error == EINVAL) + return (error); + /* + * We return "success", but user should check *len. + * *len will be set to size of valid data and + * sadb_x_policy_len will contain needed size. + */ + return (0); +} + +/* Handle socket option control request for PCB */ +static int +ipsec_control_pcbpolicy(struct inpcb *inp, struct sockopt *sopt) +{ + void *optdata; + size_t optlen; + int error; + + if (inp->inp_sp == NULL) + return (ENOPROTOOPT); + + /* Limit maximum request size to PAGE_SIZE */ + optlen = sopt->sopt_valsize; + if (optlen < sizeof(struct sadb_x_policy) || optlen > PAGE_SIZE) + return (EINVAL); + + optdata = malloc(optlen, M_TEMP, sopt->sopt_td ? M_WAITOK: M_NOWAIT); + if (optdata == NULL) + return (ENOBUFS); + /* + * We need a hint from the user, what policy is requested - input + * or output? User should specify it in the buffer, even for + * setsockopt(). + */ + error = sooptcopyin(sopt, optdata, optlen, optlen); + if (error == 0) { + if (sopt->sopt_dir == SOPT_SET) + error = ipsec_set_pcbpolicy(inp, + sopt->sopt_td ? sopt->sopt_td->td_ucred: NULL, + optdata, optlen); + else { + error = ipsec_get_pcbpolicy(inp, optdata, &optlen); + if (error == 0) + error = sooptcopyout(sopt, optdata, optlen); + } + } + free(optdata, M_TEMP); + return (error); +} + +#ifdef INET +/* + * IPSEC_PCBCTL() method implementation for IPv4. + */ +int +ipsec4_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + + if (sopt->sopt_name != IP_IPSEC_POLICY) + return (ENOPROTOOPT); + return (ipsec_control_pcbpolicy(inp, sopt)); +} +#endif + +#ifdef INET6 +/* + * IPSEC_PCBCTL() method implementation for IPv6. + */ +int +ipsec6_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + + if (sopt->sopt_name != IPV6_IPSEC_POLICY) + return (ENOPROTOOPT); + return (ipsec_control_pcbpolicy(inp, sopt)); +} +#endif + diff --git a/sys/netipsec/ipsec_support.h b/sys/netipsec/ipsec_support.h new file mode 100644 index 000000000000..b72aee20a7ae --- /dev/null +++ b/sys/netipsec/ipsec_support.h @@ -0,0 +1,190 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETIPSEC_IPSEC_SUPPORT_H_ +#define _NETIPSEC_IPSEC_SUPPORT_H_ + +#ifdef _KERNEL +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +struct mbuf; +struct inpcb; +struct tcphdr; +struct sockopt; +struct sockaddr; +struct ipsec_support; +struct tcpmd5_support; + +size_t ipsec_hdrsiz_inpcb(struct inpcb *); +int ipsec_init_pcbpolicy(struct inpcb *); +int ipsec_delete_pcbpolicy(struct inpcb *); +int ipsec_copy_pcbpolicy(struct inpcb *, struct inpcb *); + +struct ipsec_methods { + int (*input)(struct mbuf *, int, int); + int (*check_policy)(const struct mbuf *, struct inpcb *); + int (*forward)(struct mbuf *); + int (*output)(struct mbuf *, struct inpcb *); + int (*pcbctl)(struct inpcb *, struct sockopt *); + size_t (*hdrsize)(struct inpcb *); + int (*capability)(struct mbuf *, u_int); + int (*ctlinput)(int, struct sockaddr *, void *); + + int (*udp_input)(struct mbuf *, int, int); + int (*udp_pcbctl)(struct inpcb *, struct sockopt *); +}; +#define IPSEC_CAP_OPERABLE 1 +#define IPSEC_CAP_BYPASS_FILTER 2 + +struct tcpmd5_methods { + int (*input)(struct mbuf *, struct tcphdr *, u_char *); + int (*output)(struct mbuf *, struct tcphdr *, u_char *); + int (*pcbctl)(struct inpcb *, struct sockopt *); +}; + +#define IPSEC_MODULE_ENABLED 0x0001 +#define IPSEC_ENABLED(proto) \ + ((proto ## _ipsec_support)->enabled & IPSEC_MODULE_ENABLED) +#define TCPMD5_ENABLED() IPSEC_ENABLED(tcp) + +#ifdef TCP_SIGNATURE +/* TCP-MD5 build in the kernel */ +struct tcpmd5_support { + const u_int enabled; + const struct tcpmd5_methods * const methods; +}; +extern const struct tcpmd5_support * const tcp_ipsec_support; + +#define TCPMD5_INPUT(m, ...) \ + (*tcp_ipsec_support->methods->input)(m, __VA_ARGS__) +#define TCPMD5_OUTPUT(m, ...) \ + (*tcp_ipsec_support->methods->output)(m, __VA_ARGS__) +#define TCPMD5_PCBCTL(inp, sopt) \ + (*tcp_ipsec_support->methods->pcbctl)(inp, sopt) +#elif defined(IPSEC_SUPPORT) +/* TCP-MD5 build as module */ +struct tcpmd5_support { + volatile u_int enabled; + const struct tcpmd5_methods * volatile methods; +}; +extern struct tcpmd5_support * const tcp_ipsec_support; + +void tcpmd5_support_enable(const struct tcpmd5_methods * const); +void tcpmd5_support_disable(void); + +int tcpmd5_kmod_pcbctl(struct tcpmd5_support * const, struct inpcb *, + struct sockopt *); +int tcpmd5_kmod_input(struct tcpmd5_support * const, struct mbuf *, + struct tcphdr *, u_char *); +int tcpmd5_kmod_output(struct tcpmd5_support * const, struct mbuf *, + struct tcphdr *, u_char *); +#define TCPMD5_INPUT(m, ...) \ + tcpmd5_kmod_input(tcp_ipsec_support, m, __VA_ARGS__) +#define TCPMD5_OUTPUT(m, ...) \ + tcpmd5_kmod_output(tcp_ipsec_support, m, __VA_ARGS__) +#define TCPMD5_PCBCTL(inp, sopt) \ + tcpmd5_kmod_pcbctl(tcp_ipsec_support, inp, sopt) +#endif + +#endif /* IPSEC || IPSEC_SUPPORT */ + +#if defined(IPSEC) +struct ipsec_support { + const u_int enabled; + const struct ipsec_methods * const methods; +}; +extern const struct ipsec_support * const ipv4_ipsec_support; +extern const struct ipsec_support * const ipv6_ipsec_support; + +#define IPSEC_INPUT(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->input)(m, __VA_ARGS__) +#define IPSEC_CHECK_POLICY(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->check_policy)(m, __VA_ARGS__) +#define IPSEC_FORWARD(proto, m) \ + (*(proto ## _ipsec_support)->methods->forward)(m) +#define IPSEC_OUTPUT(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->output)(m, __VA_ARGS__) +#define IPSEC_PCBCTL(proto, inp, sopt) \ + (*(proto ## _ipsec_support)->methods->pcbctl)(inp, sopt) +#define IPSEC_CAPS(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->capability)(m, __VA_ARGS__) +#define IPSEC_HDRSIZE(proto, inp) \ + (*(proto ## _ipsec_support)->methods->hdrsize)(inp) + +#define UDPENCAP_INPUT(m, ...) \ + (*ipv4_ipsec_support->methods->udp_input)(m, __VA_ARGS__) +#define UDPENCAP_PCBCTL(inp, sopt) \ + (*ipv4_ipsec_support->methods->udp_pcbctl)(inp, sopt) + +#elif defined(IPSEC_SUPPORT) +struct ipsec_support { + volatile u_int enabled; + const struct ipsec_methods * volatile methods; +}; +extern struct ipsec_support * const ipv4_ipsec_support; +extern struct ipsec_support * const ipv6_ipsec_support; + +void ipsec_support_enable(struct ipsec_support * const, + const struct ipsec_methods * const); +void ipsec_support_disable(struct ipsec_support * const); + +int ipsec_kmod_input(struct ipsec_support * const, struct mbuf *, int, int); +int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, + struct inpcb *); +int ipsec_kmod_forward(struct ipsec_support * const, struct mbuf *); +int ipsec_kmod_output(struct ipsec_support * const, struct mbuf *, + struct inpcb *); +int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, + struct sockopt *); +int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); +size_t ipsec_kmod_hdrsize(struct ipsec_support * const, struct inpcb *); +int ipsec_kmod_udp_input(struct ipsec_support * const, struct mbuf *, int, int); +int ipsec_kmod_udp_pcbctl(struct ipsec_support * const, struct inpcb *, + struct sockopt *); + +#define UDPENCAP_INPUT(m, ...) \ + ipsec_kmod_udp_input(ipv4_ipsec_support, m, __VA_ARGS__) +#define UDPENCAP_PCBCTL(inp, sopt) \ + ipsec_kmod_udp_pcbctl(ipv4_ipsec_support, inp, sopt) + +#define IPSEC_INPUT(proto, ...) \ + ipsec_kmod_input(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CHECK_POLICY(proto, ...) \ + ipsec_kmod_check_policy(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_FORWARD(proto, ...) \ + ipsec_kmod_forward(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_OUTPUT(proto, ...) \ + ipsec_kmod_output(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_PCBCTL(proto, ...) \ + ipsec_kmod_pcbctl(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CAPS(proto, ...) \ + ipsec_kmod_capability(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_HDRSIZE(proto, ...) \ + ipsec_kmod_hdrsize(proto ## _ipsec_support, __VA_ARGS__) +#endif /* IPSEC_SUPPORT */ +#endif /* _KERNEL */ +#endif /* _NETIPSEC_IPSEC_SUPPORT_H_ */ diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index 2a5e0a60e626..636fbf2e8f7c 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -1,8033 +1,8374 @@ /* $FreeBSD$ */ /* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This code is referd to RFC 2367 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + #include #include #include #include #include #include #include #include +#include #ifdef INET6 #include #include #include #endif /* INET6 */ -#if defined(INET) || defined(INET6) -#include -#endif -#ifdef INET6 -#include -#endif /* INET6 */ - #include #include #include #include #include #include #ifdef INET6 #include #endif #include - +#include #include /* randomness */ #include #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) /* * Note on SA reference counting: * - SAs that are not in DEAD state will have (total external reference + 1) * following value in reference count field. they cannot be freed and are * referenced from SA header. * - SAs that are in DEAD state will have (total external reference) * in reference count field. they are ready to be freed. reference from * SA header will be removed in key_delsav(), when the reference count * field hits 0 (= no external reference other than from SA header. */ VNET_DEFINE(u_int32_t, key_debug_level) = 0; static VNET_DEFINE(u_int, key_spi_trycnt) = 1000; static VNET_DEFINE(u_int32_t, key_spi_minval) = 0x100; static VNET_DEFINE(u_int32_t, key_spi_maxval) = 0x0fffffff; /* XXX */ static VNET_DEFINE(u_int32_t, policy_id) = 0; /*interval to initialize randseed,1(m)*/ static VNET_DEFINE(u_int, key_int_random) = 60; /* interval to expire acquiring, 30(s)*/ static VNET_DEFINE(u_int, key_larval_lifetime) = 30; /* counter for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_count) = 10; /* lifetime for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_lifetime) = 20; /* preferred old sa rather than new sa.*/ static VNET_DEFINE(int, key_preferred_oldsa) = 1; #define V_key_spi_trycnt VNET(key_spi_trycnt) #define V_key_spi_minval VNET(key_spi_minval) #define V_key_spi_maxval VNET(key_spi_maxval) #define V_policy_id VNET(policy_id) #define V_key_int_random VNET(key_int_random) #define V_key_larval_lifetime VNET(key_larval_lifetime) #define V_key_blockacq_count VNET(key_blockacq_count) #define V_key_blockacq_lifetime VNET(key_blockacq_lifetime) #define V_key_preferred_oldsa VNET(key_preferred_oldsa) static VNET_DEFINE(u_int32_t, acq_seq) = 0; #define V_acq_seq VNET(acq_seq) - /* SPD */ -static VNET_DEFINE(TAILQ_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(uint32_t, sp_genid) = 0; +#define V_sp_genid VNET(sp_genid) + +/* SPD */ +TAILQ_HEAD(secpolicy_queue, secpolicy); +LIST_HEAD(secpolicy_list, secpolicy); +static VNET_DEFINE(struct secpolicy_queue, sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(struct secpolicy_queue, sptree_ifnet[IPSEC_DIR_MAX]); static struct rmlock sptree_lock; #define V_sptree VNET(sptree) +#define V_sptree_ifnet VNET(sptree_ifnet) #define SPTREE_LOCK_INIT() rm_init(&sptree_lock, "sptree") #define SPTREE_LOCK_DESTROY() rm_destroy(&sptree_lock) #define SPTREE_RLOCK_TRACKER struct rm_priotracker sptree_tracker #define SPTREE_RLOCK() rm_rlock(&sptree_lock, &sptree_tracker) #define SPTREE_RUNLOCK() rm_runlock(&sptree_lock, &sptree_tracker) #define SPTREE_RLOCK_ASSERT() rm_assert(&sptree_lock, RA_RLOCKED) #define SPTREE_WLOCK() rm_wlock(&sptree_lock) #define SPTREE_WUNLOCK() rm_wunlock(&sptree_lock) #define SPTREE_WLOCK_ASSERT() rm_assert(&sptree_lock, RA_WLOCKED) #define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED) -static VNET_DEFINE(LIST_HEAD(_sahtree, secashead), sahtree); /* SAD */ +/* Hash table for lookup SP using unique id */ +static VNET_DEFINE(struct secpolicy_list *, sphashtbl); +static VNET_DEFINE(u_long, sphash_mask); +#define V_sphashtbl VNET(sphashtbl) +#define V_sphash_mask VNET(sphash_mask) + +#define SPHASH_NHASH_LOG2 7 +#define SPHASH_NHASH (1 << SPHASH_NHASH_LOG2) +#define SPHASH_HASHVAL(id) (key_u32hash(id) & V_sphash_mask) +#define SPHASH_HASH(id) &V_sphashtbl[SPHASH_HASHVAL(id)] + +/* SAD */ +TAILQ_HEAD(secashead_queue, secashead); +LIST_HEAD(secashead_list, secashead); +static VNET_DEFINE(struct secashead_queue, sahtree); +static struct rmlock sahtree_lock; #define V_sahtree VNET(sahtree) -static struct mtx sahtree_lock; -#define SAHTREE_LOCK_INIT() \ - mtx_init(&sahtree_lock, "sahtree", \ - "fast ipsec security association database", MTX_DEF) -#define SAHTREE_LOCK_DESTROY() mtx_destroy(&sahtree_lock) -#define SAHTREE_LOCK() mtx_lock(&sahtree_lock) -#define SAHTREE_UNLOCK() mtx_unlock(&sahtree_lock) -#define SAHTREE_LOCK_ASSERT() mtx_assert(&sahtree_lock, MA_OWNED) +#define SAHTREE_LOCK_INIT() rm_init(&sahtree_lock, "sahtree") +#define SAHTREE_LOCK_DESTROY() rm_destroy(&sahtree_lock) +#define SAHTREE_RLOCK_TRACKER struct rm_priotracker sahtree_tracker +#define SAHTREE_RLOCK() rm_rlock(&sahtree_lock, &sahtree_tracker) +#define SAHTREE_RUNLOCK() rm_runlock(&sahtree_lock, &sahtree_tracker) +#define SAHTREE_RLOCK_ASSERT() rm_assert(&sahtree_lock, RA_RLOCKED) +#define SAHTREE_WLOCK() rm_wlock(&sahtree_lock) +#define SAHTREE_WUNLOCK() rm_wunlock(&sahtree_lock) +#define SAHTREE_WLOCK_ASSERT() rm_assert(&sahtree_lock, RA_WLOCKED) +#define SAHTREE_UNLOCK_ASSERT() rm_assert(&sahtree_lock, RA_UNLOCKED) + +/* Hash table for lookup in SAD using SA addresses */ +static VNET_DEFINE(struct secashead_list *, sahaddrhashtbl); +static VNET_DEFINE(u_long, sahaddrhash_mask); +#define V_sahaddrhashtbl VNET(sahaddrhashtbl) +#define V_sahaddrhash_mask VNET(sahaddrhash_mask) + +#define SAHHASH_NHASH_LOG2 7 +#define SAHHASH_NHASH (1 << SAHHASH_NHASH_LOG2) +#define SAHADDRHASH_HASHVAL(saidx) \ + (key_saidxhash(saidx) & V_sahaddrhash_mask) +#define SAHADDRHASH_HASH(saidx) \ + &V_sahaddrhashtbl[SAHADDRHASH_HASHVAL(saidx)] + +/* Hash table for lookup in SAD using SPI */ +LIST_HEAD(secasvar_list, secasvar); +static VNET_DEFINE(struct secasvar_list *, savhashtbl); +static VNET_DEFINE(u_long, savhash_mask); +#define V_savhashtbl VNET(savhashtbl) +#define V_savhash_mask VNET(savhash_mask) +#define SAVHASH_NHASH_LOG2 7 +#define SAVHASH_NHASH (1 << SAVHASH_NHASH_LOG2) +#define SAVHASH_HASHVAL(spi) (key_u32hash(spi) & V_savhash_mask) +#define SAVHASH_HASH(spi) &V_savhashtbl[SAVHASH_HASHVAL(spi)] + +static uint32_t +key_saidxhash(const struct secasindex *saidx) +{ + uint32_t hval; + + hval = fnv_32_buf(&saidx->proto, sizeof(saidx->proto), + FNV1_32_INIT); + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + hval = fnv_32_buf(&saidx->src.sin.sin_addr, + sizeof(in_addr_t), hval); + hval = fnv_32_buf(&saidx->dst.sin.sin_addr, + sizeof(in_addr_t), hval); + break; +#endif +#ifdef INET6 + case AF_INET6: + hval = fnv_32_buf(&saidx->src.sin6.sin6_addr, + sizeof(struct in6_addr), hval); + hval = fnv_32_buf(&saidx->dst.sin6.sin6_addr, + sizeof(struct in6_addr), hval); + break; +#endif + default: + hval = 0; + ipseclog((LOG_DEBUG, "%s: unknown address family %d", + __func__, saidx->dst.sa.sa_family)); + } + return (hval); +} + +static uint32_t +key_u32hash(uint32_t val) +{ + + return (fnv_32_buf(&val, sizeof(val), FNV1_32_INIT)); +} /* registed list */ static VNET_DEFINE(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]); #define V_regtree VNET(regtree) static struct mtx regtree_lock; #define REGTREE_LOCK_INIT() \ mtx_init(®tree_lock, "regtree", "fast ipsec regtree", MTX_DEF) #define REGTREE_LOCK_DESTROY() mtx_destroy(®tree_lock) #define REGTREE_LOCK() mtx_lock(®tree_lock) #define REGTREE_UNLOCK() mtx_unlock(®tree_lock) #define REGTREE_LOCK_ASSERT() mtx_assert(®tree_lock, MA_OWNED) -static VNET_DEFINE(LIST_HEAD(_acqtree, secacq), acqtree); /* acquiring list */ +/* Acquiring list */ +LIST_HEAD(secacq_list, secacq); +static VNET_DEFINE(struct secacq_list, acqtree); #define V_acqtree VNET(acqtree) static struct mtx acq_lock; #define ACQ_LOCK_INIT() \ - mtx_init(&acq_lock, "acqtree", "fast ipsec acquire list", MTX_DEF) + mtx_init(&acq_lock, "acqtree", "ipsec SA acquiring list", MTX_DEF) #define ACQ_LOCK_DESTROY() mtx_destroy(&acq_lock) #define ACQ_LOCK() mtx_lock(&acq_lock) #define ACQ_UNLOCK() mtx_unlock(&acq_lock) #define ACQ_LOCK_ASSERT() mtx_assert(&acq_lock, MA_OWNED) +/* Hash table for lookup in ACQ list using SA addresses */ +static VNET_DEFINE(struct secacq_list *, acqaddrhashtbl); +static VNET_DEFINE(u_long, acqaddrhash_mask); +#define V_acqaddrhashtbl VNET(acqaddrhashtbl) +#define V_acqaddrhash_mask VNET(acqaddrhash_mask) + +/* Hash table for lookup in ACQ list using SEQ number */ +static VNET_DEFINE(struct secacq_list *, acqseqhashtbl); +static VNET_DEFINE(u_long, acqseqhash_mask); +#define V_acqseqhashtbl VNET(acqseqhashtbl) +#define V_acqseqhash_mask VNET(acqseqhash_mask) + +#define ACQHASH_NHASH_LOG2 7 +#define ACQHASH_NHASH (1 << ACQHASH_NHASH_LOG2) +#define ACQADDRHASH_HASHVAL(saidx) \ + (key_saidxhash(saidx) & V_acqaddrhash_mask) +#define ACQSEQHASH_HASHVAL(seq) \ + (key_u32hash(seq) & V_acqseqhash_mask) +#define ACQADDRHASH_HASH(saidx) \ + &V_acqaddrhashtbl[ACQADDRHASH_HASHVAL(saidx)] +#define ACQSEQHASH_HASH(seq) \ + &V_acqseqhashtbl[ACQSEQHASH_HASHVAL(seq)] /* SP acquiring list */ static VNET_DEFINE(LIST_HEAD(_spacqtree, secspacq), spacqtree); #define V_spacqtree VNET(spacqtree) static struct mtx spacq_lock; #define SPACQ_LOCK_INIT() \ mtx_init(&spacq_lock, "spacqtree", \ "fast ipsec security policy acquire list", MTX_DEF) #define SPACQ_LOCK_DESTROY() mtx_destroy(&spacq_lock) #define SPACQ_LOCK() mtx_lock(&spacq_lock) #define SPACQ_UNLOCK() mtx_unlock(&spacq_lock) #define SPACQ_LOCK_ASSERT() mtx_assert(&spacq_lock, MA_OWNED) -/* search order for SAs */ -static const u_int saorder_state_valid_prefer_old[] = { - SADB_SASTATE_DYING, SADB_SASTATE_MATURE, -}; -static const u_int saorder_state_valid_prefer_new[] = { - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, -}; -static const u_int saorder_state_alive[] = { - /* except DEAD */ - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL -}; -static const u_int saorder_state_any[] = { - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, - SADB_SASTATE_LARVAL, SADB_SASTATE_DEAD -}; - static const int minsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_CURRENT */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_HARD */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_SOFT */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_SRC */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_DST */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_PROXY */ sizeof(struct sadb_key), /* SADB_EXT_KEY_AUTH */ sizeof(struct sadb_key), /* SADB_EXT_KEY_ENCRYPT */ sizeof(struct sadb_ident), /* SADB_EXT_IDENTITY_SRC */ sizeof(struct sadb_ident), /* SADB_EXT_IDENTITY_DST */ sizeof(struct sadb_sens), /* SADB_EXT_SENSITIVITY */ sizeof(struct sadb_prop), /* SADB_EXT_PROPOSAL */ sizeof(struct sadb_supported), /* SADB_EXT_SUPPORTED_AUTH */ sizeof(struct sadb_supported), /* SADB_EXT_SUPPORTED_ENCRYPT */ sizeof(struct sadb_spirange), /* SADB_EXT_SPIRANGE */ 0, /* SADB_X_EXT_KMPRIVATE */ sizeof(struct sadb_x_policy), /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAI */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ sizeof(struct sadb_x_sa_replay), /* SADB_X_EXT_SA_REPLAY */ + sizeof(struct sadb_address), /* SADB_X_EXT_NEW_ADDRESS_SRC */ + sizeof(struct sadb_address), /* SADB_X_EXT_NEW_ADDRESS_DST */ }; _Static_assert(sizeof(minsize)/sizeof(int) == SADB_EXT_MAX + 1, "minsize size mismatch"); static const int maxsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_CURRENT */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_HARD */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_SOFT */ 0, /* SADB_EXT_ADDRESS_SRC */ 0, /* SADB_EXT_ADDRESS_DST */ 0, /* SADB_EXT_ADDRESS_PROXY */ 0, /* SADB_EXT_KEY_AUTH */ 0, /* SADB_EXT_KEY_ENCRYPT */ 0, /* SADB_EXT_IDENTITY_SRC */ 0, /* SADB_EXT_IDENTITY_DST */ 0, /* SADB_EXT_SENSITIVITY */ 0, /* SADB_EXT_PROPOSAL */ 0, /* SADB_EXT_SUPPORTED_AUTH */ 0, /* SADB_EXT_SUPPORTED_ENCRYPT */ sizeof(struct sadb_spirange), /* SADB_EXT_SPIRANGE */ 0, /* SADB_X_EXT_KMPRIVATE */ 0, /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */ 0, /* SADB_X_EXT_NAT_T_OAI */ 0, /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ sizeof(struct sadb_x_sa_replay), /* SADB_X_EXT_SA_REPLAY */ + 0, /* SADB_X_EXT_NEW_ADDRESS_SRC */ + 0, /* SADB_X_EXT_NEW_ADDRESS_DST */ }; _Static_assert(sizeof(maxsize)/sizeof(int) == SADB_EXT_MAX + 1, "minsize size mismatch"); +/* + * Internal values for SA flags: + * SADB_X_EXT_F_CLONED means that SA was cloned by key_updateaddresses, + * thus we will not free the most of SA content in key_delsav(). + */ +#define SADB_X_EXT_F_CLONED 0x80000000 + +#define SADB_CHECKLEN(_mhp, _ext) \ + ((_mhp)->extlen[(_ext)] < minsize[(_ext)] || (maxsize[(_ext)] != 0 && \ + ((_mhp)->extlen[(_ext)] > maxsize[(_ext)]))) +#define SADB_CHECKHDR(_mhp, _ext) ((_mhp)->ext[(_ext)] == NULL) + static VNET_DEFINE(int, ipsec_esp_keymin) = 256; static VNET_DEFINE(int, ipsec_esp_auth) = 0; static VNET_DEFINE(int, ipsec_ah_keymin) = 128; #define V_ipsec_esp_keymin VNET(ipsec_esp_keymin) #define V_ipsec_esp_auth VNET(ipsec_esp_auth) #define V_ipsec_ah_keymin VNET(ipsec_ah_keymin) -#ifdef SYSCTL_DECL -SYSCTL_DECL(_net_key); +#ifdef IPSEC_DEBUG +VNET_DEFINE(int, ipsec_debug) = 1; +#else +VNET_DEFINE(int, ipsec_debug) = 0; +#endif + +#ifdef INET +SYSCTL_DECL(_net_inet_ipsec); +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, + "Enable IPsec debugging output when set."); +#endif +#ifdef INET6 +SYSCTL_DECL(_net_inet6_ipsec6); +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, + "Enable IPsec debugging output when set."); #endif +SYSCTL_DECL(_net_key); SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, ""); /* max count of trial for the decision of spi value */ SYSCTL_INT(_net_key, KEYCTL_SPI_TRY, spi_trycnt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_trycnt), 0, ""); /* minimum spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE, spi_minval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_minval), 0, ""); /* maximun spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE, spi_maxval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_maxval), 0, ""); /* interval to initialize randseed */ SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT, int_random, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_int_random), 0, ""); /* lifetime for larval SA */ SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME, larval_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_larval_lifetime), 0, ""); /* counter for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT, blockacq_count, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_count), 0, ""); /* lifetime for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME, blockacq_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_lifetime), 0, ""); /* ESP auth */ SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH, esp_auth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_auth), 0, ""); /* minimum ESP key length */ SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN, esp_keymin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_keymin), 0, ""); /* minimum AH key length */ SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_ah_keymin), 0, ""); /* perfered old SA rather than new SA */ SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, preferred_oldsa, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, ""); #define __LIST_CHAINED(elm) \ (!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) -#define LIST_INSERT_TAIL(head, elm, type, field) \ -do {\ - struct type *curelm = LIST_FIRST(head); \ - if (curelm == NULL) {\ - LIST_INSERT_HEAD(head, elm, field); \ - } else { \ - while (LIST_NEXT(curelm, field)) \ - curelm = LIST_NEXT(curelm, field);\ - LIST_INSERT_AFTER(curelm, elm, field);\ - }\ -} while (0) - -#define KEY_CHKSASTATE(head, sav, name) \ -do { \ - if ((head) != (sav)) { \ - ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ - (name), (head), (sav))); \ - break; \ - } \ -} while (0) - -#define KEY_CHKSPDIR(head, sp, name) \ -do { \ - if ((head) != (sp)) { \ - ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ - "anyway continue.\n", \ - (name), (head), (sp))); \ - } \ -} while (0) MALLOC_DEFINE(M_IPSEC_SA, "secasvar", "ipsec security association"); MALLOC_DEFINE(M_IPSEC_SAH, "sahead", "ipsec sa head"); MALLOC_DEFINE(M_IPSEC_SP, "ipsecpolicy", "ipsec security policy"); MALLOC_DEFINE(M_IPSEC_SR, "ipsecrequest", "ipsec security request"); MALLOC_DEFINE(M_IPSEC_MISC, "ipsec-misc", "ipsec miscellaneous"); MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire"); MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire"); +static VNET_DEFINE(uma_zone_t, key_lft_zone); +#define V_key_lft_zone VNET(key_lft_zone) + +static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER(); +static struct mtx xforms_lock; +#define XFORMS_LOCK_INIT() \ + mtx_init(&xforms_lock, "xforms_list", "IPsec transforms list", MTX_DEF) +#define XFORMS_LOCK_DESTROY() mtx_destroy(&xforms_lock) +#define XFORMS_LOCK() mtx_lock(&xforms_lock) +#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock) + /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. */ #define KEY_SETSECSPIDX(_dir, s, d, ps, pd, ulp, idx) \ do { \ bzero((idx), sizeof(struct secpolicyindex)); \ (idx)->dir = (_dir); \ (idx)->prefs = (ps); \ (idx)->prefd = (pd); \ (idx)->ul_proto = (ulp); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* * set parameters into secasindex buffer. * Must allocate secasindex buffer before calling this function. */ #define KEY_SETSECASIDX(p, m, r, s, d, idx) \ do { \ bzero((idx), sizeof(struct secasindex)); \ (idx)->proto = (p); \ (idx)->mode = (m); \ (idx)->reqid = (r); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ + key_porttosaddr(&(idx)->src.sa, 0); \ + key_porttosaddr(&(idx)->dst.sa, 0); \ } while (0) /* key statistics */ struct _keystat { u_long getspi_count; /* the avarage of count to try to get new SPI */ } keystat; struct sadb_msghdr { struct sadb_msg *msg; struct sadb_ext *ext[SADB_EXT_MAX + 1]; int extoff[SADB_EXT_MAX + 1]; int extlen[SADB_EXT_MAX + 1]; }; +static struct supported_ealgs { + int sadb_alg; + const struct enc_xform *xform; +} supported_ealgs[] = { + { SADB_EALG_DESCBC, &enc_xform_des }, + { SADB_EALG_3DESCBC, &enc_xform_3des }, + { SADB_X_EALG_AES, &enc_xform_rijndael128 }, + { SADB_X_EALG_BLOWFISHCBC, &enc_xform_blf }, + { SADB_X_EALG_CAST128CBC, &enc_xform_cast5 }, + { SADB_EALG_NULL, &enc_xform_null }, + { SADB_X_EALG_CAMELLIACBC, &enc_xform_camellia }, + { SADB_X_EALG_AESCTR, &enc_xform_aes_icm }, + { SADB_X_EALG_AESGCM16, &enc_xform_aes_nist_gcm }, + { SADB_X_EALG_AESGMAC, &enc_xform_aes_nist_gmac }, +}; + +static struct supported_aalgs { + int sadb_alg; + const struct auth_hash *xform; +} supported_aalgs[] = { + { SADB_X_AALG_NULL, &auth_hash_null }, + { SADB_AALG_MD5HMAC, &auth_hash_hmac_md5 }, + { SADB_AALG_SHA1HMAC, &auth_hash_hmac_sha1 }, + { SADB_X_AALG_RIPEMD160HMAC, &auth_hash_hmac_ripemd_160 }, + { SADB_X_AALG_MD5, &auth_hash_key_md5 }, + { SADB_X_AALG_SHA, &auth_hash_key_sha1 }, + { SADB_X_AALG_SHA2_256, &auth_hash_hmac_sha2_256 }, + { SADB_X_AALG_SHA2_384, &auth_hash_hmac_sha2_384 }, + { SADB_X_AALG_SHA2_512, &auth_hash_hmac_sha2_512 }, + { SADB_X_AALG_AES128GMAC, &auth_hash_nist_gmac_aes_128 }, + { SADB_X_AALG_AES192GMAC, &auth_hash_nist_gmac_aes_192 }, + { SADB_X_AALG_AES256GMAC, &auth_hash_nist_gmac_aes_256 }, +}; + +static struct supported_calgs { + int sadb_alg; + const struct comp_algo *xform; +} supported_calgs[] = { + { SADB_X_CALG_DEFLATE, &comp_algo_deflate }, +}; + #ifndef IPSEC_DEBUG2 static struct callout key_timer; #endif -static struct secasvar *key_allocsa_policy(const struct secasindex *); -static void key_freesp_so(struct secpolicy **); -static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int); static void key_unlink(struct secpolicy *); static struct secpolicy *key_getsp(struct secpolicyindex *); static struct secpolicy *key_getspbyid(u_int32_t); -static u_int32_t key_newreqid(void); static struct mbuf *key_gather_mbuf(struct mbuf *, const struct sadb_msghdr *, int, int, ...); static int key_spdadd(struct socket *, struct mbuf *, const struct sadb_msghdr *); -static u_int32_t key_getnewspid(void); +static uint32_t key_getnewspid(void); static int key_spddelete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddelete2(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spdget(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spdflush(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static struct mbuf *key_setdumpsp(struct secpolicy *, u_int8_t, u_int32_t, u_int32_t); -static u_int key_getspreqmsglen(struct secpolicy *); +static struct mbuf *key_sp2mbuf(struct secpolicy *); +static size_t key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); static struct secashead *key_newsah(struct secasindex *); +static void key_freesah(struct secashead **); static void key_delsah(struct secashead *); -static struct secasvar *key_newsav(struct mbuf *, - const struct sadb_msghdr *, struct secashead *, int *, - const char*, int); -#define KEY_NEWSAV(m, sadb, sah, e) \ - key_newsav(m, sadb, sah, e, __FILE__, __LINE__) +static struct secasvar *key_newsav(const struct sadb_msghdr *, + struct secasindex *, uint32_t, int *); static void key_delsav(struct secasvar *); +static void key_unlinksav(struct secasvar *); static struct secashead *key_getsah(struct secasindex *); -static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t); -static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t); -static int key_setsaval(struct secasvar *, struct mbuf *, - const struct sadb_msghdr *); -static int key_mature(struct secasvar *); +static int key_checkspidup(uint32_t); +static struct secasvar *key_getsavbyspi(uint32_t); +static int key_setnatt(struct secasvar *, const struct sadb_msghdr *); +static int key_setsaval(struct secasvar *, const struct sadb_msghdr *); +static int key_updatelifetimes(struct secasvar *, const struct sadb_msghdr *); +static int key_updateaddresses(struct socket *, struct mbuf *, + const struct sadb_msghdr *, struct secasvar *, struct secasindex *); + static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); static struct mbuf *key_setsadbaddr(u_int16_t, const struct sockaddr *, u_int8_t, u_int16_t); -#ifdef IPSEC_NAT_T static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t); static struct mbuf *key_setsadbxtype(u_int16_t); -#endif -static void key_porttosaddr(struct sockaddr *, u_int16_t); -#define KEY_PORTTOSADDR(saddr, port) \ - key_porttosaddr((struct sockaddr *)(saddr), (port)) static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbxsareplay(u_int32_t); static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, u_int32_t, u_int32_t); -static struct seckey *key_dup_keymsg(const struct sadb_key *, u_int, - struct malloc_type *); +static struct seckey *key_dup_keymsg(const struct sadb_key *, size_t, + struct malloc_type *); static struct seclifetime *key_dup_lifemsg(const struct sadb_lifetime *src, - struct malloc_type *type); -#ifdef INET6 -static int key_ismyaddr6(struct sockaddr_in6 *); -#endif + struct malloc_type *); /* flags for key_cmpsaidx() */ #define CMP_HEAD 1 /* protocol, addresses. */ #define CMP_MODE_REQID 2 /* additionally HEAD, reqid, mode. */ #define CMP_REQID 3 /* additionally HEAD, reaid. */ #define CMP_EXACTLY 4 /* all elements. */ static int key_cmpsaidx(const struct secasindex *, const struct secasindex *, int); static int key_cmpspidx_exactly(struct secpolicyindex *, struct secpolicyindex *); static int key_cmpspidx_withmask(struct secpolicyindex *, struct secpolicyindex *); -static int key_sockaddrcmp(const struct sockaddr *, - const struct sockaddr *, int); static int key_bbcmp(const void *, const void *, u_int); -static u_int16_t key_satype2proto(u_int8_t); -static u_int8_t key_proto2satype(u_int16_t); +static uint8_t key_satype2proto(uint8_t); +static uint8_t key_proto2satype(uint8_t); static int key_getspi(struct socket *, struct mbuf *, const struct sadb_msghdr *); -static u_int32_t key_do_getnewspi(struct sadb_spirange *, - struct secasindex *); +static uint32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); static int key_update(struct socket *, struct mbuf *, const struct sadb_msghdr *); -#ifdef IPSEC_DOSEQCHECK -static struct secasvar *key_getsavbyseq(struct secashead *, u_int32_t); -#endif static int key_add(struct socket *, struct mbuf *, const struct sadb_msghdr *); -static int key_setident(struct secashead *, struct mbuf *, - const struct sadb_msghdr *); +static int key_setident(struct secashead *, const struct sadb_msghdr *); static struct mbuf *key_getmsgbuf_x1(struct mbuf *, const struct sadb_msghdr *); static int key_delete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_delete_all(struct socket *, struct mbuf *, - const struct sadb_msghdr *, u_int16_t); + const struct sadb_msghdr *, struct secasindex *); +static void key_delete_xform(const struct xformsw *); static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); static void key_getcomb_setlifetime(struct sadb_comb *); -static struct mbuf *key_getcomb_esp(void); +static struct mbuf *key_getcomb_ealg(void); static struct mbuf *key_getcomb_ah(void); static struct mbuf *key_getcomb_ipcomp(void); static struct mbuf *key_getprop(const struct secasindex *); static int key_acquire(const struct secasindex *, struct secpolicy *); -static struct secacq *key_newacq(const struct secasindex *); -static struct secacq *key_getacq(const struct secasindex *); -static struct secacq *key_getacqbyseq(u_int32_t); +static uint32_t key_newacq(const struct secasindex *, int *); +static uint32_t key_getacq(const struct secasindex *, int *); +static int key_acqdone(const struct secasindex *, uint32_t); +static int key_acqreset(uint32_t); static struct secspacq *key_newspacq(struct secpolicyindex *); static struct secspacq *key_getspacq(struct secpolicyindex *); static int key_acquire2(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_register(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_expire(struct secasvar *, int); static int key_flush(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_dump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_promisc(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_senderror(struct socket *, struct mbuf *, int); static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); -static struct mbuf *key_setlifetime(struct seclifetime *src, - u_int16_t exttype); -static struct mbuf *key_setkey(struct seckey *src, u_int16_t exttype); - -#if 0 -static const char *key_getfqdn(void); -static const char *key_getuserfqdn(void); -#endif -static void key_sa_chgstate(struct secasvar *, u_int8_t); - -static __inline void -sa_initref(struct secasvar *sav) -{ +static struct mbuf *key_setlifetime(struct seclifetime *, uint16_t); +static struct mbuf *key_setkey(struct seckey *, uint16_t); +static int xform_init(struct secasvar *, u_short); + +#define DBG_IPSEC_INITREF(t, p) do { \ + refcount_init(&(p)->refcnt, 1); \ + KEYDBG(KEY_STAMP, \ + printf("%s: Initialize refcnt %s(%p) = %u\n", \ + __func__, #t, (p), (p)->refcnt)); \ +} while (0) +#define DBG_IPSEC_ADDREF(t, p) do { \ + refcount_acquire(&(p)->refcnt); \ + KEYDBG(KEY_STAMP, \ + printf("%s: Acquire refcnt %s(%p) -> %u\n", \ + __func__, #t, (p), (p)->refcnt)); \ +} while (0) +#define DBG_IPSEC_DELREF(t, p) do { \ + KEYDBG(KEY_STAMP, \ + printf("%s: Release refcnt %s(%p) -> %u\n", \ + __func__, #t, (p), (p)->refcnt - 1)); \ + refcount_release(&(p)->refcnt); \ +} while (0) - refcount_init(&sav->refcnt, 1); -} -static __inline void -sa_addref(struct secasvar *sav) -{ +#define IPSEC_INITREF(t, p) refcount_init(&(p)->refcnt, 1) +#define IPSEC_ADDREF(t, p) refcount_acquire(&(p)->refcnt) +#define IPSEC_DELREF(t, p) refcount_release(&(p)->refcnt) - refcount_acquire(&sav->refcnt); - IPSEC_ASSERT(sav->refcnt != 0, ("SA refcnt overflow")); -} -static __inline int -sa_delref(struct secasvar *sav) -{ +#define SP_INITREF(p) IPSEC_INITREF(SP, p) +#define SP_ADDREF(p) IPSEC_ADDREF(SP, p) +#define SP_DELREF(p) IPSEC_DELREF(SP, p) - IPSEC_ASSERT(sav->refcnt > 0, ("SA refcnt underflow")); - return (refcount_release(&sav->refcnt)); -} +#define SAH_INITREF(p) IPSEC_INITREF(SAH, p) +#define SAH_ADDREF(p) IPSEC_ADDREF(SAH, p) +#define SAH_DELREF(p) IPSEC_DELREF(SAH, p) -#define SP_ADDREF(p) refcount_acquire(&(p)->refcnt) -#define SP_DELREF(p) refcount_release(&(p)->refcnt) +#define SAV_INITREF(p) IPSEC_INITREF(SAV, p) +#define SAV_ADDREF(p) IPSEC_ADDREF(SAV, p) +#define SAV_DELREF(p) IPSEC_DELREF(SAV, p) /* * Update the refcnt while holding the SPTREE lock. */ void key_addref(struct secpolicy *sp) { SP_ADDREF(sp); } /* * Return 0 when there are known to be no SP's for the specified * direction. Otherwise return 1. This is used by IPsec code * to optimize performance. */ int key_havesp(u_int dir) { return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ? TAILQ_FIRST(&V_sptree[dir]) != NULL : 1); } /* %%% IPsec policy management */ /* - * allocating a SP for OUTBOUND or INBOUND packet. - * Must call key_freesp() later. - * OUT: NULL: not found - * others: found and return the pointer. + * Return current SPDB generation. */ -struct secpolicy * -key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, - int tag) +uint32_t +key_getspgen(void) { - SPTREE_RLOCK_TRACKER; - struct secpolicy *sp; - IPSEC_ASSERT(spidx != NULL, ("null spidx")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + return (V_sp_genid); +} - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); +void +key_bumpspgen(void) +{ - /* get a SP entry */ - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - kdebug_secpolicyindex(spidx)); + V_sp_genid++; +} - SPTREE_RLOCK(); - TAILQ_FOREACH(sp, &V_sptree[dir], chain) { - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); - if (key_cmpspidx_withmask(&sp->spidx, spidx)) - goto found; - } - sp = NULL; -found: - if (sp) { - /* sanity check */ - KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); - - /* found a SPD entry */ - sp->lastused = time_second; - SP_ADDREF(sp); - } - SPTREE_RUNLOCK(); +static int +key_checksockaddrs(struct sockaddr *src, struct sockaddr *dst) +{ - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + /* family match */ + if (src->sa_family != dst->sa_family) + return (EINVAL); + /* sa_len match */ + if (src->sa_len != dst->sa_len) + return (EINVAL); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + return (EINVAL); + break; +#endif +#ifdef INET6 + case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + break; +#endif + default: + return (EAFNOSUPPORT); + } + return (0); } /* * allocating a SP for OUTBOUND or INBOUND packet. * Must call key_freesp() later. * OUT: NULL: not found * others: found and return the pointer. */ struct secpolicy * -key_allocsp2(u_int32_t spi, union sockaddr_union *dst, u_int8_t proto, - u_int dir, const char* where, int tag) +key_allocsp(struct secpolicyindex *spidx, u_int dir) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; - IPSEC_ASSERT(dst != NULL, ("null dst")); + IPSEC_ASSERT(spidx != NULL, ("null spidx")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - - /* get a SP entry */ - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - printf("spi %u proto %u dir %u\n", spi, proto, dir); - kdebug_sockaddr(&dst->sa)); - SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); - /* compare simple values, then dst address */ - if (sp->spidx.ul_proto != proto) - continue; - /* NB: spi's must exist and match */ - if (!sp->req || !sp->req->sav || sp->req->sav->spi != spi) - continue; - if (key_sockaddrcmp(&sp->spidx.dst.sa, &dst->sa, 1) == 0) - goto found; + if (key_cmpspidx_withmask(&sp->spidx, spidx)) { + SP_ADDREF(sp); + break; + } } - sp = NULL; -found: - if (sp) { - /* sanity check */ - KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); + SPTREE_RUNLOCK(); - /* found a SPD entry */ + if (sp != NULL) { /* found a SPD entry */ sp->lastused = time_second; - SP_ADDREF(sp); + KEYDBG(IPSEC_STAMP, + printf("%s: return SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + } else { + KEYDBG(IPSEC_DATA, + printf("%s: lookup failed for ", __func__); + kdebug_secpolicyindex(spidx, NULL)); } - SPTREE_RUNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (sp); } -#if 0 /* - * return a policy that matches this particular inbound packet. - * XXX slow + * Allocating an SA entry for an *INBOUND* or *OUTBOUND* TCP packet, signed + * or should be signed by MD5 signature. + * We don't use key_allocsa() for such lookups, because we don't know SPI. + * Unlike ESP and AH protocols, SPI isn't transmitted in the TCP header with + * signed packet. We use SADB only as storage for password. + * OUT: positive: corresponding SA for given saidx found. + * NULL: SA not found */ -struct secpolicy * -key_gettunnel(const struct sockaddr *osrc, - const struct sockaddr *odst, - const struct sockaddr *isrc, - const struct sockaddr *idst, - const char* where, int tag) +struct secasvar * +key_allocsa_tcpmd5(struct secasindex *saidx) { - struct secpolicy *sp; - const int dir = IPSEC_DIR_INBOUND; - struct ipsecrequest *r1, *r2, *p; - struct secpolicyindex spidx; - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - - if (isrc->sa_family != idst->sa_family) { - ipseclog((LOG_ERR, "%s: protocol family mismatched %d != %d\n.", - __func__, isrc->sa_family, idst->sa_family)); - sp = NULL; - goto done; - } + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; + struct secasvar *sav; - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) + IPSEC_ASSERT(saidx->proto == IPPROTO_TCP, + ("unexpected security protocol %u", saidx->proto)); + IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TCPMD5, + ("unexpected mode %u", saidx->mode)); + + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + KEYDBG(IPSEC_DUMP, + printf("%s: checking SAH\n", __func__); + kdebug_secash(sah, " ")); + if (sah->saidx.proto != IPPROTO_TCP) continue; - - r1 = r2 = NULL; - for (p = sp->req; p; p = p->next) { - if (p->saidx.mode != IPSEC_MODE_TUNNEL) - continue; - - r1 = r2; - r2 = p; - - if (!r1) { - /* here we look at address matches only */ - spidx = sp->spidx; - if (isrc->sa_len > sizeof(spidx.src) || - idst->sa_len > sizeof(spidx.dst)) - continue; - bcopy(isrc, &spidx.src, isrc->sa_len); - bcopy(idst, &spidx.dst, idst->sa_len); - if (!key_cmpspidx_withmask(&sp->spidx, &spidx)) - continue; - } else { - if (key_sockaddrcmp(&r1->saidx.src.sa, isrc, 0) || - key_sockaddrcmp(&r1->saidx.dst.sa, idst, 0)) - continue; - } - - if (key_sockaddrcmp(&r2->saidx.src.sa, osrc, 0) || - key_sockaddrcmp(&r2->saidx.dst.sa, odst, 0)) - continue; - - goto found; - } + if (!key_sockaddrcmp(&saidx->dst.sa, &sah->saidx.dst.sa, 0)) + break; } - sp = NULL; -found: - if (sp) { - sp->lastused = time_second; - SP_ADDREF(sp); + if (sah != NULL) { + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) + SAV_ADDREF(sav); + } else + sav = NULL; + SAHTREE_RUNLOCK(); + + if (sav != NULL) { + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + } else { + KEYDBG(IPSEC_STAMP, + printf("%s: SA not found\n", __func__)); + KEYDBG(IPSEC_DATA, kdebug_secasindex(saidx, NULL)); } - SPTREE_UNLOCK(); -done: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (sav); } -#endif /* - * allocating an SA entry for an *OUTBOUND* packet. - * checking each request entries in SP, and acquire an SA if need. - * OUT: 0: there are valid requests. - * ENOENT: policy may be valid, but SA with REQUIRE is on acquiring. + * Allocating an SA entry for an *OUTBOUND* packet. + * OUT: positive: corresponding SA for given saidx found. + * NULL: SA not found, but will be acquired, check *error + * for acquiring status. */ -int -key_checkrequest(struct ipsecrequest *isr, const struct secasindex *saidx) +struct secasvar * +key_allocsa_policy(struct secpolicy *sp, const struct secasindex *saidx, + int *error) { - u_int level; - int error; + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; struct secasvar *sav; - IPSEC_ASSERT(isr != NULL, ("null isr")); IPSEC_ASSERT(saidx != NULL, ("null saidx")); IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TRANSPORT || saidx->mode == IPSEC_MODE_TUNNEL, ("unexpected policy %u", saidx->mode)); - /* - * XXX guard against protocol callbacks from the crypto - * thread as they reference ipsecrequest.sav which we - * temporarily null out below. Need to rethink how we - * handle bundled SA's in the callback thread. - */ - IPSECREQUEST_LOCK_ASSERT(isr); - - /* get current level */ - level = ipsec_get_reqlevel(isr); - /* * We check new SA in the IPsec request because a different * SA may be involved each time this request is checked, either * because new SAs are being configured, or this request is * associated with an unconnected datagram socket, or this request * is associated with a system default policy. - * - * key_allocsa_policy should allocate the oldest SA available. - * See key_do_allocsa_policy(), and draft-jenkins-ipsec-rekeying-03.txt. */ - sav = key_allocsa_policy(saidx); - if (sav != isr->sav) { - /* SA need to be updated. */ - if (!IPSECREQUEST_UPGRADE(isr)) { - /* Kick everyone off. */ - IPSECREQUEST_UNLOCK(isr); - IPSECREQUEST_WLOCK(isr); - } - if (isr->sav != NULL) - KEY_FREESAV(&isr->sav); - isr->sav = sav; - IPSECREQUEST_DOWNGRADE(isr); - } else if (sav != NULL) - KEY_FREESAV(&sav); - - /* When there is SA. */ - if (isr->sav != NULL) { - if (isr->sav->state != SADB_SASTATE_MATURE && - isr->sav->state != SADB_SASTATE_DYING) - return EINVAL; - return 0; - } + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + KEYDBG(IPSEC_DUMP, + printf("%s: checking SAH\n", __func__); + kdebug_secash(sah, " ")); + if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) + break; - /* there is no SA */ - error = key_acquire(saidx, isr->sp); - if (error != 0) { - /* XXX What should I do ? */ - ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", - __func__, error)); - return error; } + if (sah != NULL) { + /* + * Allocate the oldest SA available according to + * draft-jenkins-ipsec-rekeying-03. + */ + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) + SAV_ADDREF(sav); + } else + sav = NULL; + SAHTREE_RUNLOCK(); - if (level != IPSEC_LEVEL_REQUIRE) { - /* XXX sigh, the interface to this routine is botched */ - IPSEC_ASSERT(isr->sav == NULL, ("unexpected SA")); - return 0; - } else { - return ENOENT; + if (sav != NULL) { + *error = 0; + KEYDBG(IPSEC_STAMP, + printf("%s: chosen SA(%p) for SP(%p)\n", __func__, + sav, sp)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + return (sav); /* return referenced SA */ } + + /* there is no SA */ + *error = key_acquire(saidx, sp); + if ((*error) != 0) + ipseclog((LOG_DEBUG, + "%s: error %d returned from key_acquire()\n", + __func__, *error)); + KEYDBG(IPSEC_STAMP, + printf("%s: acquire SA for SP(%p), error %d\n", + __func__, sp, *error)); + KEYDBG(IPSEC_DATA, kdebug_secasindex(saidx, NULL)); + return (NULL); } /* - * allocating a SA for policy entry from SAD. - * NOTE: searching SAD of aliving state. - * OUT: NULL: not found. - * others: found and return the pointer. + * allocating a usable SA entry for a *INBOUND* packet. + * Must call key_freesav() later. + * OUT: positive: pointer to a usable sav (i.e. MATURE or DYING state). + * NULL: not found, or error occurred. + * + * According to RFC 2401 SA is uniquely identified by a triple SPI, + * destination address, and security protocol. But according to RFC 4301, + * SPI by itself suffices to specify an SA. + * + * Note that, however, we do need to keep source address in IPsec SA. + * IKE specification and PF_KEY specification do assume that we + * keep source address in IPsec SA. We see a tricky situation here. */ -static struct secasvar * -key_allocsa_policy(const struct secasindex *saidx) +struct secasvar * +key_allocsa(union sockaddr_union *dst, uint8_t proto, uint32_t spi) { -#define N(a) _ARRAYLEN(a) - struct secashead *sah; + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - u_int stateidx, arraysize; - const u_int *state_valid; - state_valid = NULL; /* silence gcc */ - arraysize = 0; /* silence gcc */ + IPSEC_ASSERT(proto == IPPROTO_ESP || proto == IPPROTO_AH || + proto == IPPROTO_IPCOMP, ("unexpected security protocol %u", + proto)); - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) { - if (V_key_preferred_oldsa) { - state_valid = saorder_state_valid_prefer_old; - arraysize = N(saorder_state_valid_prefer_old); - } else { - state_valid = saorder_state_valid_prefer_new; - arraysize = N(saorder_state_valid_prefer_new); - } + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi == spi) break; - } } - SAHTREE_UNLOCK(); - if (sah == NULL) - return NULL; - - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - sav = key_do_allocsa_policy(sah, state_valid[stateidx]); - if (sav != NULL) - return sav; + /* + * We use single SPI namespace for all protocols, so it is + * impossible to have SPI duplicates in the SAVHASH. + */ + if (sav != NULL) { + if (sav->state != SADB_SASTATE_LARVAL && + sav->sah->saidx.proto == proto && + key_sockaddrcmp(&dst->sa, + &sav->sah->saidx.dst.sa, 0) == 0) + SAV_ADDREF(sav); + else + sav = NULL; } + SAHTREE_RUNLOCK(); - return NULL; -#undef N + if (sav == NULL) { + KEYDBG(IPSEC_STAMP, + char buf[IPSEC_ADDRSTRLEN]; + printf("%s: SA not found for spi %u proto %u dst %s\n", + __func__, ntohl(spi), proto, ipsec_address(dst, buf, + sizeof(buf)))); + } else { + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + } + return (sav); } -/* - * searching SAD with direction, protocol, mode and state. - * called by key_allocsa_policy(). - * OUT: - * NULL : not found - * others : found, pointer to a SA. - */ -static struct secasvar * -key_do_allocsa_policy(struct secashead *sah, u_int state) +struct secasvar * +key_allocsa_tunnel(union sockaddr_union *src, union sockaddr_union *dst, + uint8_t proto) { - struct secasvar *sav, *nextsav, *candidate, *d; - - /* initialize */ - candidate = NULL; - - SAHTREE_LOCK(); - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; - sav = nextsav) { + SAHTREE_RLOCK_TRACKER; + struct secasindex saidx; + struct secashead *sah; + struct secasvar *sav; - nextsav = LIST_NEXT(sav, chain); + IPSEC_ASSERT(src != NULL, ("null src address")); + IPSEC_ASSERT(dst != NULL, ("null dst address")); - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); + KEY_SETSECASIDX(proto, IPSEC_MODE_TUNNEL, 0, &src->sa, + &dst->sa, &saidx); - /* initialize */ - if (candidate == NULL) { - candidate = sav; + sav = NULL; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(&saidx), addrhash) { + if (IPSEC_MODE_TUNNEL != sah->saidx.mode) continue; - } - - /* Which SA is the better ? */ - - IPSEC_ASSERT(candidate->lft_c != NULL, - ("null candidate lifetime")); - IPSEC_ASSERT(sav->lft_c != NULL, ("null sav lifetime")); - - /* What the best method is to compare ? */ - if (V_key_preferred_oldsa) { - if (candidate->lft_c->addtime > - sav->lft_c->addtime) { - candidate = sav; - } + if (proto != sah->saidx.proto) + continue; + if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, 0) != 0) + continue; + if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, 0) != 0) continue; - /*NOTREACHED*/ + /* XXXAE: is key_preferred_oldsa reasonably?*/ + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) { + SAV_ADDREF(sav); + break; } + } + SAHTREE_RUNLOCK(); + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + if (sav != NULL) + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + return (sav); +} - /* preferred new sa rather than old sa */ - if (candidate->lft_c->addtime < - sav->lft_c->addtime) { - d = candidate; - candidate = sav; - } else - d = sav; +/* + * Must be called after calling key_allocsp(). + */ +void +key_freesp(struct secpolicy **spp) +{ + struct secpolicy *sp = *spp; - /* - * prepared to delete the SA when there is more - * suitable candidate and the lifetime of the SA is not - * permanent. - */ - if (d->lft_h->addtime != 0) { - struct mbuf *m, *result; - u_int8_t satype; + IPSEC_ASSERT(sp != NULL, ("null sp")); + if (SP_DELREF(sp) == 0) + return; - key_sa_chgstate(d, SADB_SASTATE_DEAD); + KEYDBG(IPSEC_STAMP, + printf("%s: last reference to SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); - IPSEC_ASSERT(d->refcnt > 0, ("bogus ref count")); + *spp = NULL; + while (sp->tcount > 0) + ipsec_delisr(sp->req[--sp->tcount]); + free(sp, M_IPSEC_SP); +} - satype = key_proto2satype(d->sah->saidx.proto); - if (satype == 0) - goto msgfail; +static void +key_unlink(struct secpolicy *sp) +{ - m = key_setsadbmsg(SADB_DELETE, 0, - satype, 0, 0, d->refcnt - 1); - if (!m) - goto msgfail; - result = m; - - /* set sadb_address for saidx's. */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - &d->sah->saidx.src.sa, - d->sah->saidx.src.sa.sa_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* set sadb_address for saidx's. */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - &d->sah->saidx.dst.sa, - d->sah->saidx.dst.sa.sa_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* create SA extension */ - m = key_setsadbsa(d); - if (!m) - goto msgfail; - m_cat(result, m); - - if (result->m_len < sizeof(struct sadb_msg)) { - result = m_pullup(result, - sizeof(struct sadb_msg)); - if (result == NULL) - goto msgfail; - } - - result->m_pkthdr.len = 0; - for (m = result; m; m = m->m_next) - result->m_pkthdr.len += m->m_len; - mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - - if (key_sendup_mbuf(NULL, result, - KEY_SENDUP_REGISTERED)) - goto msgfail; - msgfail: - KEY_FREESAV(&d); - } - } - if (candidate) { - sa_addref(candidate); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s cause refcnt++:%d SA:%p\n", - __func__, candidate->refcnt, candidate)); - } - SAHTREE_UNLOCK(); - - return candidate; -} - -/* - * allocating a usable SA entry for a *INBOUND* packet. - * Must call key_freesav() later. - * OUT: positive: pointer to a usable sav (i.e. MATURE or DYING state). - * NULL: not found, or error occurred. - * - * In the comparison, no source address is used--for RFC2401 conformance. - * To quote, from section 4.1: - * A security association is uniquely identified by a triple consisting - * of a Security Parameter Index (SPI), an IP Destination Address, and a - * security protocol (AH or ESP) identifier. - * Note that, however, we do need to keep source address in IPsec SA. - * IKE specification and PF_KEY specification do assume that we - * keep source address in IPsec SA. We see a tricky situation here. - */ -struct secasvar * -key_allocsa(union sockaddr_union *dst, u_int proto, u_int32_t spi, - const char* where, int tag) -{ - struct secashead *sah; - struct secasvar *sav; - u_int stateidx, arraysize, state; - const u_int *saorder_state_valid; -#ifdef IPSEC_NAT_T - int natt_chkport; -#endif - - IPSEC_ASSERT(dst != NULL, ("null dst address")); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - -#ifdef IPSEC_NAT_T - natt_chkport = (dst->sa.sa_family == AF_INET && - dst->sa.sa_len == sizeof(struct sockaddr_in) && - dst->sin.sin_port != 0); -#endif - - /* - * searching SAD. - * XXX: to be checked internal IP header somewhere. Also when - * IPsec tunnel packet is received. But ESP tunnel mode is - * encrypted so we can't check internal IP header. - */ - SAHTREE_LOCK(); - if (V_key_preferred_oldsa) { - saorder_state_valid = saorder_state_valid_prefer_old; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); - } else { - saorder_state_valid = saorder_state_valid_prefer_new; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); - } - LIST_FOREACH(sah, &V_sahtree, chain) { - int checkport; - - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - state = saorder_state_valid[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); - /* do not return entries w/ unusable state */ - if (sav->state != SADB_SASTATE_MATURE && - sav->state != SADB_SASTATE_DYING) - continue; - if (proto != sav->sah->saidx.proto) - continue; - if (spi != sav->spi) - continue; - checkport = 0; -#ifdef IPSEC_NAT_T - /* - * Really only check ports when this is a NAT-T - * SA. Otherwise other lookups providing ports - * might suffer. - */ - if (sav->natt_type && natt_chkport) - checkport = 1; -#endif -#if 0 /* don't check src */ - /* check src address */ - if (key_sockaddrcmp(&src->sa, - &sav->sah->saidx.src.sa, checkport) != 0) - continue; -#endif - /* check dst address */ - if (key_sockaddrcmp(&dst->sa, - &sav->sah->saidx.dst.sa, checkport) != 0) - continue; - sa_addref(sav); - goto done; - } - } - } - sav = NULL; -done: - SAHTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SA:%p; refcnt %u\n", __func__, - sav, sav ? sav->refcnt : 0)); - return sav; -} - -struct secasvar * -key_allocsa_tunnel(union sockaddr_union *src, union sockaddr_union *dst, - u_int proto, const char* where, int tag) -{ - struct secashead *sah; - struct secasvar *sav; - u_int stateidx, arraysize, state; - const u_int *saorder_state_valid; - - IPSEC_ASSERT(src != NULL, ("null src address")); - IPSEC_ASSERT(dst != NULL, ("null dst address")); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - - SAHTREE_LOCK(); - if (V_key_preferred_oldsa) { - saorder_state_valid = saorder_state_valid_prefer_old; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); - } else { - saorder_state_valid = saorder_state_valid_prefer_new; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); - } - LIST_FOREACH(sah, &V_sahtree, chain) { - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - state = saorder_state_valid[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); - /* do not return entries w/ unusable state */ - if (sav->state != SADB_SASTATE_MATURE && - sav->state != SADB_SASTATE_DYING) - continue; - if (IPSEC_MODE_TUNNEL != sav->sah->saidx.mode) - continue; - if (proto != sav->sah->saidx.proto) - continue; - /* check src address */ - if (key_sockaddrcmp(&src->sa, - &sav->sah->saidx.src.sa, 0) != 0) - continue; - /* check dst address */ - if (key_sockaddrcmp(&dst->sa, - &sav->sah->saidx.dst.sa, 0) != 0) - continue; - sa_addref(sav); - goto done; - } - } - } - sav = NULL; -done: - SAHTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SA:%p; refcnt %u\n", __func__, - sav, sav ? sav->refcnt : 0)); - return (sav); -} - -/* - * Must be called after calling key_allocsp(). - * For both the packet without socket and key_freeso(). - */ -void -_key_freesp(struct secpolicy **spp, const char* where, int tag) -{ - struct ipsecrequest *isr, *nextisr; - struct secpolicy *sp = *spp; - - IPSEC_ASSERT(sp != NULL, ("null sp")); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SP:%p (ID=%u) from %s:%u; refcnt now %u\n", - __func__, sp, sp->id, where, tag, sp->refcnt)); - - if (SP_DELREF(sp) == 0) - return; - *spp = NULL; - for (isr = sp->req; isr != NULL; isr = nextisr) { - if (isr->sav != NULL) { - KEY_FREESAV(&isr->sav); - isr->sav = NULL; - } - nextisr = isr->next; - ipsec_delisr(isr); - } - free(sp, M_IPSEC_SP); -} - -static void -key_unlink(struct secpolicy *sp) -{ - - IPSEC_ASSERT(sp != NULL, ("null sp")); IPSEC_ASSERT(sp->spidx.dir == IPSEC_DIR_INBOUND || sp->spidx.dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", sp->spidx.dir)); SPTREE_UNLOCK_ASSERT(); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); SPTREE_WLOCK(); - if (sp->state == IPSEC_SPSTATE_DEAD) { + if (sp->state != IPSEC_SPSTATE_ALIVE) { + /* SP is already unlinked */ SPTREE_WUNLOCK(); return; } sp->state = IPSEC_SPSTATE_DEAD; TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + LIST_REMOVE(sp, idhash); + V_sp_genid++; SPTREE_WUNLOCK(); - KEY_FREESP(&sp); + key_freesp(&sp); } /* * insert a secpolicy into the SP database. Lower priorities first */ static void key_insertsp(struct secpolicy *newsp) { struct secpolicy *sp; - SPTREE_WLOCK(); + SPTREE_WLOCK_ASSERT(); TAILQ_FOREACH(sp, &V_sptree[newsp->spidx.dir], chain) { if (newsp->priority < sp->priority) { TAILQ_INSERT_BEFORE(sp, newsp, chain); goto done; } } - TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); - done: + LIST_INSERT_HEAD(SPHASH_HASH(newsp->id), newsp, idhash); newsp->state = IPSEC_SPSTATE_ALIVE; - SPTREE_WUNLOCK(); + V_sp_genid++; } /* - * Must be called after calling key_allocsp(). - * For the packet with socket. + * Insert a bunch of VTI secpolicies into the SPDB. + * We keep VTI policies in the separate list due to following reasons: + * 1) they should be immutable to user's or some deamon's attempts to + * delete. The only way delete such policies - destroy or unconfigure + * corresponding virtual inteface. + * 2) such policies have traffic selector that matches all traffic per + * address family. + * Since all VTI policies have the same priority, we don't care about + * policies order. */ -void -key_freeso(struct socket *so) +int +key_register_ifnet(struct secpolicy **spp, u_int count) { - IPSEC_ASSERT(so != NULL, ("null so")); + struct mbuf *m; + u_int i; - switch (so->so_proto->pr_domain->dom_family) { -#if defined(INET) || defined(INET6) -#ifdef INET - case PF_INET: -#endif -#ifdef INET6 - case PF_INET6: -#endif - { - struct inpcb *pcb = sotoinpcb(so); + SPTREE_WLOCK(); + /* + * First of try to acquire id for each SP. + */ + for (i = 0; i < count; i++) { + IPSEC_ASSERT(spp[i]->spidx.dir == IPSEC_DIR_INBOUND || + spp[i]->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", spp[i]->spidx.dir)); - /* Does it have a PCB ? */ - if (pcb == NULL) - return; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); - } - break; -#endif /* INET || INET6 */ - default: - ipseclog((LOG_DEBUG, "%s: unknown address family=%d.\n", - __func__, so->so_proto->pr_domain->dom_family)); - return; + if ((spp[i]->id = key_getnewspid()) == 0) { + SPTREE_WUNLOCK(); + return (EAGAIN); + } } -} - -static void -key_freesp_so(struct secpolicy **sp) -{ - IPSEC_ASSERT(sp != NULL && *sp != NULL, ("null sp")); - - if ((*sp)->policy == IPSEC_POLICY_ENTRUST || - (*sp)->policy == IPSEC_POLICY_BYPASS) - return; - - IPSEC_ASSERT((*sp)->policy == IPSEC_POLICY_IPSEC, - ("invalid policy %u", (*sp)->policy)); - KEY_FREESP(sp); + for (i = 0; i < count; i++) { + TAILQ_INSERT_TAIL(&V_sptree_ifnet[spp[i]->spidx.dir], + spp[i], chain); + /* + * NOTE: despite the fact that we keep VTI SP in the + * separate list, SPHASH contains policies from both + * sources. Thus SADB_X_SPDGET will correctly return + * SP by id, because it uses SPHASH for lookups. + */ + LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); + spp[i]->state = IPSEC_SPSTATE_IFNET; + } + SPTREE_WUNLOCK(); + /* + * Notify user processes about new SP. + */ + for (i = 0; i < count; i++) { + m = key_setdumpsp(spp[i], SADB_X_SPDADD, 0, 0); + if (m != NULL) + key_sendup_mbuf(NULL, m, KEY_SENDUP_ALL); + } + return (0); } void -key_addrefsa(struct secasvar *sav, const char* where, int tag) +key_unregister_ifnet(struct secpolicy **spp, u_int count) { + struct mbuf *m; + u_int i; - IPSEC_ASSERT(sav != NULL, ("null sav")); - IPSEC_ASSERT(sav->refcnt > 0, ("refcount must exist")); + SPTREE_WLOCK(); + for (i = 0; i < count; i++) { + IPSEC_ASSERT(spp[i]->spidx.dir == IPSEC_DIR_INBOUND || + spp[i]->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", spp[i]->spidx.dir)); + + if (spp[i]->state != IPSEC_SPSTATE_IFNET) + continue; + spp[i]->state = IPSEC_SPSTATE_DEAD; + TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], + spp[i], chain); + LIST_REMOVE(spp[i], idhash); + } + SPTREE_WUNLOCK(); - sa_addref(sav); + for (i = 0; i < count; i++) { + m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); + if (m != NULL) + key_sendup_mbuf(NULL, m, KEY_SENDUP_ALL); + } } /* * Must be called after calling key_allocsa(). * This function is called by key_freesp() to free some SA allocated * for a policy. */ void -key_freesav(struct secasvar **psav, const char* where, int tag) +key_freesav(struct secasvar **psav) { struct secasvar *sav = *psav; IPSEC_ASSERT(sav != NULL, ("null sav")); + if (SAV_DELREF(sav) == 0) + return; - if (sa_delref(sav)) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", - __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); - *psav = NULL; - key_delsav(sav); - } else { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", - __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); + KEYDBG(IPSEC_STAMP, + printf("%s: last reference to SA(%p)\n", __func__, sav)); + + *psav = NULL; + key_delsav(sav); +} + +/* + * Unlink SA from SAH and SPI hash under SAHTREE_WLOCK. + * Expect that SA has extra reference due to lookup. + * Release this references, also release SAH reference after unlink. + */ +static void +key_unlinksav(struct secasvar *sav) +{ + struct secashead *sah; + + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + + SAHTREE_UNLOCK_ASSERT(); + SAHTREE_WLOCK(); + if (sav->state == SADB_SASTATE_DEAD) { + /* SA is already unlinked */ + SAHTREE_WUNLOCK(); + return; } + /* Unlink from SAH */ + if (sav->state == SADB_SASTATE_LARVAL) + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + else + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + /* Unlink from SPI hash */ + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sah = sav->sah; + SAHTREE_WUNLOCK(); + key_freesav(&sav); + /* Since we are unlinked, release reference to SAH */ + key_freesah(&sah); } /* %%% SPD management */ /* * search SPD * OUT: NULL : not found * others : found, pointer to a SP. */ static struct secpolicy * key_getsp(struct secpolicyindex *spidx) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[spidx->dir], chain) { if (key_cmpspidx_exactly(spidx, &sp->spidx)) { SP_ADDREF(sp); break; } } SPTREE_RUNLOCK(); return sp; } /* * get SP by index. * OUT: NULL : not found - * others : found, pointer to a SP. + * others : found, pointer to referenced SP. */ static struct secpolicy * -key_getspbyid(u_int32_t id) +key_getspbyid(uint32_t id) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; SPTREE_RLOCK(); - TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { - if (sp->id == id) { - SP_ADDREF(sp); - goto done; - } - } - - TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { + LIST_FOREACH(sp, SPHASH_HASH(id), idhash) { if (sp->id == id) { SP_ADDREF(sp); - goto done; + break; } } -done: SPTREE_RUNLOCK(); - - return sp; + return (sp); } struct secpolicy * -key_newsp(const char* where, int tag) +key_newsp(void) +{ + struct secpolicy *sp; + + sp = malloc(sizeof(*sp), M_IPSEC_SP, M_NOWAIT | M_ZERO); + if (sp != NULL) + SP_INITREF(sp); + return (sp); +} + +struct ipsecrequest * +ipsec_newisr(void) { - struct secpolicy *newsp = NULL; - newsp = (struct secpolicy *) - malloc(sizeof(struct secpolicy), M_IPSEC_SP, M_NOWAIT|M_ZERO); - if (newsp) - refcount_init(&newsp->refcnt, 1); + return (malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, + M_NOWAIT | M_ZERO)); +} + +void +ipsec_delisr(struct ipsecrequest *p) +{ - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u return SP:%p\n", __func__, - where, tag, newsp)); - return newsp; + free(p, M_IPSEC_SR); } /* * create secpolicy structure from sadb_x_policy structure. - * NOTE: `state', `secpolicyindex' in secpolicy structure are not set, - * so must be set properly later. + * NOTE: `state', `secpolicyindex' and 'id' in secpolicy structure + * are not set, so must be set properly later. */ struct secpolicy * key_msg2sp(struct sadb_x_policy *xpl0, size_t len, int *error) { struct secpolicy *newsp; IPSEC_ASSERT(xpl0 != NULL, ("null xpl0")); IPSEC_ASSERT(len >= sizeof(*xpl0), ("policy too short: %zu", len)); if (len != PFKEY_EXTLEN(xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); *error = EINVAL; return NULL; } - if ((newsp = KEY_NEWSP()) == NULL) { + if ((newsp = key_newsp()) == NULL) { *error = ENOBUFS; return NULL; } newsp->spidx.dir = xpl0->sadb_x_policy_dir; newsp->policy = xpl0->sadb_x_policy_type; newsp->priority = xpl0->sadb_x_policy_priority; + newsp->tcount = 0; /* check policy */ switch (xpl0->sadb_x_policy_type) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_NONE: case IPSEC_POLICY_ENTRUST: case IPSEC_POLICY_BYPASS: - newsp->req = NULL; break; case IPSEC_POLICY_IPSEC: { - int tlen; struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest **p_isr = &newsp->req; + struct ipsecrequest *isr; + int tlen; /* validity check */ if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } tlen = PFKEY_EXTLEN(xpl0) - sizeof(*xpl0); xisr = (struct sadb_x_ipsecrequest *)(xpl0 + 1); while (tlen > 0) { /* length check */ if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { ipseclog((LOG_DEBUG, "%s: invalid ipsecrequest " "length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } + if (newsp->tcount >= IPSEC_MAXREQ) { + ipseclog((LOG_DEBUG, + "%s: too many ipsecrequests.\n", + __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } + /* allocate request buffer */ /* NB: data structure is zero'd */ - *p_isr = ipsec_newisr(); - if ((*p_isr) == NULL) { + isr = ipsec_newisr(); + if (isr == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = ENOBUFS; return NULL; } + newsp->req[newsp->tcount++] = isr; + /* set values */ switch (xisr->sadb_x_ipsecrequest_proto) { case IPPROTO_ESP: case IPPROTO_AH: case IPPROTO_IPCOMP: break; default: ipseclog((LOG_DEBUG, "%s: invalid proto type=%u\n", __func__, xisr->sadb_x_ipsecrequest_proto)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EPROTONOSUPPORT; return NULL; } - (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; + isr->saidx.proto = + (uint8_t)xisr->sadb_x_ipsecrequest_proto; switch (xisr->sadb_x_ipsecrequest_mode) { case IPSEC_MODE_TRANSPORT: case IPSEC_MODE_TUNNEL: break; case IPSEC_MODE_ANY: default: ipseclog((LOG_DEBUG, "%s: invalid mode=%u\n", __func__, xisr->sadb_x_ipsecrequest_mode)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; + isr->saidx.mode = xisr->sadb_x_ipsecrequest_mode; switch (xisr->sadb_x_ipsecrequest_level) { case IPSEC_LEVEL_DEFAULT: case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: break; case IPSEC_LEVEL_UNIQUE: /* validity check */ /* * If range violation of reqid, kernel will * update it, don't refuse it. */ if (xisr->sadb_x_ipsecrequest_reqid > IPSEC_MANUAL_REQID_MAX) { ipseclog((LOG_DEBUG, "%s: reqid=%d range " "violation, updated by kernel.\n", __func__, xisr->sadb_x_ipsecrequest_reqid)); xisr->sadb_x_ipsecrequest_reqid = 0; } /* allocate new reqid id if reqid is zero. */ if (xisr->sadb_x_ipsecrequest_reqid == 0) { u_int32_t reqid; if ((reqid = key_newreqid()) == 0) { - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = ENOBUFS; return NULL; } - (*p_isr)->saidx.reqid = reqid; + isr->saidx.reqid = reqid; xisr->sadb_x_ipsecrequest_reqid = reqid; } else { /* set it for manual keying. */ - (*p_isr)->saidx.reqid = - xisr->sadb_x_ipsecrequest_reqid; + isr->saidx.reqid = + xisr->sadb_x_ipsecrequest_reqid; } break; default: ipseclog((LOG_DEBUG, "%s: invalid level=%u\n", __func__, xisr->sadb_x_ipsecrequest_level)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; + isr->level = xisr->sadb_x_ipsecrequest_level; /* set IP addresses if there */ if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { struct sockaddr *paddr; paddr = (struct sockaddr *)(xisr + 1); - /* validity check */ if (paddr->sa_len - > sizeof((*p_isr)->saidx.src)) { + > sizeof(isr->saidx.src)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.src, - paddr->sa_len); - - paddr = (struct sockaddr *)((caddr_t)paddr - + paddr->sa_len); + bcopy(paddr, &isr->saidx.src, paddr->sa_len); + paddr = (struct sockaddr *)((caddr_t)paddr + + paddr->sa_len); /* validity check */ if (paddr->sa_len - > sizeof((*p_isr)->saidx.dst)) { + > sizeof(isr->saidx.dst)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.dst, - paddr->sa_len); + /* AF family should match */ + if (paddr->sa_family != + isr->saidx.src.sa.sa_family) { + ipseclog((LOG_DEBUG, "%s: address " + "family doesn't match.\n", + __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } + bcopy(paddr, &isr->saidx.dst, paddr->sa_len); + } else { + /* + * Addresses for TUNNEL mode requests are + * mandatory. + */ + if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { + ipseclog((LOG_DEBUG, "%s: missing " + "request addresses.\n", __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } } - - (*p_isr)->sp = newsp; - - /* initialization for the next. */ - p_isr = &(*p_isr)->next; tlen -= xisr->sadb_x_ipsecrequest_len; /* validity check */ if (tlen < 0) { ipseclog((LOG_DEBUG, "%s: becoming tlen < 0.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); } + /* XXXAE: LARVAL SP */ + if (newsp->tcount < 1) { + ipseclog((LOG_DEBUG, "%s: valid IPSEC transforms " + "not found.\n", __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } } break; default: ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } *error = 0; - return newsp; + return (newsp); } -static u_int32_t -key_newreqid() +uint32_t +key_newreqid(void) { - static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + static uint32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; - auto_reqid = (auto_reqid == ~0 - ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); + if (auto_reqid == ~0) + auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + else + auto_reqid++; /* XXX should be unique check */ - - return auto_reqid; + return (auto_reqid); } /* * copy secpolicy struct to sadb_x_policy structure indicated. */ -struct mbuf * -key_sp2msg(struct secpolicy *sp) +static struct mbuf * +key_sp2mbuf(struct secpolicy *sp) { - struct sadb_x_policy *xpl; - int tlen; - caddr_t p; struct mbuf *m; - - IPSEC_ASSERT(sp != NULL, ("null policy")); + size_t tlen; tlen = key_getspreqmsglen(sp); - m = m_get2(tlen, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, tlen); m->m_len = tlen; - xpl = mtod(m, struct sadb_x_policy *); - bzero(xpl, tlen); + if (key_sp2msg(sp, m->m_data, &tlen) != 0) { + m_freem(m); + return (NULL); + } + return (m); +} + +int +key_sp2msg(struct secpolicy *sp, void *request, size_t *len) +{ + struct sadb_x_ipsecrequest *xisr; + struct sadb_x_policy *xpl; + struct ipsecrequest *isr; + size_t xlen, ilen; + caddr_t p; + int error, i; + + IPSEC_ASSERT(sp != NULL, ("null policy")); - xpl->sadb_x_policy_len = PFKEY_UNIT64(tlen); + xlen = sizeof(*xpl); + if (*len < xlen) + return (EINVAL); + + error = 0; + bzero(request, *len); + xpl = (struct sadb_x_policy *)request; xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = sp->policy; xpl->sadb_x_policy_dir = sp->spidx.dir; xpl->sadb_x_policy_id = sp->id; xpl->sadb_x_policy_priority = sp->priority; - p = (caddr_t)xpl + sizeof(*xpl); /* if is the policy for ipsec ? */ if (sp->policy == IPSEC_POLICY_IPSEC) { - struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest *isr; - - for (isr = sp->req; isr != NULL; isr = isr->next) { - + p = (caddr_t)xpl + sizeof(*xpl); + for (i = 0; i < sp->tcount; i++) { + isr = sp->req[i]; + ilen = PFKEY_ALIGN8(sizeof(*xisr) + + isr->saidx.src.sa.sa_len + + isr->saidx.dst.sa.sa_len); + xlen += ilen; + if (xlen > *len) { + error = ENOBUFS; + /* Calculate needed size */ + continue; + } xisr = (struct sadb_x_ipsecrequest *)p; - + xisr->sadb_x_ipsecrequest_len = ilen; xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto; xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode; xisr->sadb_x_ipsecrequest_level = isr->level; xisr->sadb_x_ipsecrequest_reqid = isr->saidx.reqid; p += sizeof(*xisr); bcopy(&isr->saidx.src, p, isr->saidx.src.sa.sa_len); p += isr->saidx.src.sa.sa_len; bcopy(&isr->saidx.dst, p, isr->saidx.dst.sa.sa_len); - p += isr->saidx.src.sa.sa_len; - - xisr->sadb_x_ipsecrequest_len = - PFKEY_ALIGN8(sizeof(*xisr) - + isr->saidx.src.sa.sa_len - + isr->saidx.dst.sa.sa_len); + p += isr->saidx.dst.sa.sa_len; } } - - return m; + xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); + if (error == 0) + *len = xlen; + else + *len = sizeof(*xpl); + return (error); } /* m will not be freed nor modified */ static struct mbuf * key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, int ndeep, int nitem, ...) { va_list ap; int idx; int i; struct mbuf *result = NULL, *n; int len; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); va_start(ap, nitem); for (i = 0; i < nitem; i++) { idx = va_arg(ap, int); if (idx < 0 || idx > SADB_EXT_MAX) goto fail; /* don't attempt to pull empty extension */ if (idx == SADB_EXT_RESERVED && mhp->msg == NULL) continue; if (idx != SADB_EXT_RESERVED && (mhp->ext[idx] == NULL || mhp->extlen[idx] == 0)) continue; if (idx == SADB_EXT_RESERVED) { len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); IPSEC_ASSERT(len <= MHLEN, ("header too big %u", len)); MGETHDR(n, M_NOWAIT, MT_DATA); if (!n) goto fail; n->m_len = len; n->m_next = NULL; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t)); } else if (i < ndeep) { len = mhp->extlen[idx]; n = m_get2(len, M_NOWAIT, MT_DATA, 0); if (n == NULL) goto fail; m_align(n, len); n->m_len = len; m_copydata(m, mhp->extoff[idx], mhp->extlen[idx], mtod(n, caddr_t)); } else { n = m_copym(m, mhp->extoff[idx], mhp->extlen[idx], M_NOWAIT); } if (n == NULL) goto fail; if (result) m_cat(result, n); else result = n; } va_end(ap); if ((result->m_flags & M_PKTHDR) != 0) { result->m_pkthdr.len = 0; for (n = result; n; n = n->m_next) result->m_pkthdr.len += n->m_len; } return result; fail: m_freem(result); va_end(ap); return NULL; } /* * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing * add an entry to SP database, when received * * from the user(?). * Adding to SP database, * and send * * to the socket which was send. * * SPDADD set a unique policy entry. * SPDSETIDX like SPDADD without a part of policy requests. * SPDUPDATE replace a unique policy entry. * + * XXXAE: serialize this in PF_KEY to avoid races. * m will always be freed. */ static int key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secpolicyindex spidx; struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0, *xpl; struct sadb_lifetime *lft = NULL; - struct secpolicyindex spidx; struct secpolicy *newsp; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_X_EXT_POLICY] == NULL) { - ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] - < sizeof(struct sadb_lifetime)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (!SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_HARD)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); return key_senderror(so, m, EINVAL); } lft = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; } src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; - /* - * Note: do not parse SADB_X_EXT_NAT_T_* here: - * we are processing traffic endpoints. - */ - - /* make secindex */ - /* XXX boundary check against sa_len */ - KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, - src0 + 1, - dst0 + 1, - src0->sadb_address_prefixlen, - dst0->sadb_address_prefixlen, - src0->sadb_address_proto, - &spidx); - - /* checking the direciton. */ + /* check the direciton */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: - ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); - mhp->msg->sadb_msg_errno = EINVAL; - return 0; + ipseclog((LOG_DEBUG, "%s: invalid SP direction.\n", __func__)); + return key_senderror(so, m, EINVAL); } - - /* check policy */ /* key_spdadd() accepts DISCARD, NONE and IPSEC. */ - if (xpl0->sadb_x_policy_type == IPSEC_POLICY_ENTRUST - || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { - ipseclog((LOG_DEBUG, "%s: Invalid policy type.\n", __func__)); + if (xpl0->sadb_x_policy_type != IPSEC_POLICY_DISCARD && + xpl0->sadb_x_policy_type != IPSEC_POLICY_NONE && + xpl0->sadb_x_policy_type != IPSEC_POLICY_IPSEC) { + ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); return key_senderror(so, m, EINVAL); } /* policy requests are mandatory when action is ipsec. */ - if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX - && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC - && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { - ipseclog((LOG_DEBUG, "%s: some policy requests part required\n", - __func__)); + if (xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC && + mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { + ipseclog((LOG_DEBUG, + "%s: policy requests required.\n", __func__)); return key_senderror(so, m, EINVAL); } - /* - * checking there is SP already or not. - * SPDUPDATE doesn't depend on whether there is a SP or not. - * If the type is either SPDADD or SPDSETIDX AND a SP is found, - * then error. - */ - newsp = key_getsp(&spidx); - if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - if (newsp) { - key_unlink(newsp); - KEY_FREESP(&newsp); - } - } else { - if (newsp != NULL) { - KEY_FREESP(&newsp); - ipseclog((LOG_DEBUG, "%s: a SP entry exists already.\n", - __func__)); - return key_senderror(so, m, EEXIST); - } - } - - /* XXX: there is race between key_getsp and key_msg2sp. */ - - /* allocation new SP entry */ - if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0 || + src0->sadb_address_proto != dst0->sadb_address_proto) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); return key_senderror(so, m, error); } - - if ((newsp->id = key_getnewspid()) == 0) { - KEY_FREESP(&newsp); - return key_senderror(so, m, ENOBUFS); - } - - /* XXX boundary check against sa_len */ + /* make secindex */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, - &newsp->spidx); - - /* sanity check on addr pair */ - if (((struct sockaddr *)(src0 + 1))->sa_family != - ((struct sockaddr *)(dst0+ 1))->sa_family) { - KEY_FREESP(&newsp); - return key_senderror(so, m, EINVAL); - } - if (((struct sockaddr *)(src0 + 1))->sa_len != - ((struct sockaddr *)(dst0+ 1))->sa_len) { - KEY_FREESP(&newsp); - return key_senderror(so, m, EINVAL); - } -#if 1 - if (newsp->req && newsp->req->saidx.src.sa.sa_family && - newsp->req->saidx.dst.sa.sa_family) { - if (newsp->req->saidx.src.sa.sa_family != - newsp->req->saidx.dst.sa.sa_family) { - KEY_FREESP(&newsp); - return key_senderror(so, m, EINVAL); + &spidx); + /* Checking there is SP already or not. */ + newsp = key_getsp(&spidx); + if (newsp != NULL) { + if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { + KEYDBG(KEY_STAMP, + printf("%s: unlink SP(%p) for SPDUPDATE\n", + __func__, newsp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(newsp)); + key_unlink(newsp); + key_freesp(&newsp); + } else { + key_freesp(&newsp); + ipseclog((LOG_DEBUG, "%s: a SP entry exists already.", + __func__)); + return (key_senderror(so, m, EEXIST)); } } -#endif - newsp->created = time_second; - newsp->lastused = newsp->created; + /* allocate new SP entry */ + if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { + return key_senderror(so, m, error); + } + + newsp->lastused = newsp->created = time_second; newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; + bcopy(&spidx, &newsp->spidx, sizeof(spidx)); + /* XXXAE: there is race between key_getsp() and key_insertsp() */ + SPTREE_WLOCK(); + if ((newsp->id = key_getnewspid()) == 0) { + SPTREE_WUNLOCK(); + key_freesp(&newsp); + return key_senderror(so, m, ENOBUFS); + } key_insertsp(newsp); + SPTREE_WUNLOCK(); - /* delete the entry in spacqtree */ - if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - struct secspacq *spacq = key_getspacq(&spidx); - if (spacq != NULL) { - /* reset counter in order to deletion by timehandler. */ - spacq->created = time_second; - spacq->count = 0; - SPACQ_UNLOCK(); - } - } + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, newsp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(newsp)); { struct mbuf *n, *mpolicy; struct sadb_msg *newmsg; int off; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* create new sadb_msg to reply. */ if (lft) { n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_LIFETIME_HARD, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); } else { n = key_gather_mbuf(m, mhp, 2, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); } if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(*newmsg)) { n = m_pullup(n, sizeof(*newmsg)); if (!n) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); off = 0; mpolicy = m_pulldown(n, PFKEY_ALIGN8(sizeof(struct sadb_msg)), sizeof(*xpl), &off); if (mpolicy == NULL) { /* n is already freed */ return key_senderror(so, m, ENOBUFS); } xpl = (struct sadb_x_policy *)(mtod(mpolicy, caddr_t) + off); if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) { m_freem(n); return key_senderror(so, m, EINVAL); } xpl->sadb_x_policy_id = newsp->id; m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * get new policy id. * OUT: * 0: failure. * others: success. */ -static u_int32_t -key_getnewspid() +static uint32_t +key_getnewspid(void) { - u_int32_t newid = 0; - int count = V_key_spi_trycnt; /* XXX */ struct secpolicy *sp; + uint32_t newid = 0; + int count = V_key_spi_trycnt; /* XXX */ - /* when requesting to allocate spi ranged */ + SPTREE_WLOCK_ASSERT(); while (count--) { - newid = (V_policy_id = (V_policy_id == ~0 ? 1 : V_policy_id + 1)); - - if ((sp = key_getspbyid(newid)) == NULL) + if (V_policy_id == ~0) /* overflowed */ + newid = V_policy_id = 1; + else + newid = ++V_policy_id; + LIST_FOREACH(sp, SPHASH_HASH(newid), idhash) { + if (sp->id == newid) + break; + } + if (sp == NULL) break; - - KEY_FREESP(&sp); } - if (count == 0 || newid == 0) { - ipseclog((LOG_DEBUG, "%s: to allocate policy id is failed.\n", - __func__)); - return 0; + ipseclog((LOG_DEBUG, "%s: failed to allocate policy id.\n", + __func__)); + return (0); } - - return newid; + return (newid); } /* * SADB_SPDDELETE processing * receive * * from the user(?), and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spddelete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secpolicyindex spidx; struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0; - struct secpolicyindex spidx; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null so")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_X_EXT_POLICY] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; - /* - * Note: do not parse SADB_X_EXT_NAT_T_* here: - * we are processing traffic endpoints. - */ - + /* check the direciton */ + switch (xpl0->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_DEBUG, "%s: invalid SP direction.\n", __func__)); + return key_senderror(so, m, EINVAL); + } + /* Only DISCARD, NONE and IPSEC are allowed */ + if (xpl0->sadb_x_policy_type != IPSEC_POLICY_DISCARD && + xpl0->sadb_x_policy_type != IPSEC_POLICY_NONE && + xpl0->sadb_x_policy_type != IPSEC_POLICY_IPSEC) { + ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); + return key_senderror(so, m, EINVAL); + } + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0 || + src0->sadb_address_proto != dst0->sadb_address_proto) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); + } /* make secindex */ - /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &spidx); - /* checking the direciton. */ - switch (xpl0->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - case IPSEC_DIR_OUTBOUND: - break; - default: - ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - /* Is there SP in SPD ? */ if ((sp = key_getsp(&spidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found.\n", __func__)); return key_senderror(so, m, EINVAL); } /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); key_unlink(sp); - KEY_FREESP(&sp); + key_freesp(&sp); { struct mbuf *n; struct sadb_msg *newmsg; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_SPDDELETE2 processing * receive * * from the user(?), and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spddelete2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - u_int32_t id; struct secpolicy *sp; + uint32_t id; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_X_EXT_POLICY] == NULL || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); + if (SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", + __func__)); return key_senderror(so, m, EINVAL); } - id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + id = ((struct sadb_x_policy *) + mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); + ipseclog((LOG_DEBUG, "%s: no SP found for id %u.\n", + __func__, id)); return key_senderror(so, m, EINVAL); } + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); key_unlink(sp); - KEY_FREESP(&sp); + if (sp->state != IPSEC_SPSTATE_DEAD) { + ipseclog((LOG_DEBUG, "%s: failed to delete SP with id %u.\n", + __func__, id)); + key_freesp(&sp); + return (key_senderror(so, m, EACCES)); + } + key_freesp(&sp); { struct mbuf *n, *nn; struct sadb_msg *newmsg; int off, len; /* create new sadb_msg to reply. */ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); MGETHDR(n, M_NOWAIT, MT_DATA); if (n && len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } if (!n) return key_senderror(so, m, ENOBUFS); n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); IPSEC_ASSERT(off == len, ("length inconsistency (off %u len %u)", off, len)); n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY], mhp->extlen[SADB_X_EXT_POLICY], M_NOWAIT); if (!n->m_next) { m_freem(n); return key_senderror(so, m, ENOBUFS); } n->m_pkthdr.len = 0; for (nn = n; nn; nn = nn->m_next) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_X_SPDGET processing * receive * * from the user(?), * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spdget(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - u_int32_t id; struct secpolicy *sp; struct mbuf *n; + uint32_t id; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_X_EXT_POLICY] == NULL || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + id = ((struct sadb_x_policy *) + mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); + ipseclog((LOG_DEBUG, "%s: no SP found for id %u.\n", + __func__, id)); return key_senderror(so, m, ENOENT); } n = key_setdumpsp(sp, SADB_X_SPDGET, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); - KEY_FREESP(&sp); + key_freesp(&sp); if (n != NULL) { m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } else return key_senderror(so, m, ENOBUFS); } /* * SADB_X_SPDACQUIRE processing. * Acquire policy and SA(s) for a *OUTBOUND* packet. * send * * to KMD, and expect to receive * with SADB_X_SPDACQUIRE if error occurred, * or * * with SADB_X_SPDUPDATE from KMD by PF_KEY. * policy(*) is without policy requests. * * 0 : succeed * others: error number */ int key_spdacquire(struct secpolicy *sp) { struct mbuf *result = NULL, *m; struct secspacq *newspacq; IPSEC_ASSERT(sp != NULL, ("null secpolicy")); IPSEC_ASSERT(sp->req == NULL, ("policy exists")); IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("policy not IPSEC %u", sp->policy)); /* Get an entry to check whether sent message or not. */ newspacq = key_getspacq(&sp->spidx); if (newspacq != NULL) { if (V_key_blockacq_count < newspacq->count) { /* reset counter and do send message. */ newspacq->count = 0; } else { /* increment counter and do nothing. */ newspacq->count++; SPACQ_UNLOCK(); return (0); } SPACQ_UNLOCK(); } else { /* make new entry for blocking to send SADB_ACQUIRE. */ newspacq = key_newspacq(&sp->spidx); if (newspacq == NULL) return ENOBUFS; } /* create new sadb_msg to reply. */ m = key_setsadbmsg(SADB_X_SPDACQUIRE, 0, 0, 0, 0, 0); if (!m) return ENOBUFS; result = m; result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, m, KEY_SENDUP_REGISTERED); } /* * SADB_SPDFLUSH processing * receive * * from the user, and free all entries in secpctree. * and send, * * to the user. * NOTE: what to do is only marking SADB_SASTATE_DEAD. * * m will always be freed. */ static int key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - TAILQ_HEAD(, secpolicy) drainq; + struct secpolicy_queue drainq; struct sadb_msg *newmsg; struct secpolicy *sp, *nextsp; u_int dir; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (m->m_len != PFKEY_ALIGN8(sizeof(struct sadb_msg))) return key_senderror(so, m, EINVAL); TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_CONCAT(&drainq, &V_sptree[dir], chain); } /* * We need to set state to DEAD for each policy to be sure, * that another thread won't try to unlink it. + * Also remove SP from sphash. */ - TAILQ_FOREACH(sp, &drainq, chain) + TAILQ_FOREACH(sp, &drainq, chain) { sp->state = IPSEC_SPSTATE_DEAD; + LIST_REMOVE(sp, idhash); + } + V_sp_genid++; SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); - KEY_FREESP(&sp); + key_freesp(&sp); sp = nextsp; } if (sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } if (m->m_next) m_freem(m->m_next); m->m_next = NULL; m->m_pkthdr.len = m->m_len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } /* * SADB_SPDDUMP processing * receive * * from the user, and dump all SP leaves * and send, * ..... * to the ikmpd. * * m will always be freed. */ static int key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; + struct mbuf *n; int cnt; u_int dir; - struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* search SPD entry and get buffer size. */ cnt = 0; SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_FOREACH(sp, &V_sptree[dir], chain) { cnt++; } + TAILQ_FOREACH(sp, &V_sptree_ifnet[dir], chain) { + cnt++; + } } if (cnt == 0) { SPTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_FOREACH(sp, &V_sptree[dir], chain) { --cnt; n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, mhp->msg->sadb_msg_pid); if (n) key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } + TAILQ_FOREACH(sp, &V_sptree_ifnet[dir], chain) { + --cnt; + n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, + mhp->msg->sadb_msg_pid); + + if (n) + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } } SPTREE_RUNLOCK(); m_freem(m); - return 0; + return (0); } static struct mbuf * key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, u_int32_t pid) { struct mbuf *result = NULL, *m; struct seclifetime lt; m = key_setsadbmsg(type, 0, SADB_SATYPE_UNSPEC, seq, pid, sp->refcnt); if (!m) goto fail; result = m; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); if (!m) goto fail; m_cat(result, m); m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sp->spidx.dst.sa, sp->spidx.prefd, sp->spidx.ul_proto); if (!m) goto fail; m_cat(result, m); - m = key_sp2msg(sp); + m = key_sp2mbuf(sp); if (!m) goto fail; m_cat(result, m); if(sp->lifetime){ lt.addtime=sp->created; lt.usetime= sp->lastused; m = key_setlifetime(<, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; m_cat(result, m); lt.addtime=sp->lifetime; lt.usetime= sp->validtime; m = key_setlifetime(<, SADB_EXT_LIFETIME_HARD); if (!m) goto fail; m_cat(result, m); } if ((result->m_flags & M_PKTHDR) == 0) goto fail; if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return result; fail: m_freem(result); return NULL; } - /* * get PFKEY message length for security policy and request. */ -static u_int +static size_t key_getspreqmsglen(struct secpolicy *sp) { - u_int tlen; + size_t tlen, len; + int i; tlen = sizeof(struct sadb_x_policy); - /* if is the policy for ipsec ? */ if (sp->policy != IPSEC_POLICY_IPSEC) - return tlen; + return (tlen); /* get length of ipsec requests */ - { - struct ipsecrequest *isr; - int len; - - for (isr = sp->req; isr != NULL; isr = isr->next) { + for (i = 0; i < sp->tcount; i++) { len = sizeof(struct sadb_x_ipsecrequest) - + isr->saidx.src.sa.sa_len - + isr->saidx.dst.sa.sa_len; + + sp->req[i]->saidx.src.sa.sa_len + + sp->req[i]->saidx.dst.sa.sa_len; tlen += PFKEY_ALIGN8(len); } - } - - return tlen; + return (tlen); } /* * SADB_SPDEXPIRE processing * send * * to KMD by PF_KEY. * * OUT: 0 : succeed * others : error number */ static int key_spdexpire(struct secpolicy *sp) { - struct mbuf *result = NULL, *m; - int len; - int error = -1; struct sadb_lifetime *lt; - - /* XXX: Why do we lock ? */ + struct mbuf *result = NULL, *m; + int len, error = -1; IPSEC_ASSERT(sp != NULL, ("null secpolicy")); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + /* set msg header */ m = key_setsadbmsg(SADB_X_SPDEXPIRE, 0, 0, 0, 0, 0); if (!m) { error = ENOBUFS; goto fail; } result = m; /* create lifetime extension (current and hard) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) { error = ENOBUFS; goto fail; } m_align(m, len); m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = 0; lt->sadb_lifetime_addtime = sp->created; lt->sadb_lifetime_usetime = sp->lastused; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = 0; lt->sadb_lifetime_addtime = sp->lifetime; lt->sadb_lifetime_usetime = sp->validtime; m_cat(result, m); - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sp->spidx.dst.sa, sp->spidx.prefd, sp->spidx.ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set secpolicy */ - m = key_sp2msg(sp); + m = key_sp2mbuf(sp); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } /* %%% SAD management */ /* - * allocating a memory for new SA head, and copy from the values of mhp. + * allocating and initialize new SA head. * OUT: NULL : failure due to the lack of memory. * others : pointer to new SA head. */ static struct secashead * key_newsah(struct secasindex *saidx) { - struct secashead *newsah; + struct secashead *sah; - IPSEC_ASSERT(saidx != NULL, ("null saidx")); + sah = malloc(sizeof(struct secashead), M_IPSEC_SAH, + M_NOWAIT | M_ZERO); + if (sah == NULL) { + PFKEYSTAT_INC(in_nomem); + return (NULL); + } + TAILQ_INIT(&sah->savtree_larval); + TAILQ_INIT(&sah->savtree_alive); + sah->saidx = *saidx; + sah->state = SADB_SASTATE_DEAD; + SAH_INITREF(sah); - newsah = malloc(sizeof(struct secashead), M_IPSEC_SAH, M_NOWAIT|M_ZERO); - if (newsah != NULL) { - int i; - for (i = 0; i < sizeof(newsah->savtree)/sizeof(newsah->savtree[0]); i++) - LIST_INIT(&newsah->savtree[i]); - newsah->saidx = *saidx; + KEYDBG(KEY_STAMP, + printf("%s: SAH(%p)\n", __func__, sah)); + KEYDBG(KEY_DATA, kdebug_secash(sah, NULL)); + return (sah); +} - /* add to saidxtree */ - newsah->state = SADB_SASTATE_MATURE; +static void +key_freesah(struct secashead **psah) +{ + struct secashead *sah = *psah; - SAHTREE_LOCK(); - LIST_INSERT_HEAD(&V_sahtree, newsah, chain); - SAHTREE_UNLOCK(); - } - return(newsah); + if (SAH_DELREF(sah) == 0) + return; + + KEYDBG(KEY_STAMP, + printf("%s: last reference to SAH(%p)\n", __func__, sah)); + KEYDBG(KEY_DATA, kdebug_secash(sah, NULL)); + + *psah = NULL; + key_delsah(sah); } -/* - * delete SA index and all SA registerd. - */ static void key_delsah(struct secashead *sah) { - struct secasvar *sav, *nextsav; - u_int stateidx; - int zombie = 0; - IPSEC_ASSERT(sah != NULL, ("NULL sah")); - SAHTREE_LOCK_ASSERT(); - - /* searching all SA registerd in the secindex. */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - u_int state = saorder_state_any[stateidx]; - LIST_FOREACH_SAFE(sav, &sah->savtree[state], chain, nextsav) { - if (sav->refcnt == 0) { - /* sanity check */ - KEY_CHKSASTATE(state, sav->state, __func__); - /* - * do NOT call KEY_FREESAV here: - * it will only delete the sav if refcnt == 1, - * where we already know that refcnt == 0 - */ - key_delsav(sav); - } else { - /* give up to delete this sa */ - zombie++; - } - } - } - if (!zombie) { /* delete only if there are savs */ - /* remove from tree of SA index */ - if (__LIST_CHAINED(sah)) - LIST_REMOVE(sah, chain); - free(sah, M_IPSEC_SAH); - } + IPSEC_ASSERT(sah->state == SADB_SASTATE_DEAD, + ("Attempt to free non DEAD SAH %p", sah)); + IPSEC_ASSERT(TAILQ_EMPTY(&sah->savtree_larval), + ("Attempt to free SAH %p with LARVAL SA", sah)); + IPSEC_ASSERT(TAILQ_EMPTY(&sah->savtree_alive), + ("Attempt to free SAH %p with ALIVE SA", sah)); + + free(sah, M_IPSEC_SAH); } /* - * allocating a new SA with LARVAL state. key_add() and key_getspi() call, + * allocating a new SA for key_add() and key_getspi() call, * and copy the values of mhp into new buffer. - * When SAD message type is GETSPI: - * to set sequence number from acq_seq++, - * to set zero to SPI. - * not to call key_setsava(). + * When SAD message type is SADB_GETSPI set SA state to LARVAL. + * For SADB_ADD create and initialize SA with MATURE state. * OUT: NULL : fail * others : pointer to new secasvar. - * - * does not modify mbuf. does not free mbuf on error. */ static struct secasvar * -key_newsav(struct mbuf *m, const struct sadb_msghdr *mhp, - struct secashead *sah, int *errp, const char *where, int tag) +key_newsav(const struct sadb_msghdr *mhp, struct secasindex *saidx, + uint32_t spi, int *errp) { - struct secasvar *newsav; - const struct sadb_sa *xsa; + struct secashead *sah; + struct secasvar *sav; + int isnew; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - IPSEC_ASSERT(sah != NULL, ("null secashead")); - - newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT|M_ZERO); - if (newsav == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - *errp = ENOBUFS; - goto done; - } + IPSEC_ASSERT(mhp->msg->sadb_msg_type == SADB_GETSPI || + mhp->msg->sadb_msg_type == SADB_ADD, ("wrong message type")); - switch (mhp->msg->sadb_msg_type) { - case SADB_GETSPI: - newsav->spi = 0; - -#ifdef IPSEC_DOSEQCHECK - /* sync sequence number */ - if (mhp->msg->sadb_msg_seq == 0) - newsav->seq = - (V_acq_seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq)); - else -#endif - newsav->seq = mhp->msg->sadb_msg_seq; - break; - - case SADB_ADD: - /* sanity check */ - if (mhp->ext[SADB_EXT_SA] == NULL) { - free(newsav, M_IPSEC_SA); - newsav = NULL; - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + sav = NULL; + sah = NULL; + /* check SPI value */ + switch (saidx->proto) { + case IPPROTO_ESP: + case IPPROTO_AH: + /* + * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values + * 1-255 reserved by IANA for future use, + * 0 for implementation specific, local use. + */ + if (ntohl(spi) <= 255) { + ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", + __func__, ntohl(spi))); *errp = EINVAL; goto done; } - xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - newsav->spi = xsa->sadb_sa_spi; - newsav->seq = mhp->msg->sadb_msg_seq; break; - default: - free(newsav, M_IPSEC_SA); - newsav = NULL; - *errp = EINVAL; - goto done; } + sav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT | M_ZERO); + if (sav == NULL) { + *errp = ENOBUFS; + goto done; + } + sav->lock = malloc(sizeof(struct mtx), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->lock == NULL) { + *errp = ENOBUFS; + goto done; + } + mtx_init(sav->lock, "ipsec association", NULL, MTX_DEF); + sav->lft_c = uma_zalloc(V_key_lft_zone, M_NOWAIT); + if (sav->lft_c == NULL) { + *errp = ENOBUFS; + goto done; + } + counter_u64_zero(sav->lft_c_allocations); + counter_u64_zero(sav->lft_c_bytes); - /* copy sav values */ - if (mhp->msg->sadb_msg_type != SADB_GETSPI) { - *errp = key_setsaval(newsav, m, mhp); - if (*errp) { - free(newsav, M_IPSEC_SA); - newsav = NULL; + sav->spi = spi; + sav->seq = mhp->msg->sadb_msg_seq; + sav->state = SADB_SASTATE_LARVAL; + sav->pid = (pid_t)mhp->msg->sadb_msg_pid; + SAV_INITREF(sav); +again: + sah = key_getsah(saidx); + if (sah == NULL) { + /* create a new SA index */ + sah = key_newsah(saidx); + if (sah == NULL) { + ipseclog((LOG_DEBUG, + "%s: No more memory.\n", __func__)); + *errp = ENOBUFS; goto done; } - } - - SECASVAR_LOCK_INIT(newsav); - - /* reset created */ - newsav->created = time_second; - newsav->pid = mhp->msg->sadb_msg_pid; + isnew = 1; + } else + isnew = 0; - /* add to satree */ - newsav->sah = sah; - sa_initref(newsav); - newsav->state = SADB_SASTATE_LARVAL; + sav->sah = sah; + if (mhp->msg->sadb_msg_type == SADB_GETSPI) { + sav->created = time_second; + } else if (sav->state == SADB_SASTATE_LARVAL) { + /* + * Do not call key_setsaval() second time in case + * of `goto again`. We will have MATURE state. + */ + *errp = key_setsaval(sav, mhp); + if (*errp != 0) + goto done; + sav->state = SADB_SASTATE_MATURE; + } - SAHTREE_LOCK(); - LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav, - secasvar, chain); - SAHTREE_UNLOCK(); + SAHTREE_WLOCK(); + /* + * Check that existing SAH wasn't unlinked. + * Since we didn't hold the SAHTREE lock, it is possible, + * that callout handler or key_flush() or key_delete() could + * unlink this SAH. + */ + if (isnew == 0 && sah->state == SADB_SASTATE_DEAD) { + SAHTREE_WUNLOCK(); + key_freesah(&sah); /* reference from key_getsah() */ + goto again; + } + if (isnew != 0) { + /* + * Add new SAH into SADB. + * + * XXXAE: we can serialize key_add and key_getspi calls, so + * several threads will not fight in the race. + * Otherwise we should check under SAHTREE lock, that this + * SAH would not added twice. + */ + TAILQ_INSERT_HEAD(&V_sahtree, sah, chain); + /* Add new SAH into hash by addresses */ + LIST_INSERT_HEAD(SAHADDRHASH_HASH(saidx), sah, addrhash); + /* Now we are linked in the chain */ + sah->state = SADB_SASTATE_MATURE; + /* + * SAV references this new SAH. + * In case of existing SAH we reuse reference + * from key_getsah(). + */ + SAH_ADDREF(sah); + } + /* Link SAV with SAH */ + if (sav->state == SADB_SASTATE_MATURE) + TAILQ_INSERT_HEAD(&sah->savtree_alive, sav, chain); + else + TAILQ_INSERT_HEAD(&sah->savtree_larval, sav, chain); + /* Add SAV into SPI hash */ + LIST_INSERT_HEAD(SAVHASH_HASH(sav->spi), sav, spihash); + SAHTREE_WUNLOCK(); + *errp = 0; /* success */ done: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u return SP:%p\n", __func__, - where, tag, newsav)); - - return newsav; + if (*errp != 0) { + if (sav != NULL) { + if (sav->lock != NULL) { + mtx_destroy(sav->lock); + free(sav->lock, M_IPSEC_MISC); + } + if (sav->lft_c != NULL) + uma_zfree(V_key_lft_zone, sav->lft_c); + free(sav, M_IPSEC_SA), sav = NULL; + } + if (sah != NULL) + key_freesah(&sah); + if (*errp == ENOBUFS) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); + PFKEYSTAT_INC(in_nomem); + } + } + return (sav); } /* * free() SA variable entry. */ static void key_cleansav(struct secasvar *sav) { + + if (sav->natt != NULL) { + free(sav->natt, M_IPSEC_MISC); + sav->natt = NULL; + } + if (sav->flags & SADB_X_EXT_F_CLONED) + return; /* * Cleanup xform state. Note that zeroize'ing causes the * keys to be cleared; otherwise we must do it ourself. */ if (sav->tdb_xform != NULL) { sav->tdb_xform->xf_zeroize(sav); sav->tdb_xform = NULL; } else { if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); if (sav->key_enc != NULL) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); } if (sav->key_auth != NULL) { if (sav->key_auth->key_data != NULL) free(sav->key_auth->key_data, M_IPSEC_MISC); free(sav->key_auth, M_IPSEC_MISC); sav->key_auth = NULL; } if (sav->key_enc != NULL) { if (sav->key_enc->key_data != NULL) free(sav->key_enc->key_data, M_IPSEC_MISC); free(sav->key_enc, M_IPSEC_MISC); sav->key_enc = NULL; } - if (sav->sched) { - bzero(sav->sched, sav->schedlen); - free(sav->sched, M_IPSEC_MISC); - sav->sched = NULL; - } if (sav->replay != NULL) { if (sav->replay->bitmap != NULL) free(sav->replay->bitmap, M_IPSEC_MISC); free(sav->replay, M_IPSEC_MISC); sav->replay = NULL; } - if (sav->lft_c != NULL) { - free(sav->lft_c, M_IPSEC_MISC); - sav->lft_c = NULL; - } if (sav->lft_h != NULL) { free(sav->lft_h, M_IPSEC_MISC); sav->lft_h = NULL; } if (sav->lft_s != NULL) { free(sav->lft_s, M_IPSEC_MISC); sav->lft_s = NULL; } } /* * free() SA variable entry. */ static void key_delsav(struct secasvar *sav) { IPSEC_ASSERT(sav != NULL, ("null sav")); - IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); + IPSEC_ASSERT(sav->state == SADB_SASTATE_DEAD, + ("attempt to free non DEAD SA %p", sav)); + IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", + sav->refcnt)); - /* remove from SA header */ - if (__LIST_CHAINED(sav)) - LIST_REMOVE(sav, chain); + /* + * SA must be unlinked from the chain and hashtbl. + * If SA was cloned, we leave all fields untouched, + * except NAT-T config. + */ key_cleansav(sav); - SECASVAR_LOCK_DESTROY(sav); + if ((sav->flags & SADB_X_EXT_F_CLONED) == 0) { + mtx_destroy(sav->lock); + free(sav->lock, M_IPSEC_MISC); + uma_zfree(V_key_lft_zone, sav->lft_c); + } free(sav, M_IPSEC_SA); } /* - * search SAD. + * search SAH. * OUT: * NULL : not found - * others : found, pointer to a SA. + * others : found, referenced pointer to a SAH. */ static struct secashead * key_getsah(struct secasindex *saidx) { + SAHTREE_RLOCK_TRACKER; struct secashead *sah; - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) - break; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID) != 0) { + SAH_ADDREF(sah); + break; + } } - SAHTREE_UNLOCK(); - - return sah; + SAHTREE_RUNLOCK(); + return (sah); } /* - * check not to be duplicated SPI. - * NOTE: this function is too slow due to searching all SAD. + * Check not to be duplicated SPI. * OUT: - * NULL : not found - * others : found, pointer to a SA. + * 0 : not found + * 1 : found SA with given SPI. */ -static struct secasvar * -key_checkspidup(struct secasindex *saidx, u_int32_t spi) +static int +key_checkspidup(uint32_t spi) { - struct secashead *sah; + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - /* check address family */ - if (saidx->src.sa.sa_family != saidx->dst.sa.sa_family) { - ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", - __func__)); - return NULL; - } - - sav = NULL; - /* check all SAD */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (!key_ismyaddr((struct sockaddr *)&sah->saidx.dst)) - continue; - sav = key_getsavbyspi(sah, spi); - if (sav != NULL) + /* Assume SPI is in network byte order */ + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi == spi) break; } - SAHTREE_UNLOCK(); - - return sav; + SAHTREE_RUNLOCK(); + return (sav != NULL); } /* - * search SAD litmited alive SA, protocol, SPI. + * Search SA by SPI. * OUT: * NULL : not found - * others : found, pointer to a SA. + * others : found, referenced pointer to a SA. */ static struct secasvar * -key_getsavbyspi(struct secashead *sah, u_int32_t spi) +key_getsavbyspi(uint32_t spi) { + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - u_int stateidx, state; - sav = NULL; - SAHTREE_LOCK_ASSERT(); - /* search all status */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - - state = saorder_state_alive[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "%s: " - "invalid sav->state (queue: %d SA: %d)\n", - __func__, state, sav->state)); - continue; - } - - if (sav->spi == spi) - return sav; - } + /* Assume SPI is in network byte order */ + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi != spi) + continue; + SAV_ADDREF(sav); + break; } - - return NULL; + SAHTREE_RUNLOCK(); + return (sav); } -/* - * copy SA values from PF_KEY message except *SPI, SEQ, PID, STATE and TYPE*. - * You must update these if need. - * OUT: 0: success. - * !0: failure. - * - * does not modify mbuf. does not free mbuf on error. - */ static int -key_setsaval(struct secasvar *sav, struct mbuf *m, - const struct sadb_msghdr *mhp) +key_updatelifetimes(struct secasvar *sav, const struct sadb_msghdr *mhp) { - int error = 0; + struct seclifetime *lft_h, *lft_s, *tmp; + + /* Lifetime extension is optional, check that it is present. */ + if (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) { + /* + * In case of SADB_UPDATE we may need to change + * existing lifetimes. + */ + if (sav->state == SADB_SASTATE_MATURE) { + lft_h = lft_s = NULL; + goto reset; + } + return (0); + } + /* Both HARD and SOFT extensions must present */ + if ((SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + return (EINVAL); + } + if (SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_HARD) || + SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_SOFT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + return (EINVAL); + } + lft_h = key_dup_lifemsg((const struct sadb_lifetime *) + mhp->ext[SADB_EXT_LIFETIME_HARD], M_IPSEC_MISC); + if (lft_h == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + lft_s = key_dup_lifemsg((const struct sadb_lifetime *) + mhp->ext[SADB_EXT_LIFETIME_SOFT], M_IPSEC_MISC); + if (lft_s == NULL) { + PFKEYSTAT_INC(in_nomem); + free(lft_h, M_IPSEC_MISC); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } +reset: + if (sav->state != SADB_SASTATE_LARVAL) { + /* + * key_update() holds reference to this SA, + * so it won't be deleted in meanwhile. + */ + SECASVAR_LOCK(sav); + tmp = sav->lft_h; + sav->lft_h = lft_h; + lft_h = tmp; + + tmp = sav->lft_s; + sav->lft_s = lft_s; + lft_s = tmp; + SECASVAR_UNLOCK(sav); + if (lft_h != NULL) + free(lft_h, M_IPSEC_MISC); + if (lft_s != NULL) + free(lft_s, M_IPSEC_MISC); + return (0); + } + /* We can update lifetime without holding a lock */ + IPSEC_ASSERT(sav->lft_h == NULL, ("lft_h is already initialized\n")); + IPSEC_ASSERT(sav->lft_s == NULL, ("lft_s is already initialized\n")); + sav->lft_h = lft_h; + sav->lft_s = lft_s; + return (0); +} + +/* + * copy SA values from PF_KEY message except *SPI, SEQ, PID and TYPE*. + * You must update these if need. Expects only LARVAL SAs. + * OUT: 0: success. + * !0: failure. + */ +static int +key_setsaval(struct secasvar *sav, const struct sadb_msghdr *mhp) +{ + const struct sadb_sa *sa0; + const struct sadb_key *key0; + uint32_t replay; + size_t len; + int error; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); + IPSEC_ASSERT(sav->state == SADB_SASTATE_LARVAL, + ("Attempt to update non LARVAL SA")); - /* initialization */ - sav->replay = NULL; - sav->key_auth = NULL; - sav->key_enc = NULL; - sav->sched = NULL; - sav->schedlen = 0; - sav->lft_c = NULL; - sav->lft_h = NULL; - sav->lft_s = NULL; - sav->tdb_xform = NULL; /* transform */ - sav->tdb_encalgxform = NULL; /* encoding algorithm */ - sav->tdb_authalgxform = NULL; /* authentication algorithm */ - sav->tdb_compalgxform = NULL; /* compression algorithm */ - /* Initialize even if NAT-T not compiled in: */ - sav->natt_type = 0; - sav->natt_esp_frag_len = 0; + /* XXX rewrite */ + error = key_setident(sav->sah, mhp); + if (error != 0) + goto fail; /* SA */ - if (mhp->ext[SADB_EXT_SA] != NULL) { - const struct sadb_sa *sa0; - u_int32_t replay; - - sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_SA)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_SA)) { error = EINVAL; goto fail; } - + sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; sav->alg_auth = sa0->sadb_sa_auth; sav->alg_enc = sa0->sadb_sa_encrypt; sav->flags = sa0->sadb_sa_flags; + if ((sav->flags & SADB_KEY_FLAGS_MAX) != sav->flags) { + ipseclog((LOG_DEBUG, + "%s: invalid sa_flags 0x%08x.\n", __func__, + sav->flags)); + error = EINVAL; + goto fail; + } /* Optional replay window */ replay = 0; if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) replay = sa0->sadb_sa_replay; - if ((mhp->ext[SADB_X_EXT_SA_REPLAY]) != NULL) { + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_SA_REPLAY)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA_REPLAY)) { + error = EINVAL; + goto fail; + } replay = ((const struct sadb_x_sa_replay *) - mhp->ext[SADB_X_EXT_SA_REPLAY])->sadb_x_sa_replay_replay; + mhp->ext[SADB_X_EXT_SA_REPLAY])->sadb_x_sa_replay_replay; if (replay > UINT32_MAX - 32) { - ipseclog((LOG_DEBUG, "%s: replay window too big.\n", - __func__)); + ipseclog((LOG_DEBUG, + "%s: replay window too big.\n", __func__)); error = EINVAL; goto fail; } replay = (replay + 7) >> 3; } - sav->replay = (struct secreplay *) - malloc(sizeof(struct secreplay), - M_IPSEC_MISC, M_NOWAIT|M_ZERO); + sav->replay = malloc(sizeof(struct secreplay), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); if (sav->replay == NULL) { + PFKEYSTAT_INC(in_nomem); ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + __func__)); error = ENOBUFS; goto fail; } if (replay != 0) { /* number of 32b blocks to be allocated */ - u_int32_t bitmap_size; + uint32_t bitmap_size; /* RFC 6479: - * - the allocated replay window size must be a power of two - * - use an extra 32b block as a redundant window + * - the allocated replay window size must be + * a power of two. + * - use an extra 32b block as a redundant window. */ bitmap_size = 1; while (replay + 4 > bitmap_size) bitmap_size <<= 1; bitmap_size = bitmap_size / 4; - sav->replay->bitmap = malloc(bitmap_size*sizeof(u_int32_t), - M_IPSEC_MISC, M_NOWAIT|M_ZERO); + sav->replay->bitmap = malloc( + bitmap_size * sizeof(uint32_t), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); if (sav->replay->bitmap == NULL) { + PFKEYSTAT_INC(in_nomem); ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } sav->replay->bitmap_size = bitmap_size; sav->replay->wsize = replay; } } /* Authentication keys */ - if (mhp->ext[SADB_EXT_KEY_AUTH] != NULL) { - const struct sadb_key *key0; - int len; - - key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; - len = mhp->extlen[SADB_EXT_KEY_AUTH]; - - error = 0; - if (len < sizeof(*key0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_KEY_AUTH)) { error = EINVAL; goto fail; } + error = 0; + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; + len = mhp->extlen[SADB_EXT_KEY_AUTH]; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: case SADB_SATYPE_ESP: case SADB_X_SATYPE_TCPSIGNATURE: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_auth != SADB_X_AALG_NULL) error = EINVAL; break; case SADB_X_SATYPE_IPCOMP: default: error = EINVAL; break; } if (error) { ipseclog((LOG_DEBUG, "%s: invalid key_auth values.\n", __func__)); goto fail; } - sav->key_auth = (struct seckey *)key_dup_keymsg(key0, len, - M_IPSEC_MISC); + sav->key_auth = key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_auth == NULL ) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + PFKEYSTAT_INC(in_nomem); error = ENOBUFS; goto fail; } } /* Encryption key */ - if (mhp->ext[SADB_EXT_KEY_ENCRYPT] != NULL) { - const struct sadb_key *key0; - int len; - - key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; - len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; - - error = 0; - if (len < sizeof(*key0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_KEY_ENCRYPT)) { error = EINVAL; goto fail; } + error = 0; + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; + len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_ESP: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_enc != SADB_EALG_NULL) { error = EINVAL; break; } - sav->key_enc = (struct seckey *)key_dup_keymsg(key0, - len, - M_IPSEC_MISC); + sav->key_enc = key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_enc == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + PFKEYSTAT_INC(in_nomem); error = ENOBUFS; goto fail; } break; case SADB_X_SATYPE_IPCOMP: if (len != PFKEY_ALIGN8(sizeof(struct sadb_key))) error = EINVAL; sav->key_enc = NULL; /*just in case*/ break; case SADB_SATYPE_AH: case SADB_X_SATYPE_TCPSIGNATURE: default: error = EINVAL; break; } if (error) { ipseclog((LOG_DEBUG, "%s: invalid key_enc value.\n", __func__)); goto fail; } } /* set iv */ sav->ivlen = 0; - switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: - error = xform_init(sav, XF_AH); - break; - case SADB_SATYPE_ESP: - error = xform_init(sav, XF_ESP); - break; - case SADB_X_SATYPE_IPCOMP: - error = xform_init(sav, XF_IPCOMP); - break; - case SADB_X_SATYPE_TCPSIGNATURE: - error = xform_init(sav, XF_TCPSIGNATURE); - break; - } - if (error) { - ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", - __func__, mhp->msg->sadb_msg_satype)); - goto fail; - } - - /* reset created */ - sav->created = time_second; - - /* make lifetime for CURRENT */ - sav->lft_c = malloc(sizeof(struct seclifetime), M_IPSEC_MISC, M_NOWAIT); - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - error = ENOBUFS; - goto fail; - } - - sav->lft_c->allocations = 0; - sav->lft_c->bytes = 0; - sav->lft_c->addtime = time_second; - sav->lft_c->usetime = 0; - - /* lifetimes for HARD and SOFT */ - { - const struct sadb_lifetime *lft0; - - lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { + if (sav->flags & SADB_X_EXT_DERIV) { + ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " + "given to AH SA.\n", __func__)); error = EINVAL; goto fail; } - sav->lft_h = key_dup_lifemsg(lft0, M_IPSEC_MISC); - if (sav->lft_h == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - error = ENOBUFS; - goto fail; - } - /* to be initialize ? */ - } - - lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { + if (sav->alg_enc != SADB_EALG_NONE) { + ipseclog((LOG_DEBUG, "%s: protocol and algorithm " + "mismated.\n", __func__)); error = EINVAL; goto fail; } - sav->lft_s = key_dup_lifemsg(lft0, M_IPSEC_MISC); - if (sav->lft_s == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - error = ENOBUFS; - goto fail; - } - /* to be initialize ? */ - } - } - - return 0; - - fail: - /* initialization */ - key_cleansav(sav); - - return error; -} - -/* - * validation with a secasvar entry, and set SADB_SATYPE_MATURE. - * OUT: 0: valid - * other: errno - */ -static int -key_mature(struct secasvar *sav) -{ - int error; - - /* check SPI value */ - switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - case IPPROTO_AH: - /* - * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values - * 1-255 reserved by IANA for future use, - * 0 for implementation specific, local use. - */ - if (ntohl(sav->spi) <= 255) { - ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", - __func__, (u_int32_t)ntohl(sav->spi))); - return EINVAL; - } + error = xform_init(sav, XF_AH); break; - } - - /* check satype */ - switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - /* check flags */ - if ((sav->flags & (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) == - (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) { + case SADB_SATYPE_ESP: + if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_DERIV)) == + (SADB_X_EXT_OLD | SADB_X_EXT_DERIV)) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " - "given to old-esp.\n", __func__)); - return EINVAL; + "given to old-esp.\n", __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_ESP); break; - case IPPROTO_AH: - /* check flags */ - if (sav->flags & SADB_X_EXT_DERIV) { - ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " - "given to AH SA.\n", __func__)); - return EINVAL; - } - if (sav->alg_enc != SADB_EALG_NONE) { - ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); - } - error = xform_init(sav, XF_AH); - break; - case IPPROTO_IPCOMP: + case SADB_X_SATYPE_IPCOMP: if (sav->alg_auth != SADB_AALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); + "mismated.\n", __func__)); + error = EINVAL; + goto fail; } - if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 - && ntohl(sav->spi) >= 0x10000) { + if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 && + ntohl(sav->spi) >= 0x10000) { ipseclog((LOG_DEBUG, "%s: invalid cpi for IPComp.\n", - __func__)); - return(EINVAL); + __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_IPCOMP); break; - case IPPROTO_TCP: + case SADB_X_SATYPE_TCPSIGNATURE: if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); + "mismated.\n", __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_TCPSIGNATURE); break; default: ipseclog((LOG_DEBUG, "%s: Invalid satype.\n", __func__)); error = EPROTONOSUPPORT; - break; + goto fail; } - if (error == 0) { - SAHTREE_LOCK(); - key_sa_chgstate(sav, SADB_SASTATE_MATURE); - SAHTREE_UNLOCK(); + if (error) { + ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", + __func__, mhp->msg->sadb_msg_satype)); + goto fail; } + + /* Handle NAT-T headers */ + error = key_setnatt(sav, mhp); + if (error != 0) + goto fail; + + /* Initialize lifetime for CURRENT */ + sav->firstused = 0; + sav->created = time_second; + + /* lifetimes for HARD and SOFT */ + error = key_updatelifetimes(sav, mhp); + if (error == 0) + return (0); +fail: + key_cleansav(sav); return (error); } /* * subroutine for SADB_GET and SADB_DUMP. */ static struct mbuf * -key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, - u_int32_t seq, u_int32_t pid) +key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, + uint32_t seq, uint32_t pid) { + struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; - int i; - int dumporder[] = { + int i, dumporder[] = { SADB_EXT_SA, SADB_X_EXT_SA2, SADB_X_EXT_SA_REPLAY, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_LIFETIME_CURRENT, SADB_EXT_ADDRESS_SRC, - SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH, - SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC, - SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY, -#ifdef IPSEC_NAT_T + SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, + SADB_EXT_KEY_AUTH, SADB_EXT_KEY_ENCRYPT, + SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, + SADB_EXT_SENSITIVITY, SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, -#endif }; - u_int32_t replay_count; + uint32_t replay_count; m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt); if (m == NULL) goto fail; result = m; for (i = nitems(dumporder) - 1; i >= 0; i--) { m = NULL; switch (dumporder[i]) { case SADB_EXT_SA: m = key_setsadbsa(sav); if (!m) goto fail; break; case SADB_X_EXT_SA2: SECASVAR_LOCK(sav); replay_count = sav->replay ? sav->replay->count : 0; SECASVAR_UNLOCK(sav); m = key_setsadbxsa2(sav->sah->saidx.mode, replay_count, sav->sah->saidx.reqid); if (!m) goto fail; break; case SADB_X_EXT_SA_REPLAY: if (sav->replay == NULL || - sav->replay->wsize <= UINT8_MAX) + sav->replay->wsize <= UINT8_MAX) continue; m = key_setsadbxsareplay(sav->replay->wsize); if (!m) goto fail; break; case SADB_EXT_ADDRESS_SRC: m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) goto fail; break; case SADB_EXT_ADDRESS_DST: m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sav->sah->saidx.dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) goto fail; break; case SADB_EXT_KEY_AUTH: if (!sav->key_auth) continue; m = key_setkey(sav->key_auth, SADB_EXT_KEY_AUTH); if (!m) goto fail; break; case SADB_EXT_KEY_ENCRYPT: if (!sav->key_enc) continue; m = key_setkey(sav->key_enc, SADB_EXT_KEY_ENCRYPT); if (!m) goto fail; break; case SADB_EXT_LIFETIME_CURRENT: - if (!sav->lft_c) - continue; - m = key_setlifetime(sav->lft_c, - SADB_EXT_LIFETIME_CURRENT); + lft_c.addtime = sav->created; + lft_c.allocations = (uint32_t)counter_u64_fetch( + sav->lft_c_allocations); + lft_c.bytes = counter_u64_fetch(sav->lft_c_bytes); + lft_c.usetime = sav->firstused; + m = key_setlifetime(&lft_c, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; break; case SADB_EXT_LIFETIME_HARD: if (!sav->lft_h) continue; m = key_setlifetime(sav->lft_h, SADB_EXT_LIFETIME_HARD); if (!m) goto fail; break; case SADB_EXT_LIFETIME_SOFT: if (!sav->lft_s) continue; m = key_setlifetime(sav->lft_s, SADB_EXT_LIFETIME_SOFT); if (!m) goto fail; break; -#ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: - m = key_setsadbxtype(sav->natt_type); + if (sav->natt == NULL) + continue; + m = key_setsadbxtype(UDP_ENCAP_ESPINUDP); if (!m) goto fail; break; - + case SADB_X_EXT_NAT_T_DPORT: - m = key_setsadbxport( - KEY_PORTFROMSADDR(&sav->sah->saidx.dst), + if (sav->natt == NULL) + continue; + m = key_setsadbxport(sav->natt->dport, SADB_X_EXT_NAT_T_DPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_SPORT: - m = key_setsadbxport( - KEY_PORTFROMSADDR(&sav->sah->saidx.src), + if (sav->natt == NULL) + continue; + m = key_setsadbxport(sav->natt->sport, SADB_X_EXT_NAT_T_SPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_OAI: + if (sav->natt == NULL || + (sav->natt->flags & IPSEC_NATT_F_OAI) == 0) + continue; + m = key_setsadbaddr(SADB_X_EXT_NAT_T_OAI, + &sav->natt->oai.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; case SADB_X_EXT_NAT_T_OAR: + if (sav->natt == NULL || + (sav->natt->flags & IPSEC_NATT_F_OAR) == 0) + continue; + m = key_setsadbaddr(SADB_X_EXT_NAT_T_OAR, + &sav->natt->oar.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. */ continue; -#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: /* XXX: should we brought from SPD ? */ case SADB_EXT_SENSITIVITY: default: continue; } if (!m) goto fail; if (tres) m_cat(m, tres); tres = m; - } m_cat(result, tres); tres = NULL; if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return result; fail: m_freem(result); m_freem(tres); return NULL; } /* * set data into sadb_msg. */ static struct mbuf * key_setsadbmsg(u_int8_t type, u_int16_t tlen, u_int8_t satype, u_int32_t seq, pid_t pid, u_int16_t reserved) { struct mbuf *m; struct sadb_msg *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); if (len > MCLBYTES) return NULL; MGETHDR(m, M_NOWAIT, MT_DATA); if (m && len > MHLEN) { if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); m = NULL; } } if (!m) return NULL; m->m_pkthdr.len = m->m_len = len; m->m_next = NULL; p = mtod(m, struct sadb_msg *); bzero(p, len); p->sadb_msg_version = PF_KEY_V2; p->sadb_msg_type = type; p->sadb_msg_errno = 0; p->sadb_msg_satype = satype; p->sadb_msg_len = PFKEY_UNIT64(tlen); p->sadb_msg_reserved = reserved; p->sadb_msg_seq = seq; p->sadb_msg_pid = (u_int32_t)pid; return m; } /* * copy secasvar data into sadb_address. */ static struct mbuf * key_setsadbsa(struct secasvar *sav) { struct mbuf *m; struct sadb_sa *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_sa)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_sa *); bzero(p, len); p->sadb_sa_len = PFKEY_UNIT64(len); p->sadb_sa_exttype = SADB_EXT_SA; p->sadb_sa_spi = sav->spi; p->sadb_sa_replay = sav->replay ? - (sav->replay->wsize > UINT8_MAX ? - UINT8_MAX : sav->replay->wsize) : 0; + (sav->replay->wsize > UINT8_MAX ? UINT8_MAX : + sav->replay->wsize): 0; p->sadb_sa_state = sav->state; p->sadb_sa_auth = sav->alg_auth; p->sadb_sa_encrypt = sav->alg_enc; - p->sadb_sa_flags = sav->flags; - - return m; + p->sadb_sa_flags = sav->flags & SADB_KEY_FLAGS_MAX; + return (m); } /* * set data into sadb_address. */ static struct mbuf * key_setsadbaddr(u_int16_t exttype, const struct sockaddr *saddr, u_int8_t prefixlen, u_int16_t ul_proto) { struct mbuf *m; struct sadb_address *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_address)) + PFKEY_ALIGN8(saddr->sa_len); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_address *); bzero(p, len); p->sadb_address_len = PFKEY_UNIT64(len); p->sadb_address_exttype = exttype; p->sadb_address_proto = ul_proto; if (prefixlen == FULLMASK) { switch (saddr->sa_family) { case AF_INET: prefixlen = sizeof(struct in_addr) << 3; break; case AF_INET6: prefixlen = sizeof(struct in6_addr) << 3; break; default: ; /*XXX*/ } } p->sadb_address_prefixlen = prefixlen; p->sadb_address_reserved = 0; bcopy(saddr, mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_address)), saddr->sa_len); return m; } /* * set data into sadb_x_sa2. */ static struct mbuf * key_setsadbxsa2(u_int8_t mode, u_int32_t seq, u_int32_t reqid) { struct mbuf *m; struct sadb_x_sa2 *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa2)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_sa2 *); bzero(p, len); p->sadb_x_sa2_len = PFKEY_UNIT64(len); p->sadb_x_sa2_exttype = SADB_X_EXT_SA2; p->sadb_x_sa2_mode = mode; p->sadb_x_sa2_reserved1 = 0; p->sadb_x_sa2_reserved2 = 0; p->sadb_x_sa2_sequence = seq; p->sadb_x_sa2_reqid = reqid; return m; } /* * Set data into sadb_x_sa_replay. */ static struct mbuf * key_setsadbxsareplay(u_int32_t replay) { struct mbuf *m; struct sadb_x_sa_replay *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa_replay)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_sa_replay *); bzero(p, len); p->sadb_x_sa_replay_len = PFKEY_UNIT64(len); p->sadb_x_sa_replay_exttype = SADB_X_EXT_SA_REPLAY; p->sadb_x_sa_replay_replay = (replay << 3); return m; } -#ifdef IPSEC_NAT_T /* * Set a type in sadb_x_nat_t_type. */ static struct mbuf * key_setsadbxtype(u_int16_t type) { struct mbuf *m; size_t len; struct sadb_x_nat_t_type *p; len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_type)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_nat_t_type *); bzero(p, len); p->sadb_x_nat_t_type_len = PFKEY_UNIT64(len); p->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; p->sadb_x_nat_t_type_type = type; return (m); } /* * Set a port in sadb_x_nat_t_port. * In contrast to default RFC 2367 behaviour, port is in network byte order. */ static struct mbuf * key_setsadbxport(u_int16_t port, u_int16_t type) { struct mbuf *m; size_t len; struct sadb_x_nat_t_port *p; len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_port)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_nat_t_port *); bzero(p, len); p->sadb_x_nat_t_port_len = PFKEY_UNIT64(len); p->sadb_x_nat_t_port_exttype = type; p->sadb_x_nat_t_port_port = port; return (m); } -/* +/* * Get port from sockaddr. Port is in network byte order. */ -u_int16_t +uint16_t key_portfromsaddr(struct sockaddr *sa) { switch (sa->sa_family) { #ifdef INET case AF_INET: return ((struct sockaddr_in *)sa)->sin_port; #endif #ifdef INET6 case AF_INET6: return ((struct sockaddr_in6 *)sa)->sin6_port; #endif } - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s unexpected address family %d\n", - __func__, sa->sa_family)); return (0); } -#endif /* IPSEC_NAT_T */ /* * Set port in struct sockaddr. Port is in network byte order. */ -static void -key_porttosaddr(struct sockaddr *sa, u_int16_t port) +void +key_porttosaddr(struct sockaddr *sa, uint16_t port) { switch (sa->sa_family) { #ifdef INET case AF_INET: ((struct sockaddr_in *)sa)->sin_port = port; break; #endif #ifdef INET6 case AF_INET6: ((struct sockaddr_in6 *)sa)->sin6_port = port; break; #endif default: ipseclog((LOG_DEBUG, "%s: unexpected address family %d.\n", __func__, sa->sa_family)); break; } } /* * set data into sadb_x_policy */ static struct mbuf * key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id, u_int32_t priority) { struct mbuf *m; struct sadb_x_policy *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_policy)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_policy *); bzero(p, len); p->sadb_x_policy_len = PFKEY_UNIT64(len); p->sadb_x_policy_exttype = SADB_X_EXT_POLICY; p->sadb_x_policy_type = type; p->sadb_x_policy_dir = dir; p->sadb_x_policy_id = id; p->sadb_x_policy_priority = priority; return m; } /* %%% utilities */ /* Take a key message (sadb_key) from the socket and turn it into one * of the kernel's key structures (seckey). * * IN: pointer to the src * OUT: NULL no more memory */ struct seckey * -key_dup_keymsg(const struct sadb_key *src, u_int len, +key_dup_keymsg(const struct sadb_key *src, size_t len, struct malloc_type *type) { struct seckey *dst; - dst = (struct seckey *)malloc(sizeof(struct seckey), type, M_NOWAIT); + + dst = malloc(sizeof(*dst), type, M_NOWAIT); if (dst != NULL) { dst->bits = src->sadb_key_bits; - dst->key_data = (char *)malloc(len, type, M_NOWAIT); + dst->key_data = malloc(len, type, M_NOWAIT); if (dst->key_data != NULL) { - bcopy((const char *)src + sizeof(struct sadb_key), - dst->key_data, len); + bcopy((const char *)(src + 1), dst->key_data, len); } else { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); free(dst, type); dst = NULL; } } else { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); } - return dst; + return (dst); } /* Take a lifetime message (sadb_lifetime) passed in on a socket and * turn it into one of the kernel's lifetime structures (seclifetime). * * IN: pointer to the destination, source and malloc type * OUT: NULL, no more memory */ static struct seclifetime * key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type) { - struct seclifetime *dst = NULL; + struct seclifetime *dst; - dst = (struct seclifetime *)malloc(sizeof(struct seclifetime), - type, M_NOWAIT); + dst = malloc(sizeof(*dst), type, M_NOWAIT); if (dst == NULL) { - /* XXX counter */ ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - } else { - dst->allocations = src->sadb_lifetime_allocations; - dst->bytes = src->sadb_lifetime_bytes; - dst->addtime = src->sadb_lifetime_addtime; - dst->usetime = src->sadb_lifetime_usetime; - } - return dst; -} - -/* compare my own address - * OUT: 1: true, i.e. my address. - * 0: false - */ -int -key_ismyaddr(struct sockaddr *sa) -{ - - IPSEC_ASSERT(sa != NULL, ("null sockaddr")); - switch (sa->sa_family) { -#ifdef INET - case AF_INET: - return (in_localip(satosin(sa)->sin_addr)); -#endif -#ifdef INET6 - case AF_INET6: - return key_ismyaddr6((struct sockaddr_in6 *)sa); -#endif + return (NULL); } - - return 0; -} - -#ifdef INET6 -/* - * compare my own address for IPv6. - * 1: ours - * 0: other - */ -static int -key_ismyaddr6(struct sockaddr_in6 *sin6) -{ - struct in6_addr in6; - - if (!IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - return (in6_localip(&sin6->sin6_addr)); - - /* Convert address into kernel-internal form */ - in6 = sin6->sin6_addr; - in6.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff); - return (in6_localip(&in6)); + dst->allocations = src->sadb_lifetime_allocations; + dst->bytes = src->sadb_lifetime_bytes; + dst->addtime = src->sadb_lifetime_addtime; + dst->usetime = src->sadb_lifetime_usetime; + return (dst); } -#endif /*INET6*/ /* * compare two secasindex structure. * flag can specify to compare 2 saidxes. * compare two secasindex structure without both mode and reqid. * don't compare port. * IN: * saidx0: source, it can be in SAD. * saidx1: object. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpsaidx(const struct secasindex *saidx0, const struct secasindex *saidx1, int flag) { - int chkport = 0; /* sanity */ if (saidx0 == NULL && saidx1 == NULL) return 1; if (saidx0 == NULL || saidx1 == NULL) return 0; if (saidx0->proto != saidx1->proto) return 0; if (flag == CMP_EXACTLY) { if (saidx0->mode != saidx1->mode) return 0; if (saidx0->reqid != saidx1->reqid) return 0; - if (bcmp(&saidx0->src, &saidx1->src, saidx0->src.sa.sa_len) != 0 || - bcmp(&saidx0->dst, &saidx1->dst, saidx0->dst.sa.sa_len) != 0) + if (bcmp(&saidx0->src, &saidx1->src, + saidx0->src.sa.sa_len) != 0 || + bcmp(&saidx0->dst, &saidx1->dst, + saidx0->dst.sa.sa_len) != 0) return 0; } else { /* CMP_MODE_REQID, CMP_REQID, CMP_HEAD */ - if (flag == CMP_MODE_REQID - ||flag == CMP_REQID) { + if (flag == CMP_MODE_REQID || flag == CMP_REQID) { /* * If reqid of SPD is non-zero, unique SA is required. * The result must be of same reqid in this case. */ - if (saidx1->reqid != 0 && saidx0->reqid != saidx1->reqid) + if (saidx1->reqid != 0 && + saidx0->reqid != saidx1->reqid) return 0; } if (flag == CMP_MODE_REQID) { if (saidx0->mode != IPSEC_MODE_ANY && saidx0->mode != saidx1->mode) return 0; } -#ifdef IPSEC_NAT_T - /* - * If NAT-T is enabled, check ports for tunnel mode. - * Do not check ports if they are set to zero in the SPD. - * Also do not do it for native transport mode, as there - * is no port information available in the SP. - */ - if ((saidx1->mode == IPSEC_MODE_TUNNEL || - (saidx1->mode == IPSEC_MODE_TRANSPORT && - saidx1->proto == IPPROTO_ESP)) && - saidx1->src.sa.sa_family == AF_INET && - saidx1->dst.sa.sa_family == AF_INET && - ((const struct sockaddr_in *)(&saidx1->src))->sin_port && - ((const struct sockaddr_in *)(&saidx1->dst))->sin_port) - chkport = 1; -#endif /* IPSEC_NAT_T */ - - if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) { + if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, 0) != 0) return 0; - } - if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) { + if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, 0) != 0) return 0; - } } return 1; } /* * compare two secindex structure exactly. * IN: * spidx0: source, it is often in SPD. * spidx1: object, it is often from PFKEY message. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpspidx_exactly(struct secpolicyindex *spidx0, struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; if (spidx0 == NULL || spidx1 == NULL) return 0; if (spidx0->prefs != spidx1->prefs || spidx0->prefd != spidx1->prefd || spidx0->ul_proto != spidx1->ul_proto) return 0; return key_sockaddrcmp(&spidx0->src.sa, &spidx1->src.sa, 1) == 0 && key_sockaddrcmp(&spidx0->dst.sa, &spidx1->dst.sa, 1) == 0; } /* * compare two secindex structure with mask. * IN: * spidx0: source, it is often in SPD. * spidx1: object, it is often from IP header. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpspidx_withmask(struct secpolicyindex *spidx0, struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; if (spidx0 == NULL || spidx1 == NULL) return 0; if (spidx0->src.sa.sa_family != spidx1->src.sa.sa_family || spidx0->dst.sa.sa_family != spidx1->dst.sa.sa_family || spidx0->src.sa.sa_len != spidx1->src.sa.sa_len || spidx0->dst.sa.sa_len != spidx1->dst.sa.sa_len) return 0; /* if spidx.ul_proto == IPSEC_ULPROTO_ANY, ignore. */ if (spidx0->ul_proto != (u_int16_t)IPSEC_ULPROTO_ANY && spidx0->ul_proto != spidx1->ul_proto) return 0; switch (spidx0->src.sa.sa_family) { case AF_INET: if (spidx0->src.sin.sin_port != IPSEC_PORT_ANY && spidx0->src.sin.sin_port != spidx1->src.sin.sin_port) return 0; if (!key_bbcmp(&spidx0->src.sin.sin_addr, &spidx1->src.sin.sin_addr, spidx0->prefs)) return 0; break; case AF_INET6: if (spidx0->src.sin6.sin6_port != IPSEC_PORT_ANY && spidx0->src.sin6.sin6_port != spidx1->src.sin6.sin6_port) return 0; /* * scope_id check. if sin6_scope_id is 0, we regard it * as a wildcard scope, which matches any scope zone ID. */ if (spidx0->src.sin6.sin6_scope_id && spidx1->src.sin6.sin6_scope_id && spidx0->src.sin6.sin6_scope_id != spidx1->src.sin6.sin6_scope_id) return 0; if (!key_bbcmp(&spidx0->src.sin6.sin6_addr, &spidx1->src.sin6.sin6_addr, spidx0->prefs)) return 0; break; default: /* XXX */ if (bcmp(&spidx0->src, &spidx1->src, spidx0->src.sa.sa_len) != 0) return 0; break; } switch (spidx0->dst.sa.sa_family) { case AF_INET: if (spidx0->dst.sin.sin_port != IPSEC_PORT_ANY && spidx0->dst.sin.sin_port != spidx1->dst.sin.sin_port) return 0; if (!key_bbcmp(&spidx0->dst.sin.sin_addr, &spidx1->dst.sin.sin_addr, spidx0->prefd)) return 0; break; case AF_INET6: if (spidx0->dst.sin6.sin6_port != IPSEC_PORT_ANY && spidx0->dst.sin6.sin6_port != spidx1->dst.sin6.sin6_port) return 0; /* * scope_id check. if sin6_scope_id is 0, we regard it * as a wildcard scope, which matches any scope zone ID. */ if (spidx0->dst.sin6.sin6_scope_id && spidx1->dst.sin6.sin6_scope_id && spidx0->dst.sin6.sin6_scope_id != spidx1->dst.sin6.sin6_scope_id) return 0; if (!key_bbcmp(&spidx0->dst.sin6.sin6_addr, &spidx1->dst.sin6.sin6_addr, spidx0->prefd)) return 0; break; default: /* XXX */ if (bcmp(&spidx0->dst, &spidx1->dst, spidx0->dst.sa.sa_len) != 0) return 0; break; } /* XXX Do we check other field ? e.g. flowinfo */ return 1; } -/* returns 0 on match */ -static int -key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, - int port) -{ #ifdef satosin #undef satosin #endif #define satosin(s) ((const struct sockaddr_in *)s) #ifdef satosin6 #undef satosin6 #endif #define satosin6(s) ((const struct sockaddr_in6 *)s) +/* returns 0 on match */ +int +key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, + int port) +{ if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) return 1; switch (sa1->sa_family) { +#ifdef INET case AF_INET: if (sa1->sa_len != sizeof(struct sockaddr_in)) return 1; if (satosin(sa1)->sin_addr.s_addr != satosin(sa2)->sin_addr.s_addr) { return 1; } if (port && satosin(sa1)->sin_port != satosin(sa2)->sin_port) return 1; break; +#endif +#ifdef INET6 case AF_INET6: if (sa1->sa_len != sizeof(struct sockaddr_in6)) return 1; /*EINVAL*/ if (satosin6(sa1)->sin6_scope_id != satosin6(sa2)->sin6_scope_id) { return 1; } if (!IN6_ARE_ADDR_EQUAL(&satosin6(sa1)->sin6_addr, &satosin6(sa2)->sin6_addr)) { return 1; } if (port && satosin6(sa1)->sin6_port != satosin6(sa2)->sin6_port) { return 1; } break; +#endif default: if (bcmp(sa1, sa2, sa1->sa_len) != 0) return 1; break; } return 0; -#undef satosin -#undef satosin6 } -/* - * compare two buffers with mask. - * IN: - * addr1: source - * addr2: object - * bits: Number of bits to compare - * OUT: - * 1 : equal - * 0 : not equal - */ -static int -key_bbcmp(const void *a1, const void *a2, u_int bits) +/* returns 0 on match */ +int +key_sockaddrcmp_withmask(const struct sockaddr *sa1, + const struct sockaddr *sa2, size_t mask) { - const unsigned char *p1 = a1; + if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) + return (1); + + switch (sa1->sa_family) { +#ifdef INET + case AF_INET: + return (!key_bbcmp(&satosin(sa1)->sin_addr, + &satosin(sa2)->sin_addr, mask)); +#endif +#ifdef INET6 + case AF_INET6: + if (satosin6(sa1)->sin6_scope_id != + satosin6(sa2)->sin6_scope_id) + return (1); + return (!key_bbcmp(&satosin6(sa1)->sin6_addr, + &satosin6(sa2)->sin6_addr, mask)); +#endif + } + return (1); +} +#undef satosin +#undef satosin6 + +/* + * compare two buffers with mask. + * IN: + * addr1: source + * addr2: object + * bits: Number of bits to compare + * OUT: + * 1 : equal + * 0 : not equal + */ +static int +key_bbcmp(const void *a1, const void *a2, u_int bits) +{ + const unsigned char *p1 = a1; const unsigned char *p2 = a2; /* XXX: This could be considerably faster if we compare a word * at a time, but it is complicated on LSB Endian machines */ /* Handle null pointers */ if (p1 == NULL || p2 == NULL) return (p1 == p2); while (bits >= 8) { if (*p1++ != *p2++) return 0; bits -= 8; } if (bits > 0) { u_int8_t mask = ~((1<<(8-bits))-1); if ((*p1 & mask) != (*p2 & mask)) return 0; } return 1; /* Match! */ } static void key_flush_spd(time_t now) { SPTREE_RLOCK_TRACKER; - struct secpolicy *sp; + struct secpolicy_list drainq; + struct secpolicy *sp, *nextsp; u_int dir; - /* SPD */ + LIST_INIT(&drainq); + SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { -restart: - SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { if (sp->lifetime == 0 && sp->validtime == 0) continue; if ((sp->lifetime && now - sp->created > sp->lifetime) || (sp->validtime && now - sp->lastused > sp->validtime)) { + /* Hold extra reference to send SPDEXPIRE */ SP_ADDREF(sp); - SPTREE_RUNLOCK(); - key_spdexpire(sp); - key_unlink(sp); - KEY_FREESP(&sp); - goto restart; + LIST_INSERT_HEAD(&drainq, sp, drainq); } } - SPTREE_RUNLOCK(); + } + SPTREE_RUNLOCK(); + if (LIST_EMPTY(&drainq)) + return; + + SPTREE_WLOCK(); + sp = LIST_FIRST(&drainq); + while (sp != NULL) { + nextsp = LIST_NEXT(sp, drainq); + /* Check that SP is still linked */ + if (sp->state != IPSEC_SPSTATE_ALIVE) { + LIST_REMOVE(sp, drainq); + key_freesp(&sp); /* release extra reference */ + sp = nextsp; + continue; + } + TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + LIST_REMOVE(sp, idhash); + sp->state = IPSEC_SPSTATE_DEAD; + sp = nextsp; + } + V_sp_genid++; + SPTREE_WUNLOCK(); + + sp = LIST_FIRST(&drainq); + while (sp != NULL) { + nextsp = LIST_NEXT(sp, drainq); + key_spdexpire(sp); + key_freesp(&sp); /* release extra reference */ + key_freesp(&sp); /* release last reference */ + sp = nextsp; } } static void key_flush_sad(time_t now) { + SAHTREE_RLOCK_TRACKER; + struct secashead_list emptyq; + struct secasvar_list drainq, hexpireq, sexpireq, freeq; struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; - /* SAD */ - SAHTREE_LOCK(); - LIST_FOREACH_SAFE(sah, &V_sahtree, chain, nextsah) { - /* if sah has been dead, then delete it and process next sah. */ - if (sah->state == SADB_SASTATE_DEAD) { - key_delsah(sah); + LIST_INIT(&drainq); + LIST_INIT(&hexpireq); + LIST_INIT(&sexpireq); + LIST_INIT(&emptyq); + + SAHTREE_RLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + /* Check for empty SAH */ + if (TAILQ_EMPTY(&sah->savtree_larval) && + TAILQ_EMPTY(&sah->savtree_alive)) { + SAH_ADDREF(sah); + LIST_INSERT_HEAD(&emptyq, sah, drainq); continue; } - - /* if LARVAL entry doesn't become MATURE, delete it. */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_LARVAL], chain, nextsav) { - /* Need to also check refcnt for a larval SA ??? */ - if (now - sav->created > V_key_larval_lifetime) - KEY_FREESAV(&sav); + /* Add all stale LARVAL SAs into drainq */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + if (now - sav->created < V_key_larval_lifetime) + continue; + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&drainq, sav, drainq); } - - /* - * check MATURE entry to start to send expire message - * whether or not. - */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_MATURE], chain, nextsav) { - /* we don't need to check. */ - if (sav->lft_s == NULL) + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + /* lifetimes aren't specified */ + if (sav->lft_h == NULL) continue; - - /* sanity check */ - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG,"%s: there is no CURRENT " - "time, why?\n", __func__)); + SECASVAR_LOCK(sav); + /* + * Check again with lock held, because it may + * be updated by SADB_UPDATE. + */ + if (sav->lft_h == NULL) { + SECASVAR_UNLOCK(sav); continue; } /* * RFC 2367: * HARD lifetimes MUST take precedence over SOFT * lifetimes, meaning if the HARD and SOFT lifetimes * are the same, the HARD lifetime will appear on the * EXPIRE message. */ /* check HARD lifetime */ if ((sav->lft_h->addtime != 0 && now - sav->created > sav->lft_h->addtime) || - (sav->lft_h->bytes != 0 && - sav->lft_h->bytes < sav->lft_c->bytes)) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_expire(sav, 1); - KEY_FREESAV(&sav); + (sav->lft_h->usetime != 0 && sav->firstused && + now - sav->firstused > sav->lft_h->usetime) || + (sav->lft_h->bytes != 0 && counter_u64_fetch( + sav->lft_c_bytes) > sav->lft_h->bytes)) { + SECASVAR_UNLOCK(sav); + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&hexpireq, sav, drainq); + continue; } - /* check SOFT lifetime */ - else if ((sav->lft_s->addtime != 0 && + /* check SOFT lifetime (only for MATURE SAs) */ + if (sav->state == SADB_SASTATE_MATURE && ( + (sav->lft_s->addtime != 0 && now - sav->created > sav->lft_s->addtime) || - (sav->lft_s->bytes != 0 && - sav->lft_s->bytes < sav->lft_c->bytes)) { - key_sa_chgstate(sav, SADB_SASTATE_DYING); - key_expire(sav, 0); - } - } - - /* check DYING entry to change status to DEAD. */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DYING], chain, nextsav) { - /* we don't need to check. */ - if (sav->lft_h == NULL) - continue; - - /* sanity check */ - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG, "%s: there is no CURRENT " - "time, why?\n", __func__)); + (sav->lft_s->usetime != 0 && sav->firstused && + now - sav->firstused > sav->lft_s->usetime) || + (sav->lft_s->bytes != 0 && counter_u64_fetch( + sav->lft_c_bytes) > sav->lft_s->bytes))) { + SECASVAR_UNLOCK(sav); + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&sexpireq, sav, drainq); continue; } + SECASVAR_UNLOCK(sav); + } + } + SAHTREE_RUNLOCK(); - if (sav->lft_h->addtime != 0 && - now - sav->created > sav->lft_h->addtime) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_expire(sav, 1); - KEY_FREESAV(&sav); - } -#if 0 /* XXX Should we keep to send expire message until HARD lifetime ? */ - else if (sav->lft_s != NULL - && sav->lft_s->addtime != 0 - && now - sav->created > sav->lft_s->addtime) { - /* - * XXX: should be checked to be - * installed the valid SA. - */ + if (LIST_EMPTY(&emptyq) && LIST_EMPTY(&drainq) && + LIST_EMPTY(&hexpireq) && LIST_EMPTY(&sexpireq)) + return; - /* - * If there is no SA then sending - * expire message. - */ - key_expire(sav, 0); - } -#endif - /* check HARD lifetime by bytes */ - else if (sav->lft_h->bytes != 0 && - sav->lft_h->bytes < sav->lft_c->bytes) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_expire(sav, 1); - KEY_FREESAV(&sav); - } + LIST_INIT(&freeq); + SAHTREE_WLOCK(); + /* Unlink stale LARVAL SAs */ + sav = LIST_FIRST(&drainq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is still LARVAL */ + if (sav->state != SADB_SASTATE_LARVAL) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; } - - /* delete entry in DEAD */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DEAD], chain, nextsav) { - /* sanity check */ - if (sav->state != SADB_SASTATE_DEAD) { - ipseclog((LOG_DEBUG, "%s: invalid sav->state " - "(queue: %d SA: %d): kill it anyway\n", - __func__, - SADB_SASTATE_DEAD, sav->state)); - } - /* - * do not call key_freesav() here. - * sav should already be freed, and sav->refcnt - * shows other references to sav - * (such as from SPD). - */ + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sav = nextsav; + } + /* Unlink all SAs with expired HARD lifetime */ + sav = LIST_FIRST(&hexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is not unlinked */ + if (sav->state == SADB_SASTATE_DEAD) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; + } + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sav = nextsav; + } + /* Mark all SAs with expired SOFT lifetime as DYING */ + sav = LIST_FIRST(&sexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is not unlinked */ + if (sav->state == SADB_SASTATE_DEAD) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; + } + /* + * NOTE: this doesn't change SA order in the chain. + */ + sav->state = SADB_SASTATE_DYING; + sav = nextsav; + } + /* Unlink empty SAHs */ + sah = LIST_FIRST(&emptyq); + while (sah != NULL) { + nextsah = LIST_NEXT(sah, drainq); + /* Check that SAH is still empty and not unlinked */ + if (sah->state == SADB_SASTATE_DEAD || + !TAILQ_EMPTY(&sah->savtree_larval) || + !TAILQ_EMPTY(&sah->savtree_alive)) { + LIST_REMOVE(sah, drainq); + key_freesah(&sah); /* release extra reference */ + sah = nextsah; + continue; } + TAILQ_REMOVE(&V_sahtree, sah, chain); + LIST_REMOVE(sah, addrhash); + sah->state = SADB_SASTATE_DEAD; + sah = nextsah; + } + SAHTREE_WUNLOCK(); + + /* Send SPDEXPIRE messages */ + sav = LIST_FIRST(&hexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_expire(sav, 1); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release extra reference */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + sav = LIST_FIRST(&sexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_expire(sav, 0); + key_freesav(&sav); /* release extra reference */ + sav = nextsav; + } + /* Free stale LARVAL SAs */ + sav = LIST_FIRST(&drainq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release extra reference */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + /* Free SAs that were unlinked/changed by someone else */ + sav = LIST_FIRST(&freeq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_freesav(&sav); /* release extra reference */ + sav = nextsav; + } + /* Free empty SAH */ + sah = LIST_FIRST(&emptyq); + while (sah != NULL) { + nextsah = LIST_NEXT(sah, drainq); + key_freesah(&sah); /* release extra reference */ + key_freesah(&sah); /* release last reference */ + sah = nextsah; } - SAHTREE_UNLOCK(); } static void key_flush_acq(time_t now) { struct secacq *acq, *nextacq; /* ACQ tree */ ACQ_LOCK(); - for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { + acq = LIST_FIRST(&V_acqtree); + while (acq != NULL) { nextacq = LIST_NEXT(acq, chain); - if (now - acq->created > V_key_blockacq_lifetime - && __LIST_CHAINED(acq)) { + if (now - acq->created > V_key_blockacq_lifetime) { LIST_REMOVE(acq, chain); + LIST_REMOVE(acq, addrhash); + LIST_REMOVE(acq, seqhash); free(acq, M_IPSEC_SAQ); } + acq = nextacq; } ACQ_UNLOCK(); } static void key_flush_spacq(time_t now) { struct secspacq *acq, *nextacq; /* SP ACQ tree */ SPACQ_LOCK(); for (acq = LIST_FIRST(&V_spacqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (now - acq->created > V_key_blockacq_lifetime && __LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } SPACQ_UNLOCK(); } /* * time handler. * scanning SPD and SAD to check status for each entries, * and do to remove or to expire. * XXX: year 2038 problem may remain. */ static void key_timehandler(void *arg) { VNET_ITERATOR_DECL(vnet_iter); time_t now = time_second; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); key_flush_spd(now); key_flush_sad(now); key_flush_acq(now); key_flush_spacq(now); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! */ callout_schedule(&key_timer, hz); #endif /* IPSEC_DEBUG2 */ } u_long key_random() { u_long value; key_randomfill(&value, sizeof(value)); return value; } void key_randomfill(void *p, size_t l) { size_t n; u_long v; static int warn = 1; n = 0; n = (size_t)read_random(p, (u_int)l); /* last resort */ while (n < l) { v = random(); bcopy(&v, (u_int8_t *)p + n, l - n < sizeof(v) ? l - n : sizeof(v)); n += sizeof(v); if (warn) { printf("WARNING: pseudo-random number generator " "used for IPsec processing\n"); warn = 0; } } } /* * map SADB_SATYPE_* to IPPROTO_*. * if satype == SADB_SATYPE then satype is mapped to ~0. * OUT: * 0: invalid satype. */ -static u_int16_t -key_satype2proto(u_int8_t satype) +static uint8_t +key_satype2proto(uint8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: return IPSEC_PROTO_ANY; case SADB_SATYPE_AH: return IPPROTO_AH; case SADB_SATYPE_ESP: return IPPROTO_ESP; case SADB_X_SATYPE_IPCOMP: return IPPROTO_IPCOMP; case SADB_X_SATYPE_TCPSIGNATURE: return IPPROTO_TCP; default: return 0; } /* NOTREACHED */ } /* * map IPPROTO_* to SADB_SATYPE_* * OUT: * 0: invalid protocol type. */ -static u_int8_t -key_proto2satype(u_int16_t proto) +static uint8_t +key_proto2satype(uint8_t proto) { switch (proto) { case IPPROTO_AH: return SADB_SATYPE_AH; case IPPROTO_ESP: return SADB_SATYPE_ESP; case IPPROTO_IPCOMP: return SADB_X_SATYPE_IPCOMP; case IPPROTO_TCP: return SADB_X_SATYPE_TCPSIGNATURE; default: return 0; } /* NOTREACHED */ } /* %%% PF_KEY */ /* * SADB_GETSPI processing is to receive * * from the IKMPd, to assign a unique spi value, to hang on the INBOUND * tree with the status of LARVAL, and send * * to the IKMPd. * * IN: mhp: pointer to the pointer to each header. * OUT: NULL if fail. * other if success, return pointer to the message to send. */ static int key_getspi(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *newsah; - struct secasvar *newsav; - u_int8_t proto; - u_int32_t spi; - u_int8_t mode; - u_int32_t reqid; + struct sadb_address *src0, *dst0; + struct secasvar *sav; + uint32_t reqid, spi; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) +#ifdef PFKEY_STRICT_CHECKS + || SADB_CHECKHDR(mhp, SADB_EXT_SPIRANGE) +#endif + ) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + error = EINVAL; + goto fail; } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) +#ifdef PFKEY_STRICT_CHECKS + || SADB_CHECKLEN(mhp, SADB_EXT_SPIRANGE) +#endif + ) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + error = EINVAL; + goto fail; } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + error = EINVAL; + goto fail; + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - switch (((struct sockaddr *)(src0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(src0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(src0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + error = EINVAL; + goto fail; } - switch (((struct sockaddr *)(dst0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(dst0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(dst0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + error = EINVAL; + goto fail; } - - /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - * We made sure the port numbers are zero above, so we do - * not have to worry in case we do not update them. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL) - ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__)); - if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) - ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__)); - - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_type *type; - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid nat-t message " - "passed.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); - } -#endif - /* SPI allocation */ - spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], - &saidx); - if (spi == 0) - return key_senderror(so, m, EINVAL); - - /* get a SA index */ - if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA index */ - if ((newsah = key_newsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - return key_senderror(so, m, ENOBUFS); - } + spi = key_do_getnewspi( + (struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); + if (spi == 0) { + /* + * Requested SPI or SPI range is not available or + * already used. + */ + error = EEXIST; + goto fail; } + sav = key_newsav(mhp, &saidx, spi, &error); + if (sav == NULL) + goto fail; - /* get a new SA */ - /* XXX rewrite */ - newsav = KEY_NEWSAV(m, mhp, newsah, &error); - if (newsav == NULL) { - /* XXX don't free new SA index allocated in above. */ - return key_senderror(so, m, error); + if (sav->seq != 0) { + /* + * RFC2367: + * If the SADB_GETSPI message is in response to a + * kernel-generated SADB_ACQUIRE, the sadb_msg_seq + * MUST be the same as the SADB_ACQUIRE message. + * + * XXXAE: However it doesn't definethe behaviour how to + * check this and what to do if it doesn't match. + * Also what we should do if it matches? + * + * We can compare saidx used in SADB_ACQUIRE with saidx + * used in SADB_GETSPI, but this probably can break + * existing software. For now just warn if it doesn't match. + * + * XXXAE: anyway it looks useless. + */ + key_acqdone(&saidx, sav->seq); } - - /* set spi */ - newsav->spi = htonl(spi); - - /* delete the entry in acqtree */ - if (mhp->msg->sadb_msg_seq != 0) { - struct secacq *acq; - if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) != NULL) { - /* reset counter in order to deletion by timehandler. */ - acq->created = time_second; - acq->count = 0; - } - } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); { struct mbuf *n, *nn; struct sadb_sa *m_sa; struct sadb_msg *newmsg; int off, len; /* create new sadb_msg to reply. */ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)) + PFKEY_ALIGN8(sizeof(struct sadb_sa)); MGETHDR(n, M_NOWAIT, MT_DATA); if (len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } - if (!n) - return key_senderror(so, m, ENOBUFS); + if (!n) { + error = ENOBUFS; + goto fail; + } n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); m_sa = (struct sadb_sa *)(mtod(n, caddr_t) + off); m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); m_sa->sadb_sa_exttype = SADB_EXT_SA; - m_sa->sadb_sa_spi = htonl(spi); + m_sa->sadb_sa_spi = spi; /* SPI is already in network byte order */ off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); IPSEC_ASSERT(off == len, ("length inconsistency (off %u len %u)", off, len)); n->m_next = key_gather_mbuf(m, mhp, 0, 2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n->m_next) { m_freem(n); - return key_senderror(so, m, ENOBUFS); + error = ENOBUFS; + goto fail; } if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); } n->m_pkthdr.len = 0; for (nn = n; nn; nn = nn->m_next) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_seq = newsav->seq; + newmsg->sadb_msg_seq = sav->seq; newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } + +fail: + return (key_senderror(so, m, error)); } /* * allocating new SPI * called by key_getspi(). * OUT: * 0: failure. - * others: success. + * others: success, SPI in network byte order. */ -static u_int32_t +static uint32_t key_do_getnewspi(struct sadb_spirange *spirange, struct secasindex *saidx) { - u_int32_t newspi; - u_int32_t min, max; + uint32_t min, max, newspi, t; int count = V_key_spi_trycnt; /* set spi range to allocate */ if (spirange != NULL) { min = spirange->sadb_spirange_min; max = spirange->sadb_spirange_max; } else { min = V_key_spi_minval; max = V_key_spi_maxval; } /* IPCOMP needs 2-byte SPI */ if (saidx->proto == IPPROTO_IPCOMP) { - u_int32_t t; if (min >= 0x10000) min = 0xffff; if (max >= 0x10000) max = 0xffff; if (min > max) { t = min; min = max; max = t; } } if (min == max) { - if (key_checkspidup(saidx, min) != NULL) { + if (!key_checkspidup(htonl(min))) { ipseclog((LOG_DEBUG, "%s: SPI %u exists already.\n", - __func__, min)); + __func__, min)); return 0; } count--; /* taking one cost. */ newspi = min; - } else { /* init SPI */ newspi = 0; /* when requesting to allocate spi ranged */ while (count--) { /* generate pseudo-random SPI value ranged. */ newspi = min + (key_random() % (max - min + 1)); - - if (key_checkspidup(saidx, newspi) == NULL) + if (!key_checkspidup(htonl(newspi))) break; } if (count == 0 || newspi == 0) { - ipseclog((LOG_DEBUG, "%s: to allocate spi is failed.\n", - __func__)); + ipseclog((LOG_DEBUG, + "%s: failed to allocate SPI.\n", __func__)); return 0; } } /* statistics */ keystat.getspi_count = - (keystat.getspi_count + V_key_spi_trycnt - count) / 2; + (keystat.getspi_count + V_key_spi_trycnt - count) / 2; - return newspi; + return (htonl(newspi)); } /* - * SADB_UPDATE processing - * receive - * - * from the ikmpd, and update a secasvar entry whose status is SADB_SASTATE_LARVAL. - * and send - * - * to the ikmpd. - * - * m will always be freed. + * Find TCP-MD5 SA with corresponding secasindex. + * If not found, return NULL and fill SPI with usable value if needed. */ -static int -key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) +static struct secasvar * +key_getsav_tcpmd5(struct secasindex *saidx, uint32_t *spi) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; -#ifdef IPSEC_NAT_T - struct sadb_x_nat_t_type *type; - struct sadb_x_nat_t_port *sport, *dport; - struct sadb_address *iaddr, *raddr; - struct sadb_x_nat_t_frag *frag; -#endif - struct secasindex saidx; + SAHTREE_RLOCK_TRACKER; struct secashead *sah; struct secasvar *sav; - u_int16_t proto; - u_int8_t mode; - u_int32_t reqid; - int error; - - IPSEC_ASSERT(so != NULL, ("null socket")); - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(mhp != NULL, ("null msghdr")); - IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - /* map satype to proto */ - if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { - ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + IPSEC_ASSERT(saidx->proto == IPPROTO_TCP, ("wrong proto")); + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (sah->saidx.proto != IPPROTO_TCP) + continue; + if (!key_sockaddrcmp(&saidx->dst.sa, &sah->saidx.dst.sa, 0)) + break; } - - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && - mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && - mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (sah != NULL) { + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) { + SAV_ADDREF(sav); + SAHTREE_RUNLOCK(); + return (sav); + } } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (spi == NULL) { + /* No SPI required */ + SAHTREE_RUNLOCK(); + return (NULL); } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { - mode = IPSEC_MODE_ANY; - reqid = 0; + /* Check that SPI is unique */ + LIST_FOREACH(sav, SAVHASH_HASH(*spi), spihash) { + if (sav->spi == *spi) + break; } - /* XXX boundary checking for other extensions */ - - sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); + if (sav == NULL) { + SAHTREE_RUNLOCK(); + /* SPI is already unique */ + return (NULL); + } + SAHTREE_RUNLOCK(); + /* XXX: not optimal */ + *spi = key_do_getnewspi(NULL, saidx); + return (NULL); +} -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", +static int +key_updateaddresses(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp, struct secasvar *sav, + struct secasindex *saidx) +{ + struct sockaddr *newaddr; + struct secashead *sah; + struct secasvar *newsav, *tmp; + struct mbuf *n; + int error, isnew; + + /* Check that we need to change SAH */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC)) { + newaddr = (struct sockaddr *)( + ((struct sadb_address *) + mhp->ext[SADB_X_EXT_NEW_ADDRESS_SRC]) + 1); + bcopy(newaddr, &saidx->src, newaddr->sa_len); + key_porttosaddr(&saidx->src.sa, 0); + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST)) { + newaddr = (struct sockaddr *)( + ((struct sadb_address *) + mhp->ext[SADB_X_EXT_NEW_ADDRESS_DST]) + 1); + bcopy(newaddr, &saidx->dst, newaddr->sa_len); + key_porttosaddr(&saidx->dst.sa, 0); + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST)) { + error = key_checksockaddrs(&saidx->src.sa, &saidx->dst.sa); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid new sockaddr.\n", __func__)); - return key_senderror(so, m, EINVAL); + return (error); } - type = (struct sadb_x_nat_t_type *) - mhp->ext[SADB_X_EXT_NAT_T_TYPE]; - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + sah = key_getsah(saidx); + if (sah == NULL) { + /* create a new SA index */ + sah = key_newsah(saidx); + if (sah == NULL) { + ipseclog((LOG_DEBUG, + "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + isnew = 2; /* SAH is new */ + } else + isnew = 1; /* existing SAH is referenced */ } else { - type = NULL; - sport = dport = NULL; - } - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || - mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", + /* + * src and dst addresses are still the same. + * Do we want to change NAT-T config? + */ + if (sav->sah->saidx.proto != IPPROTO_ESP || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_SPORT) || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_DPORT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", __func__)); - return key_senderror(so, m, EINVAL); + return (EINVAL); } - iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; - raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; - ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); - } else { - iaddr = raddr = NULL; + /* We hold reference to SA, thus SAH will be referenced too. */ + sah = sav->sah; + isnew = 0; } - if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); + + newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, + M_NOWAIT | M_ZERO); + if (newsav == NULL) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + error = ENOBUFS; + goto fail; + } + + /* Clone SA's content into newsav */ + SAV_INITREF(newsav); + bcopy(sav, newsav, offsetof(struct secasvar, chain)); + /* + * We create new NAT-T config if it is needed. + * Old NAT-T config will be freed by key_cleansav() when + * last reference to SA will be released. + */ + newsav->natt = NULL; + newsav->sah = sah; + newsav->state = SADB_SASTATE_MATURE; + error = key_setnatt(sav, mhp); + if (error != 0) + goto fail; + + SAHTREE_WLOCK(); + /* Check that SA is still alive */ + if (sav->state == SADB_SASTATE_DEAD) { + /* SA was unlinked */ + SAHTREE_WUNLOCK(); + error = ESRCH; + goto fail; + } + + /* Unlink SA from SAH and SPI hash */ + IPSEC_ASSERT((sav->flags & SADB_X_EXT_F_CLONED) == 0, + ("SA is already cloned")); + IPSEC_ASSERT(sav->state == SADB_SASTATE_MATURE || + sav->state == SADB_SASTATE_DYING, + ("Wrong SA state %u\n", sav->state)); + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + + /* + * Link new SA with SAH. Keep SAs ordered by + * create time (newer are first). + */ + TAILQ_FOREACH(tmp, &sah->savtree_alive, chain) { + if (newsav->created > tmp->created) { + TAILQ_INSERT_BEFORE(tmp, newsav, chain); + break; } - frag = (struct sadb_x_nat_t_frag *) - mhp->ext[SADB_X_EXT_NAT_T_FRAG]; - } else { - frag = NULL; } -#endif + if (tmp == NULL) + TAILQ_INSERT_TAIL(&sah->savtree_alive, newsav, chain); - /* get a SA header */ - if ((sah = key_getsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SA index found.\n", __func__)); - return key_senderror(so, m, ENOENT); + /* Add new SA into SPI hash. */ + LIST_INSERT_HEAD(SAVHASH_HASH(newsav->spi), newsav, spihash); + + /* Add new SAH into SADB. */ + if (isnew == 2) { + TAILQ_INSERT_HEAD(&V_sahtree, sah, chain); + LIST_INSERT_HEAD(SAHADDRHASH_HASH(saidx), sah, addrhash); + sah->state = SADB_SASTATE_MATURE; + SAH_ADDREF(sah); /* newsav references new SAH */ } + /* + * isnew == 1 -> @sah was referenced by key_getsah(). + * isnew == 0 -> we use the same @sah, that was used by @sav, + * and we use its reference for @newsav. + */ + SECASVAR_LOCK(sav); + /* XXX: replace cntr with pointer? */ + newsav->cntr = sav->cntr; + sav->flags |= SADB_X_EXT_F_CLONED; + SECASVAR_UNLOCK(sav); - /* set spidx if there */ - /* XXX rewrite */ - error = key_setident(sah, m, mhp); - if (error) - return key_senderror(so, m, error); + SAHTREE_WUNLOCK(); - /* find a SA with sequence number. */ -#ifdef IPSEC_DOSEQCHECK - if (mhp->msg->sadb_msg_seq != 0 - && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no larval SA with sequence %u " - "exists.\n", __func__, mhp->msg->sadb_msg_seq)); - return key_senderror(so, m, ENOENT); + KEYDBG(KEY_STAMP, + printf("%s: SA(%p) cloned into SA(%p)\n", + __func__, sav, newsav)); + KEYDBG(KEY_DATA, kdebug_secasv(newsav)); + + key_freesav(&sav); /* release last reference */ + + /* set msg buf from mhp */ + n = key_getmsgbuf_x1(m, mhp); + if (n == NULL) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); } -#else - SAHTREE_LOCK(); - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - SAHTREE_UNLOCK(); - if (sav == NULL) { - ipseclog((LOG_DEBUG, "%s: no such a SA found (spi:%u)\n", - __func__, (u_int32_t)ntohl(sa0->sadb_sa_spi))); - return key_senderror(so, m, EINVAL); + m_freem(m); + key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + return (0); +fail: + if (isnew != 0) + key_freesah(&sah); + if (newsav != NULL) { + if (newsav->natt != NULL) + free(newsav->natt, M_IPSEC_MISC); + free(newsav, M_IPSEC_SA); } -#endif + return (error); +} - /* validity check */ - if (sav->sah->saidx.proto != proto) { - ipseclog((LOG_DEBUG, "%s: protocol mismatched " - "(DB=%u param=%u)\n", __func__, - sav->sah->saidx.proto, proto)); +/* + * SADB_UPDATE processing + * receive + * + * from the ikmpd, and update a secasvar entry whose status is SADB_SASTATE_LARVAL. + * and send + * + * to the ikmpd. + * + * m will always be freed. + */ +static int +key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) +{ + struct secasindex saidx; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint32_t reqid; + int error; + uint8_t mode, proto; + + IPSEC_ASSERT(so != NULL, ("null socket")); + IPSEC_ASSERT(m != NULL, ("null mbuf")); + IPSEC_ASSERT(mhp != NULL, ("null msghdr")); + IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); + + /* map satype to proto */ + if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { + ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", + __func__)); return key_senderror(so, m, EINVAL); } -#ifdef IPSEC_DOSEQCHECK - if (sav->spi != sa0->sadb_sa_spi) { - ipseclog((LOG_DEBUG, "%s: SPI mismatched (DB:%u param:%u)\n", - __func__, - (u_int32_t)ntohl(sav->spi), - (u_int32_t)ntohl(sa0->sadb_sa_spi))); + + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } -#endif - if (sav->pid != mhp->msg->sadb_msg_pid) { - ipseclog((LOG_DEBUG, "%s: pid mismatched (DB:%u param:%u)\n", - __func__, sav->pid, mhp->msg->sadb_msg_pid)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - - /* copy sav values */ - error = key_setsaval(sav, m, mhp); - if (error) { - KEY_FREESAV(&sav); - return key_senderror(so, m, error); + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { + mode = IPSEC_MODE_ANY; + reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return key_senderror(so, m, EINVAL); + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } -#ifdef IPSEC_NAT_T - /* - * Handle more NAT-T info if present, - * now that we have a sav to fill. - */ - if (type) - sav->natt_type = type->sadb_x_nat_t_type_type; - - if (sport) - KEY_PORTTOSADDR(&sav->sah->saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&sav->sah->saidx.dst, - dport->sadb_x_nat_t_port_port); + sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); + dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); -#if 0 /* - * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. - * We should actually check for a minimum MTU here, if we - * want to support it in ip_output. + * Only SADB_SASTATE_MATURE SAs may be submitted in an + * SADB_UPDATE message. */ - if (frag) - sav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; -#endif + if (sa0->sadb_sa_state != SADB_SASTATE_MATURE) { + ipseclog((LOG_DEBUG, "%s: invalid state.\n", __func__)); +#ifdef PFKEY_STRICT_CHECKS + return key_senderror(so, m, EINVAL); #endif - - /* check SA values to be mature. */ - if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { - KEY_FREESAV(&sav); - return key_senderror(so, m, 0); } + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, error); + } + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { + ipseclog((LOG_DEBUG, "%s: no SA found for SPI %u\n", + __func__, ntohl(sa0->sadb_sa_spi))); + return key_senderror(so, m, EINVAL); + } + /* + * Check that SADB_UPDATE issued by the same process that did + * SADB_GETSPI or SADB_ADD. + */ + if (sav->pid != mhp->msg->sadb_msg_pid) { + ipseclog((LOG_DEBUG, + "%s: pid mismatched (SPI %u, pid %u vs. %u)\n", __func__, + ntohl(sav->spi), sav->pid, mhp->msg->sadb_msg_pid)); + key_freesav(&sav); + return key_senderror(so, m, EINVAL); + } + /* saidx should match with SA. */ + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_MODE_REQID) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u", + __func__, ntohl(sav->spi))); + key_freesav(&sav); + return key_senderror(so, m, ESRCH); + } + + if (sav->state == SADB_SASTATE_LARVAL) { + if ((mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && + SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT)) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && + SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + key_freesav(&sav); + return key_senderror(so, m, EINVAL); + } + /* + * We can set any values except src, dst and SPI. + */ + error = key_setsaval(sav, mhp); + if (error != 0) { + key_freesav(&sav); + return (key_senderror(so, m, error)); + } + /* Change SA state to MATURE */ + SAHTREE_WLOCK(); + if (sav->state != SADB_SASTATE_LARVAL) { + /* SA was deleted or another thread made it MATURE. */ + SAHTREE_WUNLOCK(); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + /* + * NOTE: we keep SAs in savtree_alive ordered by created + * time. When SA's state changed from LARVAL to MATURE, + * we update its created time in key_setsaval() and move + * it into head of savtree_alive. + */ + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + TAILQ_INSERT_HEAD(&sav->sah->savtree_alive, sav, chain); + sav->state = SADB_SASTATE_MATURE; + SAHTREE_WUNLOCK(); + } else { + /* + * For DYING and MATURE SA we can change only state + * and lifetimes. Report EINVAL if something else attempted + * to change. + */ + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT) || + !SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH)) { + key_freesav(&sav); + return (key_senderror(so, m, EINVAL)); + } + error = key_updatelifetimes(sav, mhp); + if (error != 0) { + key_freesav(&sav); + return (key_senderror(so, m, error)); + } + /* + * This is FreeBSD extension to RFC2367. + * IKEd can specify SADB_X_EXT_NEW_ADDRESS_SRC and/or + * SADB_X_EXT_NEW_ADDRESS_DST when it wants to change + * SA addresses (for example to implement MOBIKE protocol + * as described in RFC4555). Also we allow to change + * NAT-T config. + */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) || + sav->natt != NULL) { + error = key_updateaddresses(so, m, mhp, sav, &saidx); + key_freesav(&sav); + if (error != 0) + return (key_senderror(so, m, error)); + return (0); + } + /* Check that SA is still alive */ + SAHTREE_WLOCK(); + if (sav->state == SADB_SASTATE_DEAD) { + /* SA was unlinked */ + SAHTREE_WUNLOCK(); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + /* + * NOTE: there is possible state moving from DYING to MATURE, + * but this doesn't change created time, so we won't reorder + * this SA. + */ + sav->state = SADB_SASTATE_MATURE; + SAHTREE_WUNLOCK(); + } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + key_freesav(&sav); { struct mbuf *n; /* set msg buf from mhp */ n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } -/* - * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL. - * only called by key_update(). - * OUT: - * NULL : not found - * others : found, pointer to a SA. - */ -#ifdef IPSEC_DOSEQCHECK -static struct secasvar * -key_getsavbyseq(struct secashead *sah, u_int32_t seq) -{ - struct secasvar *sav; - u_int state; - - state = SADB_SASTATE_LARVAL; - - /* search SAD with sequence number ? */ - LIST_FOREACH(sav, &sah->savtree[state], chain) { - - KEY_CHKSASTATE(state, sav->state, __func__); - - if (sav->seq == seq) { - sa_addref(sav); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s cause refcnt++:%d SA:%p\n", - __func__, sav->refcnt, sav)); - return sav; - } - } - - return NULL; -} -#endif - /* * SADB_ADD processing * add an entry to SA database, when received * * from the ikmpd, * and send * * to the ikmpd. * * IGNORE identity and sensitivity messages. * * m will always be freed. */ static int key_add(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; -#ifdef IPSEC_NAT_T - struct sadb_x_nat_t_type *type; - struct sadb_address *iaddr, *raddr; - struct sadb_x_nat_t_frag *frag; -#endif struct secasindex saidx; - struct secashead *newsah; - struct secasvar *newsav; - u_int16_t proto; - u_int8_t mode; - u_int32_t reqid; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint32_t reqid, spi; + uint8_t mode, proto; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && - mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && - mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && ( + SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT) || + SADB_CHECKLEN(mhp, SADB_EXT_KEY_ENCRYPT))) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && ( + SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH) || + SADB_CHECKLEN(mhp, SADB_EXT_KEY_AUTH))) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - /* XXX need more */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return key_senderror(so, m, EINVAL); + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. + * Only SADB_SASTATE_MATURE SAs may be submitted in an + * SADB_ADD message. */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - type = (struct sadb_x_nat_t_type *) - mhp->ext[SADB_X_EXT_NAT_T_TYPE]; - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } else { - type = NULL; + if (sa0->sadb_sa_state != SADB_SASTATE_MATURE) { + ipseclog((LOG_DEBUG, "%s: invalid state.\n", __func__)); +#ifdef PFKEY_STRICT_CHECKS + return key_senderror(so, m, EINVAL); +#endif } - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || - mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; - raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; - ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); - } else { - iaddr = raddr = NULL; + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, error); } - if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + spi = sa0->sadb_sa_spi; + /* + * For TCP-MD5 SAs we don't use SPI. Check the uniqueness using + * secasindex. + * XXXAE: IPComp seems also doesn't use SPI. + */ + if (proto == IPPROTO_TCP) { + sav = key_getsav_tcpmd5(&saidx, &spi); + if (sav == NULL && spi == 0) { + /* Failed to allocate SPI */ + ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); - return key_senderror(so, m, EINVAL); + return key_senderror(so, m, EEXIST); } - frag = (struct sadb_x_nat_t_frag *) - mhp->ext[SADB_X_EXT_NAT_T_FRAG]; + /* XXX: SPI that we report back can have another value */ } else { - frag = NULL; - } -#endif - - /* get a SA header */ - if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA header */ - if ((newsah = key_newsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - return key_senderror(so, m, ENOBUFS); - } - } - - /* set spidx if there */ - /* XXX rewrite */ - error = key_setident(newsah, m, mhp); - if (error) { - return key_senderror(so, m, error); + /* We can create new SA only if SPI is different. */ + sav = key_getsavbyspi(spi); } - - /* create new SA entry. */ - /* We can create new SA only if SPI is differenct. */ - SAHTREE_LOCK(); - newsav = key_getsavbyspi(newsah, sa0->sadb_sa_spi); - SAHTREE_UNLOCK(); - if (newsav != NULL) { + if (sav != NULL) { + key_freesav(&sav); ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); return key_senderror(so, m, EEXIST); } - newsav = KEY_NEWSAV(m, mhp, newsah, &error); - if (newsav == NULL) { - return key_senderror(so, m, error); - } - -#ifdef IPSEC_NAT_T - /* - * Handle more NAT-T info if present, - * now that we have a sav to fill. - */ - if (type) - newsav->natt_type = type->sadb_x_nat_t_type_type; - -#if 0 - /* - * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. - * We should actually check for a minimum MTU here, if we - * want to support it in ip_output. - */ - if (frag) - newsav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; -#endif -#endif - /* check SA values to be mature. */ - if ((error = key_mature(newsav)) != 0) { - KEY_FREESAV(&newsav); + sav = key_newsav(mhp, &saidx, spi, &error); + if (sav == NULL) return key_senderror(so, m, error); - } - + KEYDBG(KEY_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); /* - * don't call key_freesav() here, as we would like to keep the SA - * in the database on success. + * If SADB_ADD was in response to SADB_ACQUIRE, we need to schedule + * ACQ for deletion. */ + if (sav->seq != 0) + key_acqdone(&saidx, sav->seq); { + /* + * Don't call key_freesav() on error here, as we would like to + * keep the SA in the database. + */ struct mbuf *n; /* set msg buf from mhp */ n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } -/* m is retained */ +/* + * NAT-T support. + * IKEd may request the use ESP in UDP encapsulation when it detects the + * presence of NAT. It uses NAT-T extension headers for such SAs to specify + * parameters needed for encapsulation and decapsulation. These PF_KEY + * extension headers are not standardized, so this comment addresses our + * implementation. + * SADB_X_EXT_NAT_T_TYPE specifies type of encapsulation, we support only + * UDP_ENCAP_ESPINUDP as described in RFC3948. + * SADB_X_EXT_NAT_T_SPORT/DPORT specifies source and destination ports for + * UDP header. We use these ports in UDP encapsulation procedure, also we + * can check them in UDP decapsulation procedure. + * SADB_X_EXT_NAT_T_OA[IR] specifies original address of initiator or + * responder. These addresses can be used for transport mode to adjust + * checksum after decapsulation and decryption. Since original IP addresses + * used by peer usually different (we detected presence of NAT), TCP/UDP + * pseudo header checksum and IP header checksum was calculated using original + * addresses. After decapsulation and decryption we need to adjust checksum + * to have correct datagram. + * + * We expect presence of NAT-T extension headers only in SADB_ADD and + * SADB_UPDATE messages. We report NAT-T extension headers in replies + * to SADB_ADD, SADB_UPDATE, SADB_GET, and SADB_DUMP messages. + */ static int -key_setident(struct secashead *sah, struct mbuf *m, - const struct sadb_msghdr *mhp) +key_setnatt(struct secasvar *sav, const struct sadb_msghdr *mhp) +{ + struct sadb_x_nat_t_port *port; + struct sadb_x_nat_t_type *type; + struct sadb_address *oai, *oar; + struct sockaddr *sa; + uint32_t addr; + uint16_t cksum; + + IPSEC_ASSERT(sav->natt == NULL, ("natt is already initialized")); + /* + * Ignore NAT-T headers if sproto isn't ESP. + */ + if (sav->sah->saidx.proto != IPPROTO_ESP) + return (0); + + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) && + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_SPORT) && + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_DPORT)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_TYPE) || + SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_SPORT) || + SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_DPORT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + } else + return (0); + + type = (struct sadb_x_nat_t_type *)mhp->ext[SADB_X_EXT_NAT_T_TYPE]; + if (type->sadb_x_nat_t_type_type != UDP_ENCAP_ESPINUDP) { + ipseclog((LOG_DEBUG, "%s: unsupported NAT-T type %u.\n", + __func__, type->sadb_x_nat_t_type_type)); + return (EINVAL); + } + /* + * Allocate storage for NAT-T config. + * On error it will be released by key_cleansav(). + */ + sav->natt = malloc(sizeof(struct secnatt), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->natt == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + port = (struct sadb_x_nat_t_port *)mhp->ext[SADB_X_EXT_NAT_T_SPORT]; + if (port->sadb_x_nat_t_port_port == 0) { + ipseclog((LOG_DEBUG, "%s: invalid NAT-T sport specified.\n", + __func__)); + return (EINVAL); + } + sav->natt->sport = port->sadb_x_nat_t_port_port; + port = (struct sadb_x_nat_t_port *)mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + if (port->sadb_x_nat_t_port_port == 0) { + ipseclog((LOG_DEBUG, "%s: invalid NAT-T dport specified.\n", + __func__)); + return (EINVAL); + } + sav->natt->dport = port->sadb_x_nat_t_port_port; + + /* + * SADB_X_EXT_NAT_T_OAI and SADB_X_EXT_NAT_T_OAR are optional + * and needed only for transport mode IPsec. + * Usually NAT translates only one address, but it is possible, + * that both addresses could be translated. + * NOTE: Value of SADB_X_EXT_NAT_T_OAI is equal to SADB_X_EXT_NAT_T_OA. + */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_OAI)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_OAI)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + oai = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; + } else + oai = NULL; + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_OAR)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_OAR)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + oar = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; + } else + oar = NULL; + + /* Initialize addresses only for transport mode */ + if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL) { + cksum = 0; + if (oai != NULL) { + /* Currently we support only AF_INET */ + sa = (struct sockaddr *)(oai + 1); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) { + ipseclog((LOG_DEBUG, + "%s: wrong NAT-OAi header.\n", + __func__)); + return (EINVAL); + } + /* Ignore address if it the same */ + if (((struct sockaddr_in *)sa)->sin_addr.s_addr != + sav->sah->saidx.src.sin.sin_addr.s_addr) { + bcopy(sa, &sav->natt->oai.sa, sa->sa_len); + sav->natt->flags |= IPSEC_NATT_F_OAI; + /* Calculate checksum delta */ + addr = sav->sah->saidx.src.sin.sin_addr.s_addr; + cksum = in_addword(cksum, ~addr >> 16); + cksum = in_addword(cksum, ~addr & 0xffff); + addr = sav->natt->oai.sin.sin_addr.s_addr; + cksum = in_addword(cksum, addr >> 16); + cksum = in_addword(cksum, addr & 0xffff); + } + } + if (oar != NULL) { + /* Currently we support only AF_INET */ + sa = (struct sockaddr *)(oar + 1); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) { + ipseclog((LOG_DEBUG, + "%s: wrong NAT-OAr header.\n", + __func__)); + return (EINVAL); + } + /* Ignore address if it the same */ + if (((struct sockaddr_in *)sa)->sin_addr.s_addr != + sav->sah->saidx.dst.sin.sin_addr.s_addr) { + bcopy(sa, &sav->natt->oar.sa, sa->sa_len); + sav->natt->flags |= IPSEC_NATT_F_OAR; + /* Calculate checksum delta */ + addr = sav->sah->saidx.dst.sin.sin_addr.s_addr; + cksum = in_addword(cksum, ~addr >> 16); + cksum = in_addword(cksum, ~addr & 0xffff); + addr = sav->natt->oar.sin.sin_addr.s_addr; + cksum = in_addword(cksum, addr >> 16); + cksum = in_addword(cksum, addr & 0xffff); + } + } + sav->natt->cksum = cksum; + } + return (0); +} + +static int +key_setident(struct secashead *sah, const struct sadb_msghdr *mhp) { const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; IPSEC_ASSERT(sah != NULL, ("null secashead")); - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* don't make buffer if not there */ - if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL && - mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { + if (SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_SRC) && + SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_DST)) { sah->idents = NULL; sah->identd = NULL; - return 0; + return (0); } - - if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL || - mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { + + if (SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_DST)) { ipseclog((LOG_DEBUG, "%s: invalid identity.\n", __func__)); - return EINVAL; + return (EINVAL); } idsrc = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_SRC]; iddst = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_DST]; idsrclen = mhp->extlen[SADB_EXT_IDENTITY_SRC]; iddstlen = mhp->extlen[SADB_EXT_IDENTITY_DST]; /* validity check */ if (idsrc->sadb_ident_type != iddst->sadb_ident_type) { ipseclog((LOG_DEBUG, "%s: ident type mismatch.\n", __func__)); return EINVAL; } switch (idsrc->sadb_ident_type) { case SADB_IDENTTYPE_PREFIX: case SADB_IDENTTYPE_FQDN: case SADB_IDENTTYPE_USERFQDN: default: /* XXX do nothing */ sah->idents = NULL; sah->identd = NULL; return 0; } /* make structure */ sah->idents = malloc(sizeof(struct secident), M_IPSEC_MISC, M_NOWAIT); if (sah->idents == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return ENOBUFS; } sah->identd = malloc(sizeof(struct secident), M_IPSEC_MISC, M_NOWAIT); if (sah->identd == NULL) { free(sah->idents, M_IPSEC_MISC); sah->idents = NULL; ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return ENOBUFS; } sah->idents->type = idsrc->sadb_ident_type; sah->idents->id = idsrc->sadb_ident_id; sah->identd->type = iddst->sadb_ident_type; sah->identd->id = iddst->sadb_ident_id; return 0; } /* * m will not be freed on return. - * it is caller's responsibility to free the result. + * it is caller's responsibility to free the result. + * + * Called from SADB_ADD and SADB_UPDATE. Reply will contain headers + * from the request in defined order. */ static struct mbuf * key_getmsgbuf_x1(struct mbuf *m, const struct sadb_msghdr *mhp) { struct mbuf *n; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* create new sadb_msg to reply. */ - n = key_gather_mbuf(m, mhp, 1, 9, SADB_EXT_RESERVED, + n = key_gather_mbuf(m, mhp, 1, 16, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, - SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST); + SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, + SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, + SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, + SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NEW_ADDRESS_SRC, + SADB_X_EXT_NEW_ADDRESS_DST); if (!n) return NULL; if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return NULL; } mtod(n, struct sadb_msg *)->sadb_msg_errno = 0; mtod(n, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); return n; } /* * SADB_DELETE processing * receive * * from the ikmpd, and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * * m will always be freed. */ static int key_delete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *sah; - struct secasvar *sav = NULL; - u_int16_t proto; + struct sadb_address *src0, *dst0; + struct secasvar *sav; + struct sadb_sa *sa0; + uint8_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); + dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - if (mhp->ext[SADB_EXT_SA] == NULL) { + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return (key_senderror(so, m, EINVAL)); + } + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA)) { /* * Caller wants us to delete all non-LARVAL SAs * that match the src/dst. This is used during * IKE INITIAL-CONTACT. + * XXXAE: this looks like some extension to RFC2367. */ ipseclog((LOG_DEBUG, "%s: doing delete all.\n", __func__)); - return key_delete_all(so, m, mhp, proto); - } else if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + return (key_delete_all(so, m, mhp, &saidx)); } - - sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + return (key_senderror(so, m, EINVAL)); } -#endif - - /* get a SA header */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; - - /* get a SA with SPI. */ - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - if (sav) - break; - } - if (sah == NULL) { - SAHTREE_UNLOCK(); - ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); - return key_senderror(so, m, ENOENT); - } - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - SAHTREE_UNLOCK(); + sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; + if (proto == IPPROTO_TCP) + sav = key_getsav_tcpmd5(&saidx, NULL); + else + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { + ipseclog((LOG_DEBUG, "%s: no SA found for SPI %u.\n", + __func__, ntohl(sa0->sadb_sa_spi))); + return (key_senderror(so, m, ESRCH)); + } + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_HEAD) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u.\n", + __func__, ntohl(sav->spi))); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + key_unlinksav(sav); + key_freesav(&sav); { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ - /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * delete all SAs for src/dst. Called from key_delete(). */ static int key_delete_all(struct socket *so, struct mbuf *m, - const struct sadb_msghdr *mhp, u_int16_t proto) + const struct sadb_msghdr *mhp, struct secasindex *saidx) { - struct sadb_address *src0, *dst0; - struct secasindex saidx; + struct secasvar_queue drainq; struct secashead *sah; struct secasvar *sav, *nextsav; - u_int stateidx, state; - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } -#endif - - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) + TAILQ_INIT(&drainq); + SAHTREE_WLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&sah->saidx, saidx, CMP_HEAD) == 0) continue; - - /* Delete all non-LARVAL SAs. */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - state = saorder_state_alive[stateidx]; - if (state == SADB_SASTATE_LARVAL) - continue; - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; sav = nextsav) { - nextsav = LIST_NEXT(sav, chain); - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "%s: invalid " - "sav->state (queue %d SA %d)\n", - __func__, state, sav->state)); - continue; - } - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - } - } + /* Move all ALIVE SAs into drainq */ + TAILQ_CONCAT(&drainq, &sah->savtree_alive, chain); + } + /* Unlink all queued SAs from SPI hash */ + TAILQ_FOREACH(sav, &drainq, chain) { + sav->state = SADB_SASTATE_DEAD; + LIST_REMOVE(sav, spihash); + } + SAHTREE_WUNLOCK(); + /* Now we can release reference for all SAs in drainq */ + sav = TAILQ_FIRST(&drainq); + while (sav != NULL) { + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + nextsav = TAILQ_NEXT(sav, chain); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; } - SAHTREE_UNLOCK(); + { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ - /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 3, SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } +/* + * Delete all alive SAs for corresponding xform. + * Larval SAs have not initialized tdb_xform, so it is safe to leave them + * here when xform disappears. + */ +static void +key_delete_xform(const struct xformsw *xsp) +{ + struct secasvar_queue drainq; + struct secashead *sah; + struct secasvar *sav, *nextsav; + + TAILQ_INIT(&drainq); + SAHTREE_WLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav == NULL) + continue; + if (sav->tdb_xform != xsp) + continue; + /* + * It is supposed that all SAs in the chain are related to + * one xform. + */ + TAILQ_CONCAT(&drainq, &sah->savtree_alive, chain); + } + /* Unlink all queued SAs from SPI hash */ + TAILQ_FOREACH(sav, &drainq, chain) { + sav->state = SADB_SASTATE_DEAD; + LIST_REMOVE(sav, spihash); + } + SAHTREE_WUNLOCK(); + + /* Now we can release reference for all SAs in drainq */ + sav = TAILQ_FIRST(&drainq); + while (sav != NULL) { + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + nextsav = TAILQ_NEXT(sav, chain); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } +} + /* * SADB_GET processing * receive * * from the ikmpd, and get a SP and a SA to respond, * and send, * * to the ikmpd. * * m will always be freed. */ static int key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *sah; - struct secasvar *sav = NULL; - u_int16_t proto; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint8_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); } -#endif - - /* get a SA header */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - /* get a SA with SPI. */ - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - if (sav) - break; - } - SAHTREE_UNLOCK(); - if (sah == NULL) { + if (proto == IPPROTO_TCP) + sav = key_getsav_tcpmd5(&saidx, NULL); + else + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); - return key_senderror(so, m, ENOENT); + return key_senderror(so, m, ESRCH); + } + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_HEAD) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u.\n", + __func__, ntohl(sa0->sadb_sa_spi))); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); } { struct mbuf *n; - u_int8_t satype; + uint8_t satype; /* map proto to satype */ - if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { + if ((satype = key_proto2satype(sav->sah->saidx.proto)) == 0) { ipseclog((LOG_DEBUG, "%s: there was invalid proto in SAD.\n", - __func__)); + __func__)); + key_freesav(&sav); return key_senderror(so, m, EINVAL); } /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); + + key_freesav(&sav); if (!n) return key_senderror(so, m, ENOBUFS); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } /* XXX make it sysctl-configurable? */ static void key_getcomb_setlifetime(struct sadb_comb *comb) { comb->sadb_comb_soft_allocations = 1; comb->sadb_comb_hard_allocations = 1; comb->sadb_comb_soft_bytes = 0; comb->sadb_comb_hard_bytes = 0; comb->sadb_comb_hard_addtime = 86400; /* 1 day */ comb->sadb_comb_soft_addtime = comb->sadb_comb_soft_addtime * 80 / 100; comb->sadb_comb_soft_usetime = 28800; /* 8 hours */ comb->sadb_comb_hard_usetime = comb->sadb_comb_hard_usetime * 80 / 100; } /* * XXX reorder combinations by preference * XXX no idea if the user wants ESP authentication or not */ static struct mbuf * -key_getcomb_esp() +key_getcomb_ealg(void) { struct sadb_comb *comb; - struct enc_xform *algo; + const struct enc_xform *algo; struct mbuf *result = NULL, *m, *n; int encmin; int i, off, o; int totlen; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_EALG_MAX; i++) { - algo = esp_algorithm_lookup(i); + algo = enc_algorithm_lookup(i); if (algo == NULL) continue; /* discard algorithms with key size smaller than system min */ if (_BITS(algo->maxkey) < V_ipsec_esp_keymin) continue; if (_BITS(algo->minkey) < V_ipsec_esp_keymin) encmin = V_ipsec_esp_keymin; else encmin = _BITS(algo->minkey); if (V_ipsec_esp_auth) m = key_getcomb_ah(); else { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; bzero(mtod(m, caddr_t), m->m_len); } } if (!m) goto fail; totlen = 0; for (n = m; n; n = n->m_next) totlen += n->m_len; IPSEC_ASSERT((totlen % l) == 0, ("totlen=%u, l=%u", totlen, l)); for (off = 0; off < totlen; off += l) { n = m_pulldown(m, off, l, &o); if (!n) { /* m is already freed */ goto fail; } comb = (struct sadb_comb *)(mtod(n, caddr_t) + o); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_encrypt = i; comb->sadb_comb_encrypt_minbits = encmin; comb->sadb_comb_encrypt_maxbits = _BITS(algo->maxkey); } if (!result) result = m; else m_cat(result, m); } return result; fail: if (result) m_freem(result); return NULL; } static void key_getsizes_ah(const struct auth_hash *ah, int alg, u_int16_t* min, u_int16_t* max) { *min = *max = ah->keysize; if (ah->keysize == 0) { /* * Transform takes arbitrary key size but algorithm * key size is restricted. Enforce this here. */ switch (alg) { case SADB_X_AALG_MD5: *min = *max = 16; break; case SADB_X_AALG_SHA: *min = *max = 20; break; case SADB_X_AALG_NULL: *min = 1; *max = 256; break; case SADB_X_AALG_SHA2_256: *min = *max = 32; break; case SADB_X_AALG_SHA2_384: *min = *max = 48; break; case SADB_X_AALG_SHA2_512: *min = *max = 64; break; default: DPRINTF(("%s: unknown AH algorithm %u\n", __func__, alg)); break; } } } /* * XXX reorder combinations by preference */ static struct mbuf * key_getcomb_ah() { + const struct auth_hash *algo; struct sadb_comb *comb; - struct auth_hash *algo; struct mbuf *m; u_int16_t minkeysize, maxkeysize; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_AALG_MAX; i++) { #if 1 /* we prefer HMAC algorithms, not old algorithms */ if (i != SADB_AALG_SHA1HMAC && i != SADB_AALG_MD5HMAC && i != SADB_X_AALG_SHA2_256 && i != SADB_X_AALG_SHA2_384 && i != SADB_X_AALG_SHA2_512) continue; #endif - algo = ah_algorithm_lookup(i); + algo = auth_algorithm_lookup(i); if (!algo) continue; key_getsizes_ah(algo, i, &minkeysize, &maxkeysize); /* discard algorithms with key size smaller than system min */ if (_BITS(minkeysize) < V_ipsec_ah_keymin) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_auth = i; comb->sadb_comb_auth_minbits = _BITS(minkeysize); comb->sadb_comb_auth_maxbits = _BITS(maxkeysize); } return m; } /* * not really an official behavior. discussed in pf_key@inner.net in Sep2000. * XXX reorder combinations by preference */ static struct mbuf * key_getcomb_ipcomp() { + const struct comp_algo *algo; struct sadb_comb *comb; - struct comp_algo *algo; struct mbuf *m; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_X_CALG_MAX; i++) { - algo = ipcomp_algorithm_lookup(i); + algo = comp_algorithm_lookup(i); if (!algo) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_encrypt = i; /* what should we set into sadb_comb_*_{min,max}bits? */ } return m; } /* * XXX no way to pass mode (transport/tunnel) to userland * XXX replay checking? * XXX sysctl interface to ipsec_{ah,esp}_keymin */ static struct mbuf * key_getprop(const struct secasindex *saidx) { struct sadb_prop *prop; struct mbuf *m, *n; const int l = PFKEY_ALIGN8(sizeof(struct sadb_prop)); int totlen; switch (saidx->proto) { case IPPROTO_ESP: - m = key_getcomb_esp(); + m = key_getcomb_ealg(); break; case IPPROTO_AH: m = key_getcomb_ah(); break; case IPPROTO_IPCOMP: m = key_getcomb_ipcomp(); break; default: return NULL; } if (!m) return NULL; M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; totlen = 0; for (n = m; n; n = n->m_next) totlen += n->m_len; prop = mtod(m, struct sadb_prop *); bzero(prop, sizeof(*prop)); prop->sadb_prop_len = PFKEY_UNIT64(totlen); prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; prop->sadb_prop_replay = 32; /* XXX */ return m; } /* * SADB_ACQUIRE processing called by key_checkrequest() and key_acquire2(). * send * * to KMD, and expect to receive * with SADB_ACQUIRE if error occurred, * or * with SADB_GETSPI * from KMD by PF_KEY. * * XXX x_policy is outside of RFC2367 (KAME extension). * XXX sensitivity is not supported. * XXX for ipcomp, RFC2367 does not define how to fill in proposal. * see comment for key_getcomb_ipcomp(). * * OUT: * 0 : succeed * others: error number */ static int key_acquire(const struct secasindex *saidx, struct secpolicy *sp) { union sockaddr_union addr; struct mbuf *result, *m; - struct secacq *newacq; - u_int32_t seq; + uint32_t seq; int error; - u_int16_t ul_proto; - u_int8_t mask, satype; + uint16_t ul_proto; + uint8_t mask, satype; IPSEC_ASSERT(saidx != NULL, ("null saidx")); satype = key_proto2satype(saidx->proto); IPSEC_ASSERT(satype != 0, ("null satype, protocol %u", saidx->proto)); error = -1; result = NULL; ul_proto = IPSEC_ULPROTO_ANY; - /* - * We never do anything about acquirng SA. There is anather - * solution that kernel blocks to send SADB_ACQUIRE message until - * getting something message from IKEd. In later case, to be - * managed with ACQUIRING list. - */ - /* Get an entry to check whether sending message or not. */ - if ((newacq = key_getacq(saidx)) != NULL) { - if (V_key_blockacq_count < newacq->count) { - /* reset counter and do send message. */ - newacq->count = 0; - } else { - /* increment counter and do nothing. */ - newacq->count++; - return 0; - } - } else { - /* make new entry for blocking to send SADB_ACQUIRE. */ - if ((newacq = key_newacq(saidx)) == NULL) - return ENOBUFS; - } + /* Get seq number to check whether sending message or not. */ + seq = key_getacq(saidx, &error); + if (seq == 0) + return (error); - seq = newacq->seq; m = key_setsadbmsg(SADB_ACQUIRE, 0, satype, seq, 0, 0); if (!m) { error = ENOBUFS; goto fail; } result = m; - /* - * No SADB_X_EXT_NAT_T_* here: we do not know - * anything related to NAT-T at this time. - */ - /* * set sadb_address for saidx's. * * Note that if sp is supplied, then we're being called from - * key_checkrequest and should supply port and protocol information. + * key_allocsa_policy() and should supply port and protocol + * information. + * XXXAE: why only TCP and UDP? ICMP and SCTP looks applicable too. + * XXXAE: probably we can handle this in the ipsec[46]_allocsa(). + * XXXAE: it looks like we should save this info in the ACQ entry. */ if (sp != NULL && (sp->spidx.ul_proto == IPPROTO_TCP || sp->spidx.ul_proto == IPPROTO_UDP)) ul_proto = sp->spidx.ul_proto; addr = saidx->src; mask = FULLMASK; if (ul_proto != IPSEC_ULPROTO_ANY) { switch (sp->spidx.src.sa.sa_family) { case AF_INET: if (sp->spidx.src.sin.sin_port != IPSEC_PORT_ANY) { addr.sin.sin_port = sp->spidx.src.sin.sin_port; mask = sp->spidx.prefs; } break; case AF_INET6: if (sp->spidx.src.sin6.sin6_port != IPSEC_PORT_ANY) { - addr.sin6.sin6_port = sp->spidx.src.sin6.sin6_port; + addr.sin6.sin6_port = + sp->spidx.src.sin6.sin6_port; mask = sp->spidx.prefs; } break; default: break; } } m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &addr.sa, mask, ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); addr = saidx->dst; mask = FULLMASK; if (ul_proto != IPSEC_ULPROTO_ANY) { switch (sp->spidx.dst.sa.sa_family) { case AF_INET: if (sp->spidx.dst.sin.sin_port != IPSEC_PORT_ANY) { addr.sin.sin_port = sp->spidx.dst.sin.sin_port; mask = sp->spidx.prefd; } break; case AF_INET6: if (sp->spidx.dst.sin6.sin6_port != IPSEC_PORT_ANY) { - addr.sin6.sin6_port = sp->spidx.dst.sin6.sin6_port; + addr.sin6.sin6_port = + sp->spidx.dst.sin6.sin6_port; mask = sp->spidx.prefd; } break; default: break; } } m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &addr.sa, mask, ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* XXX proxy address (optional) */ /* set sadb_x_policy */ - if (sp) { - m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, sp->priority); + if (sp != NULL) { + m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, + sp->priority); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); } /* XXX identity (optional) */ #if 0 if (idexttype && fqdn) { /* create identity extension (FQDN) */ struct sadb_ident *id; int fqdnlen; fqdnlen = strlen(fqdn) + 1; /* +1 for terminating-NUL */ id = (struct sadb_ident *)p; bzero(id, sizeof(*id) + PFKEY_ALIGN8(fqdnlen)); id->sadb_ident_len = PFKEY_UNIT64(sizeof(*id) + PFKEY_ALIGN8(fqdnlen)); id->sadb_ident_exttype = idexttype; id->sadb_ident_type = SADB_IDENTTYPE_FQDN; bcopy(fqdn, id + 1, fqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(fqdnlen); } if (idexttype) { /* create identity extension (USERFQDN) */ struct sadb_ident *id; int userfqdnlen; if (userfqdn) { /* +1 for terminating-NUL */ userfqdnlen = strlen(userfqdn) + 1; } else userfqdnlen = 0; id = (struct sadb_ident *)p; bzero(id, sizeof(*id) + PFKEY_ALIGN8(userfqdnlen)); id->sadb_ident_len = PFKEY_UNIT64(sizeof(*id) + PFKEY_ALIGN8(userfqdnlen)); id->sadb_ident_exttype = idexttype; id->sadb_ident_type = SADB_IDENTTYPE_USERFQDN; /* XXX is it correct? */ if (curproc && curproc->p_cred) id->sadb_ident_id = curproc->p_cred->p_ruid; if (userfqdn && userfqdnlen) bcopy(userfqdn, id + 1, userfqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(userfqdnlen); } #endif /* XXX sensitivity (optional) */ /* create proposal/combination extension */ m = key_getprop(saidx); #if 0 /* * spec conformant: always attach proposal/combination extension, * the problem is that we have no way to attach it for ipcomp, * due to the way sadb_comb is declared in RFC2367. */ if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); #else /* * outside of spec; make proposal/combination extension optional. */ if (m) m_cat(result, m); #endif if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secasindex(saidx, NULL)); + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } -static struct secacq * -key_newacq(const struct secasindex *saidx) +static uint32_t +key_newacq(const struct secasindex *saidx, int *perror) { - struct secacq *newacq; + struct secacq *acq; + uint32_t seq; - /* get new entry */ - newacq = malloc(sizeof(struct secacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO); - if (newacq == NULL) { + acq = malloc(sizeof(*acq), M_IPSEC_SAQ, M_NOWAIT | M_ZERO); + if (acq == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return NULL; + *perror = ENOBUFS; + return (0); } /* copy secindex */ - bcopy(saidx, &newacq->saidx, sizeof(newacq->saidx)); - newacq->seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq); - newacq->created = time_second; - newacq->count = 0; + bcopy(saidx, &acq->saidx, sizeof(acq->saidx)); + acq->created = time_second; + acq->count = 0; /* add to acqtree */ ACQ_LOCK(); - LIST_INSERT_HEAD(&V_acqtree, newacq, chain); + seq = acq->seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq); + LIST_INSERT_HEAD(&V_acqtree, acq, chain); + LIST_INSERT_HEAD(ACQADDRHASH_HASH(saidx), acq, addrhash); + LIST_INSERT_HEAD(ACQSEQHASH_HASH(seq), acq, seqhash); ACQ_UNLOCK(); - - return newacq; + *perror = 0; + return (seq); } -static struct secacq * -key_getacq(const struct secasindex *saidx) +static uint32_t +key_getacq(const struct secasindex *saidx, int *perror) { struct secacq *acq; + uint32_t seq; ACQ_LOCK(); - LIST_FOREACH(acq, &V_acqtree, chain) { - if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY)) + LIST_FOREACH(acq, ACQADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY)) { + if (acq->count > V_key_blockacq_count) { + /* + * Reset counter and send message. + * Also reset created time to keep ACQ for + * this saidx. + */ + acq->created = time_second; + acq->count = 0; + seq = acq->seq; + } else { + /* + * Increment counter and do nothing. + * We send SADB_ACQUIRE message only + * for each V_key_blockacq_count packet. + */ + acq->count++; + seq = 0; + } break; + } } ACQ_UNLOCK(); - - return acq; + if (acq != NULL) { + *perror = 0; + return (seq); + } + /* allocate new entry */ + return (key_newacq(saidx, perror)); } -static struct secacq * -key_getacqbyseq(u_int32_t seq) +static int +key_acqreset(uint32_t seq) { struct secacq *acq; ACQ_LOCK(); - LIST_FOREACH(acq, &V_acqtree, chain) { - if (acq->seq == seq) + LIST_FOREACH(acq, ACQSEQHASH_HASH(seq), seqhash) { + if (acq->seq == seq) { + acq->count = 0; + acq->created = time_second; break; + } } ACQ_UNLOCK(); + if (acq == NULL) + return (ESRCH); + return (0); +} +/* + * Mark ACQ entry as stale to remove it in key_flush_acq(). + * Called after successful SADB_GETSPI message. + */ +static int +key_acqdone(const struct secasindex *saidx, uint32_t seq) +{ + struct secacq *acq; - return acq; + ACQ_LOCK(); + LIST_FOREACH(acq, ACQSEQHASH_HASH(seq), seqhash) { + if (acq->seq == seq) + break; + } + if (acq != NULL) { + if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY) == 0) { + ipseclog((LOG_DEBUG, + "%s: Mismatched saidx for ACQ %u", __func__, seq)); + acq = NULL; + } else { + acq->created = 0; + } + } else { + ipseclog((LOG_DEBUG, + "%s: ACQ %u is not found.", __func__, seq)); + } + ACQ_UNLOCK(); + if (acq == NULL) + return (ESRCH); + return (0); } static struct secspacq * key_newspacq(struct secpolicyindex *spidx) { struct secspacq *acq; /* get new entry */ acq = malloc(sizeof(struct secspacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO); if (acq == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return NULL; } /* copy secindex */ bcopy(spidx, &acq->spidx, sizeof(acq->spidx)); acq->created = time_second; acq->count = 0; /* add to spacqtree */ SPACQ_LOCK(); LIST_INSERT_HEAD(&V_spacqtree, acq, chain); SPACQ_UNLOCK(); return acq; } static struct secspacq * key_getspacq(struct secpolicyindex *spidx) { struct secspacq *acq; SPACQ_LOCK(); LIST_FOREACH(acq, &V_spacqtree, chain) { if (key_cmpspidx_exactly(spidx, &acq->spidx)) { /* NB: return holding spacq_lock */ return acq; } } SPACQ_UNLOCK(); return NULL; } /* * SADB_ACQUIRE processing, * in first situation, is receiving * * from the ikmpd, and clear sequence of its secasvar entry. * * In second situation, is receiving * * from a user land process, and return * * to the socket. * * m will always be freed. */ static int key_acquire2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - const struct sadb_address *src0, *dst0; + SAHTREE_RLOCK_TRACKER; + struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; - u_int16_t proto; + uint32_t reqid; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* * Error message from KMd. * We assume that if error was occurred in IKEd, the length of PFKEY * message is equal to the size of sadb_msg structure. * We do not raise error even if error occurred in this function. */ if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) { - struct secacq *acq; - /* check sequence number */ - if (mhp->msg->sadb_msg_seq == 0) { + if (mhp->msg->sadb_msg_seq == 0 || + mhp->msg->sadb_msg_errno == 0) { ipseclog((LOG_DEBUG, "%s: must specify sequence " - "number.\n", __func__)); - m_freem(m); - return 0; - } - - if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) == NULL) { + "number and errno.\n", __func__)); + } else { /* - * the specified larval SA is already gone, or we got - * a bogus sequence number. we can silently ignore it. + * IKEd reported that error occurred. + * XXXAE: what it expects from the kernel? + * Probably we should send SADB_ACQUIRE again? + * If so, reset ACQ's state. + * XXXAE: it looks useless. */ - m_freem(m); - return 0; + key_acqreset(mhp->msg->sadb_msg_seq); } - - /* reset acq counter in order to deletion by timehander. */ - acq->created = time_second; - acq->count = 0; m_freem(m); - return 0; + return (0); } /* * This message is from user land. */ /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_EXT_PROPOSAL] == NULL) { - /* error */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_EXT_PROPOSAL)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_PROPOSAL] < sizeof(struct sadb_prop)) { - /* error */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_EXT_PROPOSAL)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; - dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifndef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { + mode = IPSEC_MODE_ANY; + reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + } - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; + dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); } -#endif + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* get a SA index */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(&saidx), addrhash) { if (key_cmpsaidx(&sah->saidx, &saidx, CMP_MODE_REQID)) break; } - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); if (sah != NULL) { ipseclog((LOG_DEBUG, "%s: a SA exists already.\n", __func__)); return key_senderror(so, m, EEXIST); } error = key_acquire(&saidx, NULL); if (error != 0) { - ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", - __func__, mhp->msg->sadb_msg_errno)); + ipseclog((LOG_DEBUG, + "%s: error %d returned from key_acquire()\n", + __func__, error)); return key_senderror(so, m, error); } - - return key_sendup_mbuf(so, m, KEY_SENDUP_REGISTERED); + m_freem(m); + return (0); } /* * SADB_REGISTER processing. * If SATYPE_UNSPEC has been passed as satype, only return sabd_supported. * receive * * from the ikmpd, and register a socket to send PF_KEY messages, * and send * * to KMD by PF_KEY. * If socket is detached, must free from regnode. * * m will always be freed. */ static int key_register(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct secreg *reg, *newreg = NULL; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* check for invalid register message */ if (mhp->msg->sadb_msg_satype >= sizeof(V_regtree)/sizeof(V_regtree[0])) return key_senderror(so, m, EINVAL); /* When SATYPE_UNSPEC is specified, only return sabd_supported. */ if (mhp->msg->sadb_msg_satype == SADB_SATYPE_UNSPEC) goto setmsg; /* check whether existing or not */ REGTREE_LOCK(); LIST_FOREACH(reg, &V_regtree[mhp->msg->sadb_msg_satype], chain) { if (reg->so == so) { REGTREE_UNLOCK(); ipseclog((LOG_DEBUG, "%s: socket exists already.\n", __func__)); return key_senderror(so, m, EEXIST); } } /* create regnode */ newreg = malloc(sizeof(struct secreg), M_IPSEC_SAR, M_NOWAIT|M_ZERO); if (newreg == NULL) { REGTREE_UNLOCK(); ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } newreg->so = so; ((struct keycb *)sotorawcb(so))->kp_registered++; /* add regnode to regtree. */ LIST_INSERT_HEAD(&V_regtree[mhp->msg->sadb_msg_satype], newreg, chain); REGTREE_UNLOCK(); setmsg: { struct mbuf *n; struct sadb_msg *newmsg; struct sadb_supported *sup; u_int len, alen, elen; int off; int i; struct sadb_alg *alg; /* create new sadb_msg to reply. */ alen = 0; for (i = 1; i <= SADB_AALG_MAX; i++) { - if (ah_algorithm_lookup(i)) + if (auth_algorithm_lookup(i)) alen += sizeof(struct sadb_alg); } if (alen) alen += sizeof(struct sadb_supported); elen = 0; for (i = 1; i <= SADB_EALG_MAX; i++) { - if (esp_algorithm_lookup(i)) + if (enc_algorithm_lookup(i)) elen += sizeof(struct sadb_alg); } if (elen) elen += sizeof(struct sadb_supported); len = sizeof(struct sadb_msg) + alen + elen; if (len > MCLBYTES) return key_senderror(so, m, ENOBUFS); MGETHDR(n, M_NOWAIT, MT_DATA); if (len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } if (!n) return key_senderror(so, m, ENOBUFS); n->m_pkthdr.len = n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(len); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); /* for authentication algorithm */ if (alen) { sup = (struct sadb_supported *)(mtod(n, caddr_t) + off); sup->sadb_supported_len = PFKEY_UNIT64(alen); sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; off += PFKEY_ALIGN8(sizeof(*sup)); for (i = 1; i <= SADB_AALG_MAX; i++) { - struct auth_hash *aalgo; + const struct auth_hash *aalgo; u_int16_t minkeysize, maxkeysize; - aalgo = ah_algorithm_lookup(i); + aalgo = auth_algorithm_lookup(i); if (!aalgo) continue; alg = (struct sadb_alg *)(mtod(n, caddr_t) + off); alg->sadb_alg_id = i; alg->sadb_alg_ivlen = 0; key_getsizes_ah(aalgo, i, &minkeysize, &maxkeysize); alg->sadb_alg_minbits = _BITS(minkeysize); alg->sadb_alg_maxbits = _BITS(maxkeysize); off += PFKEY_ALIGN8(sizeof(*alg)); } } /* for encryption algorithm */ if (elen) { sup = (struct sadb_supported *)(mtod(n, caddr_t) + off); sup->sadb_supported_len = PFKEY_UNIT64(elen); sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; off += PFKEY_ALIGN8(sizeof(*sup)); for (i = 1; i <= SADB_EALG_MAX; i++) { - struct enc_xform *ealgo; + const struct enc_xform *ealgo; - ealgo = esp_algorithm_lookup(i); + ealgo = enc_algorithm_lookup(i); if (!ealgo) continue; alg = (struct sadb_alg *)(mtod(n, caddr_t) + off); alg->sadb_alg_id = i; alg->sadb_alg_ivlen = ealgo->ivsize; alg->sadb_alg_minbits = _BITS(ealgo->minkey); alg->sadb_alg_maxbits = _BITS(ealgo->maxkey); off += PFKEY_ALIGN8(sizeof(struct sadb_alg)); } } IPSEC_ASSERT(off == len, ("length assumption failed (off %u len %u)", off, len)); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_REGISTERED); } } /* * free secreg entry registered. * XXX: I want to do free a socket marked done SADB_RESIGER to socket. */ void key_freereg(struct socket *so) { struct secreg *reg; int i; IPSEC_ASSERT(so != NULL, ("NULL so")); /* * check whether existing or not. * check all type of SA, because there is a potential that * one socket is registered to multiple type of SA. */ REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { if (reg->so == so && __LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); free(reg, M_IPSEC_SAR); break; } } } REGTREE_UNLOCK(); } /* * SADB_EXPIRE processing * send * * to KMD by PF_KEY. * NOTE: We send only soft lifetime extension. * * OUT: 0 : succeed * others : error number */ static int key_expire(struct secasvar *sav, int hard) { - int satype; struct mbuf *result = NULL, *m; - int len; - int error = -1; struct sadb_lifetime *lt; - u_int32_t replay_count; + uint32_t replay_count; + int error, len; + uint8_t satype; IPSEC_ASSERT (sav != NULL, ("null sav")); IPSEC_ASSERT (sav->sah != NULL, ("null sa header")); + KEYDBG(KEY_STAMP, + printf("%s: SA(%p) expired %s lifetime\n", __func__, + sav, hard ? "hard": "soft")); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); /* set msg header */ satype = key_proto2satype(sav->sah->saidx.proto); IPSEC_ASSERT(satype != 0, ("invalid proto, satype %u", satype)); m = key_setsadbmsg(SADB_EXPIRE, 0, satype, sav->seq, 0, sav->refcnt); if (!m) { error = ENOBUFS; goto fail; } result = m; /* create SA extension */ m = key_setsadbsa(sav); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* create SA extension */ SECASVAR_LOCK(sav); replay_count = sav->replay ? sav->replay->count : 0; SECASVAR_UNLOCK(sav); m = key_setsadbxsa2(sav->sah->saidx.mode, replay_count, sav->sah->saidx.reqid); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); if (sav->replay && sav->replay->wsize > UINT8_MAX) { m = key_setsadbxsareplay(sav->replay->wsize); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); } /* create lifetime extension (current and soft) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) { error = ENOBUFS; goto fail; } m_align(m, len); m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; - lt->sadb_lifetime_allocations = sav->lft_c->allocations; - lt->sadb_lifetime_bytes = sav->lft_c->bytes; - lt->sadb_lifetime_addtime = sav->lft_c->addtime; - lt->sadb_lifetime_usetime = sav->lft_c->usetime; + lt->sadb_lifetime_allocations = + (uint32_t)counter_u64_fetch(sav->lft_c_allocations); + lt->sadb_lifetime_bytes = + counter_u64_fetch(sav->lft_c_bytes); + lt->sadb_lifetime_addtime = sav->created; + lt->sadb_lifetime_usetime = sav->firstused; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); if (hard) { lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lt->sadb_lifetime_allocations = sav->lft_h->allocations; lt->sadb_lifetime_bytes = sav->lft_h->bytes; lt->sadb_lifetime_addtime = sav->lft_h->addtime; lt->sadb_lifetime_usetime = sav->lft_h->usetime; } else { lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lt->sadb_lifetime_allocations = sav->lft_s->allocations; lt->sadb_lifetime_bytes = sav->lft_s->bytes; lt->sadb_lifetime_addtime = sav->lft_s->addtime; lt->sadb_lifetime_usetime = sav->lft_s->usetime; } m_cat(result, m); /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sav->sah->saidx.dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* * XXX-BZ Handle NAT-T extensions here. + * XXXAE: it doesn't seem quite useful. IKEs should not depend on + * this information, we report only significant SA fields. */ if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } +static void +key_freesah_flushed(struct secashead_queue *flushq) +{ + struct secashead *sah, *nextsah; + struct secasvar *sav, *nextsav; + + sah = TAILQ_FIRST(flushq); + while (sah != NULL) { + sav = TAILQ_FIRST(&sah->savtree_larval); + while (sav != NULL) { + nextsav = TAILQ_NEXT(sav, chain); + TAILQ_REMOVE(&sah->savtree_larval, sav, chain); + key_freesav(&sav); /* release last reference */ + key_freesah(&sah); /* release reference from SAV */ + sav = nextsav; + } + sav = TAILQ_FIRST(&sah->savtree_alive); + while (sav != NULL) { + nextsav = TAILQ_NEXT(sav, chain); + TAILQ_REMOVE(&sah->savtree_alive, sav, chain); + key_freesav(&sav); /* release last reference */ + key_freesah(&sah); /* release reference from SAV */ + sav = nextsav; + } + nextsah = TAILQ_NEXT(sah, chain); + key_freesah(&sah); /* release last reference */ + sah = nextsah; + } +} + /* * SADB_FLUSH processing * receive * * from the ikmpd, and free all entries in secastree. * and send, * * to the ikmpd. * NOTE: to do is only marking SADB_SASTATE_DEAD. * * m will always be freed. */ static int key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secashead_queue flushq; struct sadb_msg *newmsg; struct secashead *sah, *nextsah; - struct secasvar *sav, *nextsav; - u_int16_t proto; - u_int8_t state; - u_int stateidx; + struct secasvar *sav; + uint8_t proto; + int i; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } - - /* no SATYPE specified, i.e. flushing all SA. */ - SAHTREE_LOCK(); - for (sah = LIST_FIRST(&V_sahtree); - sah != NULL; - sah = nextsah) { - nextsah = LIST_NEXT(sah, chain); - - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) - continue; - - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - state = saorder_state_any[stateidx]; - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; - sav = nextsav) { - - nextsav = LIST_NEXT(sav, chain); - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); + KEYDBG(KEY_STAMP, + printf("%s: proto %u\n", __func__, proto)); + + TAILQ_INIT(&flushq); + if (proto == IPSEC_PROTO_ANY) { + /* no SATYPE specified, i.e. flushing all SA. */ + SAHTREE_WLOCK(); + /* Move all SAHs into flushq */ + TAILQ_CONCAT(&flushq, &V_sahtree, chain); + /* Flush all buckets in SPI hash */ + for (i = 0; i < V_savhash_mask + 1; i++) + LIST_INIT(&V_savhashtbl[i]); + /* Flush all buckets in SAHADDRHASH */ + for (i = 0; i < V_sahaddrhash_mask + 1; i++) + LIST_INIT(&V_sahaddrhashtbl[i]); + /* Mark all SAHs as unlinked */ + TAILQ_FOREACH(sah, &flushq, chain) { + sah->state = SADB_SASTATE_DEAD; + /* + * Callout handler makes its job using + * RLOCK and drain queues. In case, when this + * function will be called just before it + * acquires WLOCK, we need to mark SAs as + * unlinked to prevent second unlink. + */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + sav->state = SADB_SASTATE_DEAD; + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + sav->state = SADB_SASTATE_DEAD; } } - - sah->state = SADB_SASTATE_DEAD; + SAHTREE_WUNLOCK(); + } else { + SAHTREE_WLOCK(); + sah = TAILQ_FIRST(&V_sahtree); + while (sah != NULL) { + IPSEC_ASSERT(sah->state != SADB_SASTATE_DEAD, + ("DEAD SAH %p in SADB_FLUSH", sah)); + nextsah = TAILQ_NEXT(sah, chain); + if (sah->saidx.proto != proto) { + sah = nextsah; + continue; + } + sah->state = SADB_SASTATE_DEAD; + TAILQ_REMOVE(&V_sahtree, sah, chain); + LIST_REMOVE(sah, addrhash); + /* Unlink all SAs from SPI hash */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + } + /* Add SAH into flushq */ + TAILQ_INSERT_HEAD(&flushq, sah, chain); + sah = nextsah; + } + SAHTREE_WUNLOCK(); } - SAHTREE_UNLOCK(); + key_freesah_flushed(&flushq); + /* Free all queued SAs and SAHs */ if (m->m_len < sizeof(struct sadb_msg) || sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } if (m->m_next) m_freem(m->m_next); m->m_next = NULL; m->m_pkthdr.len = m->m_len = sizeof(struct sadb_msg); newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } /* * SADB_DUMP processing * dump all entries including status of DEAD in SAD. * receive * * from the ikmpd, and dump all secasvar leaves * and send, * ..... * to the ikmpd. * * m will always be freed. */ static int key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + SAHTREE_RLOCK_TRACKER; struct secashead *sah; struct secasvar *sav; - u_int16_t proto; - u_int stateidx; - u_int8_t satype; - u_int8_t state; - int cnt; struct sadb_msg *newmsg; struct mbuf *n; + uint32_t cnt; + uint8_t proto, satype; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } /* count sav entries to be sent to the userland. */ cnt = 0; - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + SAHTREE_RLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && + proto != sah->saidx.proto) continue; - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - state = saorder_state_any[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - cnt++; - } - } + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) + cnt++; + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) + cnt++; } if (cnt == 0) { - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } /* send this to the userland, one at a time. */ newmsg = NULL; - LIST_FOREACH(sah, &V_sahtree, chain) { - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + TAILQ_FOREACH(sah, &V_sahtree, chain) { + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && + proto != sah->saidx.proto) continue; /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " - "SAD.\n", __func__)); + "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } - - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - state = saorder_state_any[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); - if (!n) { - SAHTREE_UNLOCK(); - return key_senderror(so, m, ENOBUFS); - } - key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + n = key_setdumpsa(sav, SADB_DUMP, satype, + --cnt, mhp->msg->sadb_msg_pid); + if (n == NULL) { + SAHTREE_RUNLOCK(); + return key_senderror(so, m, ENOBUFS); } + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + n = key_setdumpsa(sav, SADB_DUMP, satype, + --cnt, mhp->msg->sadb_msg_pid); + if (n == NULL) { + SAHTREE_RUNLOCK(); + return key_senderror(so, m, ENOBUFS); + } + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } - SAHTREE_UNLOCK(); - + SAHTREE_RUNLOCK(); m_freem(m); - return 0; + return (0); } - /* * SADB_X_PROMISC processing * * m will always be freed. */ static int key_promisc(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { int olen; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); olen = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len); if (olen < sizeof(struct sadb_msg)) { #if 1 return key_senderror(so, m, EINVAL); #else m_freem(m); return 0; #endif } else if (olen == sizeof(struct sadb_msg)) { /* enable/disable promisc mode */ struct keycb *kp; if ((kp = (struct keycb *)sotorawcb(so)) == NULL) return key_senderror(so, m, EINVAL); mhp->msg->sadb_msg_errno = 0; switch (mhp->msg->sadb_msg_satype) { case 0: case 1: kp->kp_promisc = mhp->msg->sadb_msg_satype; break; default: return key_senderror(so, m, EINVAL); } /* send the original message back to everyone */ mhp->msg->sadb_msg_errno = 0; return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } else { /* send packet as is */ m_adj(m, PFKEY_ALIGN8(sizeof(struct sadb_msg))); /* TODO: if sadb_msg_seq is specified, send to specific pid */ return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } } static int (*key_typesw[])(struct socket *, struct mbuf *, const struct sadb_msghdr *) = { NULL, /* SADB_RESERVED */ key_getspi, /* SADB_GETSPI */ key_update, /* SADB_UPDATE */ key_add, /* SADB_ADD */ key_delete, /* SADB_DELETE */ key_get, /* SADB_GET */ key_acquire2, /* SADB_ACQUIRE */ key_register, /* SADB_REGISTER */ NULL, /* SADB_EXPIRE */ key_flush, /* SADB_FLUSH */ key_dump, /* SADB_DUMP */ key_promisc, /* SADB_X_PROMISC */ NULL, /* SADB_X_PCHANGE */ key_spdadd, /* SADB_X_SPDUPDATE */ key_spdadd, /* SADB_X_SPDADD */ key_spddelete, /* SADB_X_SPDDELETE */ key_spdget, /* SADB_X_SPDGET */ NULL, /* SADB_X_SPDACQUIRE */ key_spddump, /* SADB_X_SPDDUMP */ key_spdflush, /* SADB_X_SPDFLUSH */ key_spdadd, /* SADB_X_SPDSETIDX */ NULL, /* SADB_X_SPDEXPIRE */ key_spddelete2, /* SADB_X_SPDDELETE2 */ }; /* * parse sadb_msg buffer to process PFKEYv2, * and create a data to response if needed. * I think to be dealed with mbuf directly. * IN: * msgp : pointer to pointer to a received buffer pulluped. * This is rewrited to response. * so : pointer to socket. * OUT: * length for buffer to send to user process. */ int key_parse(struct mbuf *m, struct socket *so) { struct sadb_msg *msg; struct sadb_msghdr mh; u_int orglen; int error; int target; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); -#if 0 /*kdebug_sadb assumes msg in linear buffer*/ - KEYDEBUG(KEYDEBUG_KEY_DUMP, - ipseclog((LOG_DEBUG, "%s: passed sadb_msg\n", __func__)); - kdebug_sadb(msg)); -#endif - if (m->m_len < sizeof(struct sadb_msg)) { m = m_pullup(m, sizeof(struct sadb_msg)); if (!m) return ENOBUFS; } msg = mtod(m, struct sadb_msg *); orglen = PFKEY_UNUNIT64(msg->sadb_msg_len); target = KEY_SENDUP_ONE; if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len != orglen) { ipseclog((LOG_DEBUG, "%s: invalid message length.\n",__func__)); PFKEYSTAT_INC(out_invlen); error = EINVAL; goto senderror; } if (msg->sadb_msg_version != PF_KEY_V2) { ipseclog((LOG_DEBUG, "%s: PF_KEY version %u is mismatched.\n", __func__, msg->sadb_msg_version)); PFKEYSTAT_INC(out_invver); error = EINVAL; goto senderror; } if (msg->sadb_msg_type > SADB_MAX) { ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n", __func__, msg->sadb_msg_type)); PFKEYSTAT_INC(out_invmsgtype); error = EINVAL; goto senderror; } /* for old-fashioned code - should be nuked */ if (m->m_pkthdr.len > MCLBYTES) { m_freem(m); return ENOBUFS; } if (m->m_next) { struct mbuf *n; MGETHDR(n, M_NOWAIT, MT_DATA); if (n && m->m_pkthdr.len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_free(n); n = NULL; } } if (!n) { m_freem(m); return ENOBUFS; } m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); n->m_pkthdr.len = n->m_len = m->m_pkthdr.len; n->m_next = NULL; m_freem(m); m = n; } /* align the mbuf chain so that extensions are in contiguous region. */ error = key_align(m, &mh); if (error) return error; msg = mh.msg; /* check SA type */ switch (msg->sadb_msg_satype) { case SADB_SATYPE_UNSPEC: switch (msg->sadb_msg_type) { case SADB_GETSPI: case SADB_UPDATE: case SADB_ADD: case SADB_DELETE: case SADB_GET: case SADB_ACQUIRE: case SADB_EXPIRE: ipseclog((LOG_DEBUG, "%s: must specify satype " "when msg type=%u.\n", __func__, msg->sadb_msg_type)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } break; case SADB_SATYPE_AH: case SADB_SATYPE_ESP: case SADB_X_SATYPE_IPCOMP: case SADB_X_SATYPE_TCPSIGNATURE: switch (msg->sadb_msg_type) { case SADB_X_SPDADD: case SADB_X_SPDDELETE: case SADB_X_SPDGET: case SADB_X_SPDDUMP: case SADB_X_SPDFLUSH: case SADB_X_SPDSETIDX: case SADB_X_SPDUPDATE: case SADB_X_SPDDELETE2: ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n", __func__, msg->sadb_msg_type)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } break; case SADB_SATYPE_RSVP: case SADB_SATYPE_OSPFV2: case SADB_SATYPE_RIPV2: case SADB_SATYPE_MIP: ipseclog((LOG_DEBUG, "%s: type %u isn't supported.\n", __func__, msg->sadb_msg_satype)); PFKEYSTAT_INC(out_invsatype); error = EOPNOTSUPP; goto senderror; case 1: /* XXX: What does it do? */ if (msg->sadb_msg_type == SADB_X_PROMISC) break; /*FALLTHROUGH*/ default: ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n", __func__, msg->sadb_msg_satype)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } /* check field of upper layer protocol and address family */ if (mh.ext[SADB_EXT_ADDRESS_SRC] != NULL && mh.ext[SADB_EXT_ADDRESS_DST] != NULL) { struct sadb_address *src0, *dst0; u_int plen; src0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_DST]); /* check upper layer protocol */ if (src0->sadb_address_proto != dst0->sadb_address_proto) { ipseclog((LOG_DEBUG, "%s: upper layer protocol " "mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } /* check family */ if (PFKEY_ADDR_SADDR(src0)->sa_family != PFKEY_ADDR_SADDR(dst0)->sa_family) { ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } if (PFKEY_ADDR_SADDR(src0)->sa_len != PFKEY_ADDR_SADDR(dst0)->sa_len) { ipseclog((LOG_DEBUG, "%s: address struct size " "mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: if (PFKEY_ADDR_SADDR(src0)->sa_len != sizeof(struct sockaddr_in)) { PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } break; case AF_INET6: if (PFKEY_ADDR_SADDR(src0)->sa_len != sizeof(struct sockaddr_in6)) { PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } break; default: ipseclog((LOG_DEBUG, "%s: unsupported address family\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EAFNOSUPPORT; goto senderror; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: plen = 0; /*fool gcc*/ break; } /* check max prefix length */ if (src0->sadb_address_prefixlen > plen || dst0->sadb_address_prefixlen > plen) { ipseclog((LOG_DEBUG, "%s: illegal prefixlen.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } /* * prefixlen == 0 is valid because there can be a case when * all addresses are matched. */ } if (msg->sadb_msg_type >= nitems(key_typesw) || key_typesw[msg->sadb_msg_type] == NULL) { PFKEYSTAT_INC(out_invmsgtype); error = EINVAL; goto senderror; } return (*key_typesw[msg->sadb_msg_type])(so, m, &mh); senderror: msg->sadb_msg_errno = error; return key_sendup_mbuf(so, m, target); } static int key_senderror(struct socket *so, struct mbuf *m, int code) { struct sadb_msg *msg; IPSEC_ASSERT(m->m_len >= sizeof(struct sadb_msg), ("mbuf too small, len %u", m->m_len)); msg = mtod(m, struct sadb_msg *); msg->sadb_msg_errno = code; return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); } /* * set the pointer to each header into message buffer. * m will be freed on error. * XXX larger-than-MCLBYTES extension? */ static int key_align(struct mbuf *m, struct sadb_msghdr *mhp) { struct mbuf *n; struct sadb_ext *ext; size_t off, end; int extlen; int toff; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(m->m_len >= sizeof(struct sadb_msg), ("mbuf too small, len %u", m->m_len)); /* initialize */ bzero(mhp, sizeof(*mhp)); mhp->msg = mtod(m, struct sadb_msg *); mhp->ext[0] = (struct sadb_ext *)mhp->msg; /*XXX backward compat */ end = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len); extlen = end; /*just in case extlen is not updated*/ for (off = sizeof(struct sadb_msg); off < end; off += extlen) { n = m_pulldown(m, off, sizeof(struct sadb_ext), &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff); /* set pointer */ switch (ext->sadb_ext_type) { case SADB_EXT_SA: case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_LIFETIME_CURRENT: case SADB_EXT_LIFETIME_HARD: case SADB_EXT_LIFETIME_SOFT: case SADB_EXT_KEY_AUTH: case SADB_EXT_KEY_ENCRYPT: case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: case SADB_EXT_SENSITIVITY: case SADB_EXT_PROPOSAL: case SADB_EXT_SUPPORTED_AUTH: case SADB_EXT_SUPPORTED_ENCRYPT: case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: -#ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: -#endif case SADB_X_EXT_SA_REPLAY: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: /* duplicate check */ /* * XXX Are there duplication payloads of either * KEY_AUTH or KEY_ENCRYPT ? */ if (mhp->ext[ext->sadb_ext_type] != NULL) { ipseclog((LOG_DEBUG, "%s: duplicate ext_type " "%u\n", __func__, ext->sadb_ext_type)); m_freem(m); PFKEYSTAT_INC(out_dupext); return EINVAL; } break; default: ipseclog((LOG_DEBUG, "%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type)); m_freem(m); PFKEYSTAT_INC(out_invexttype); return EINVAL; } extlen = PFKEY_UNUNIT64(ext->sadb_ext_len); if (key_validate_ext(ext, extlen)) { m_freem(m); PFKEYSTAT_INC(out_invlen); return EINVAL; } n = m_pulldown(m, off, extlen, &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff); mhp->ext[ext->sadb_ext_type] = ext; mhp->extoff[ext->sadb_ext_type] = off; mhp->extlen[ext->sadb_ext_type] = extlen; } if (off != end) { m_freem(m); PFKEYSTAT_INC(out_invlen); return EINVAL; } return 0; } static int key_validate_ext(const struct sadb_ext *ext, int len) { const struct sockaddr *sa; enum { NONE, ADDR } checktype = NONE; int baselen = 0; const int sal = offsetof(struct sockaddr, sa_len) + sizeof(sa->sa_len); if (len != PFKEY_UNUNIT64(ext->sadb_ext_len)) return EINVAL; /* if it does not match minimum/maximum length, bail */ if (ext->sadb_ext_type >= nitems(minsize) || ext->sadb_ext_type >= nitems(maxsize)) return EINVAL; if (!minsize[ext->sadb_ext_type] || len < minsize[ext->sadb_ext_type]) return EINVAL; if (maxsize[ext->sadb_ext_type] && len > maxsize[ext->sadb_ext_type]) return EINVAL; /* more checks based on sadb_ext_type XXX need more */ switch (ext->sadb_ext_type) { case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: + case SADB_X_EXT_NAT_T_OAI: + case SADB_X_EXT_NAT_T_OAR: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; break; case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: if (((const struct sadb_ident *)ext)->sadb_ident_type == SADB_X_IDENTTYPE_ADDR) { baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident)); checktype = ADDR; } else checktype = NONE; break; default: checktype = NONE; break; } switch (checktype) { case NONE: break; case ADDR: sa = (const struct sockaddr *)(((const u_int8_t*)ext)+baselen); if (len < baselen + sal) return EINVAL; if (baselen + PFKEY_ALIGN8(sa->sa_len) != len) return EINVAL; break; } return 0; } void key_init(void) { int i; - for (i = 0; i < IPSEC_DIR_MAX; i++) + for (i = 0; i < IPSEC_DIR_MAX; i++) { TAILQ_INIT(&V_sptree[i]); + TAILQ_INIT(&V_sptree_ifnet[i]); + } - LIST_INIT(&V_sahtree); + V_key_lft_zone = uma_zcreate("IPsec SA lft_c", + sizeof(uint64_t) * 2, NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_PCPU); + + TAILQ_INIT(&V_sahtree); + V_sphashtbl = hashinit(SPHASH_NHASH, M_IPSEC_SP, &V_sphash_mask); + V_savhashtbl = hashinit(SAVHASH_NHASH, M_IPSEC_SA, &V_savhash_mask); + V_sahaddrhashtbl = hashinit(SAHHASH_NHASH, M_IPSEC_SAH, + &V_sahaddrhash_mask); + V_acqaddrhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ, + &V_acqaddrhash_mask); + V_acqseqhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ, + &V_acqseqhash_mask); for (i = 0; i <= SADB_SATYPE_MAX; i++) LIST_INIT(&V_regtree[i]); LIST_INIT(&V_acqtree); LIST_INIT(&V_spacqtree); if (!IS_DEFAULT_VNET(curvnet)) return; + XFORMS_LOCK_INIT(); SPTREE_LOCK_INIT(); REGTREE_LOCK_INIT(); SAHTREE_LOCK_INIT(); ACQ_LOCK_INIT(); SPACQ_LOCK_INIT(); #ifndef IPSEC_DEBUG2 callout_init(&key_timer, 1); callout_reset(&key_timer, hz, key_timehandler, NULL); #endif /*IPSEC_DEBUG2*/ /* initialize key statistics */ keystat.getspi_count = 1; if (bootverbose) printf("IPsec: Initialized Security Association Processing.\n"); } #ifdef VIMAGE void key_destroy(void) { - TAILQ_HEAD(, secpolicy) drainq; + struct secashead_queue sahdrainq; + struct secpolicy_queue drainq; struct secpolicy *sp, *nextsp; struct secacq *acq, *nextacq; struct secspacq *spacq, *nextspacq; - struct secashead *sah, *nextsah; + struct secashead *sah; + struct secasvar *sav; struct secreg *reg; int i; + /* + * XXX: can we just call free() for each object without + * walking through safe way with releasing references? + */ TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (i = 0; i < IPSEC_DIR_MAX; i++) { TAILQ_CONCAT(&drainq, &V_sptree[i], chain); + TAILQ_CONCAT(&drainq, &V_sptree_ifnet[i], chain); } SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); - KEY_FREESP(&sp); + key_freesp(&sp); sp = nextsp; } - SAHTREE_LOCK(); - for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { - nextsah = LIST_NEXT(sah, chain); - if (__LIST_CHAINED(sah)) { - LIST_REMOVE(sah, chain); - free(sah, M_IPSEC_SAH); + TAILQ_INIT(&sahdrainq); + SAHTREE_WLOCK(); + TAILQ_CONCAT(&sahdrainq, &V_sahtree, chain); + for (i = 0; i < V_savhash_mask + 1; i++) + LIST_INIT(&V_savhashtbl[i]); + for (i = 0; i < V_sahaddrhash_mask + 1; i++) + LIST_INIT(&V_sahaddrhashtbl[i]); + TAILQ_FOREACH(sah, &sahdrainq, chain) { + sah->state = SADB_SASTATE_DEAD; + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + sav->state = SADB_SASTATE_DEAD; + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + sav->state = SADB_SASTATE_DEAD; } } - SAHTREE_UNLOCK(); + SAHTREE_WUNLOCK(); + + key_freesah_flushed(&sahdrainq); + hashdestroy(V_sphashtbl, M_IPSEC_SP, V_sphash_mask); + hashdestroy(V_savhashtbl, M_IPSEC_SA, V_savhash_mask); + hashdestroy(V_sahaddrhashtbl, M_IPSEC_SAH, V_sahaddrhash_mask); REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { if (__LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); free(reg, M_IPSEC_SAR); break; } } } REGTREE_UNLOCK(); ACQ_LOCK(); - for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { + acq = LIST_FIRST(&V_acqtree); + while (acq != NULL) { nextacq = LIST_NEXT(acq, chain); - if (__LIST_CHAINED(acq)) { - LIST_REMOVE(acq, chain); - free(acq, M_IPSEC_SAQ); - } + LIST_REMOVE(acq, chain); + free(acq, M_IPSEC_SAQ); + acq = nextacq; } ACQ_UNLOCK(); SPACQ_LOCK(); for (spacq = LIST_FIRST(&V_spacqtree); spacq != NULL; spacq = nextspacq) { nextspacq = LIST_NEXT(spacq, chain); if (__LIST_CHAINED(spacq)) { LIST_REMOVE(spacq, chain); free(spacq, M_IPSEC_SAQ); } } SPACQ_UNLOCK(); + hashdestroy(V_acqaddrhashtbl, M_IPSEC_SAQ, V_acqaddrhash_mask); + hashdestroy(V_acqseqhashtbl, M_IPSEC_SAQ, V_acqseqhash_mask); + uma_zdestroy(V_key_lft_zone); } #endif -/* - * XXX: maybe This function is called after INBOUND IPsec processing. - * - * Special check for tunnel-mode packets. - * We must make some checks for consistency between inner and outer IP header. - * - * xxx more checks to be provided - */ -int -key_checktunnelsanity(struct secasvar *sav, u_int family, caddr_t src, - caddr_t dst) -{ - IPSEC_ASSERT(sav->sah != NULL, ("null SA header")); - - /* XXX: check inner IP header */ - - return 1; -} - /* record data transfer on SA, and update timestamps */ void key_sa_recordxfer(struct secasvar *sav, struct mbuf *m) { IPSEC_ASSERT(sav != NULL, ("Null secasvar")); IPSEC_ASSERT(m != NULL, ("Null mbuf")); - if (!sav->lft_c) - return; /* * XXX Currently, there is a difference of bytes size * between inbound and outbound processing. */ - sav->lft_c->bytes += m->m_pkthdr.len; - /* to check bytes lifetime is done in key_timehandler(). */ + counter_u64_add(sav->lft_c_bytes, m->m_pkthdr.len); /* * We use the number of packets as the unit of * allocations. We increment the variable * whenever {esp,ah}_{in,out}put is called. */ - sav->lft_c->allocations++; - /* XXX check for expires? */ + counter_u64_add(sav->lft_c_allocations, 1); /* * NOTE: We record CURRENT usetime by using wall clock, * in seconds. HARD and SOFT lifetime are measured by the time * difference (again in seconds) from usetime. * * usetime * v expire expire * -----+-----+--------+---> t * <--------------> HARD * <-----> SOFT */ - sav->lft_c->usetime = time_second; - /* XXX check for expires? */ - - return; -} - -static void -key_sa_chgstate(struct secasvar *sav, u_int8_t state) -{ - IPSEC_ASSERT(sav != NULL, ("NULL sav")); - SAHTREE_LOCK_ASSERT(); - - if (sav->state != state) { - if (__LIST_CHAINED(sav)) - LIST_REMOVE(sav, chain); - sav->state = state; - LIST_INSERT_HEAD(&sav->sah->savtree[state], sav, chain); - } + if (sav->firstused == 0) + sav->firstused = time_second; } /* * Take one of the kernel's security keys and convert it into a PF_KEY * structure within an mbuf, suitable for sending up to a waiting * application in user land. * * IN: * src: A pointer to a kernel security key. * exttype: Which type of key this is. Refer to the PF_KEY data structures. * OUT: * a valid mbuf or NULL indicating an error * */ static struct mbuf * -key_setkey(struct seckey *src, u_int16_t exttype) +key_setkey(struct seckey *src, uint16_t exttype) { struct mbuf *m; struct sadb_key *p; int len; if (src == NULL) return NULL; len = PFKEY_ALIGN8(sizeof(struct sadb_key) + _KEYLEN(src)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return NULL; m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_key *); bzero(p, len); p->sadb_key_len = PFKEY_UNIT64(len); p->sadb_key_exttype = exttype; p->sadb_key_bits = src->bits; bcopy(src->key_data, _KEYBUF(p), _KEYLEN(src)); return m; } /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to * a waiting application in user land. * * IN: * src: A pointer to a kernel lifetime structure. * exttype: Which type of lifetime this is. Refer to the PF_KEY * data structures for more information. * OUT: * a valid mbuf or NULL indicating an error * */ static struct mbuf * -key_setlifetime(struct seclifetime *src, u_int16_t exttype) +key_setlifetime(struct seclifetime *src, uint16_t exttype) { struct mbuf *m = NULL; struct sadb_lifetime *p; int len = PFKEY_ALIGN8(sizeof(struct sadb_lifetime)); if (src == NULL) return NULL; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return m; m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_lifetime *); bzero(p, len); p->sadb_lifetime_len = PFKEY_UNIT64(len); p->sadb_lifetime_exttype = exttype; p->sadb_lifetime_allocations = src->allocations; p->sadb_lifetime_bytes = src->bytes; p->sadb_lifetime_addtime = src->addtime; p->sadb_lifetime_usetime = src->usetime; return m; } + +const struct enc_xform * +enc_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_ealgs); i++) + if (alg == supported_ealgs[i].sadb_alg) + return (supported_ealgs[i].xform); + return (NULL); +} + +const struct auth_hash * +auth_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_aalgs); i++) + if (alg == supported_aalgs[i].sadb_alg) + return (supported_aalgs[i].xform); + return (NULL); +} + +const struct comp_algo * +comp_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_calgs); i++) + if (alg == supported_calgs[i].sadb_alg) + return (supported_calgs[i].xform); + return (NULL); +} + +/* + * Register a transform. + */ +static int +xform_register(struct xformsw* xsp) +{ + struct xformsw *entry; + + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xsp->xf_type) { + XFORMS_UNLOCK(); + return (EEXIST); + } + } + LIST_INSERT_HEAD(&xforms, xsp, chain); + XFORMS_UNLOCK(); + return (0); +} + +void +xform_attach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + if (xform_register(xsp) != 0) + printf("%s: failed to register %s xform\n", __func__, + xsp->xf_name); +} + +void +xform_detach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + XFORMS_LOCK(); + LIST_REMOVE(xsp, chain); + XFORMS_UNLOCK(); + + /* Delete all SAs related to this xform. */ + key_delete_xform(xsp); +} + +/* + * Initialize transform support in an sav. + */ +static int +xform_init(struct secasvar *sav, u_short xftype) +{ + struct xformsw *entry; + int ret; + + IPSEC_ASSERT(sav->tdb_xform == NULL, + ("tdb_xform is already initialized")); + + ret = EINVAL; + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xftype) { + ret = (*entry->xf_init)(sav, entry); + break; + } + } + XFORMS_UNLOCK(); + return (ret); +} + diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h index ad2b53d42583..a646832e01f4 100644 --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -1,130 +1,99 @@ /* $FreeBSD$ */ /* $KAME: key.h,v 1.21 2001/07/27 03:51:30 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETIPSEC_KEY_H_ #define _NETIPSEC_KEY_H_ #ifdef _KERNEL struct secpolicy; struct secpolicyindex; -struct ipsecrequest; struct secasvar; struct sockaddr; struct socket; struct sadb_msg; struct sadb_x_policy; struct secasindex; union sockaddr_union; -extern void key_addref(struct secpolicy *sp); -extern int key_havesp(u_int dir); -extern struct secpolicy *key_allocsp(struct secpolicyindex *, u_int, - const char*, int); -extern struct secpolicy *key_allocsp2(u_int32_t spi, union sockaddr_union *dst, - u_int8_t proto, u_int dir, const char*, int); -extern struct secpolicy *key_newsp(const char*, int); -#if 0 -extern struct secpolicy *key_gettunnel(const struct sockaddr *, - const struct sockaddr *, const struct sockaddr *, - const struct sockaddr *, const char*, int); -#endif -/* NB: prepend with _ for KAME IPv6 compatbility */ -extern void _key_freesp(struct secpolicy **, const char*, int); +struct secpolicy *key_newsp(void); +struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); +struct secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); +int key_sp2msg(struct secpolicy *, void *, size_t *); +void key_addref(struct secpolicy *); +void key_freesp(struct secpolicy **); +int key_spdacquire(struct secpolicy *); +int key_havesp(u_int); +void key_bumpspgen(void); +uint32_t key_getspgen(void); +uint32_t key_newreqid(void); -#define KEY_ALLOCSP(spidx, dir) \ - key_allocsp(spidx, dir, __FILE__, __LINE__) -#define KEY_ALLOCSP2(spi, dst, proto, dir) \ - key_allocsp2(spi, dst, proto, dir, __FILE__, __LINE__) -#define KEY_NEWSP() \ - key_newsp(__FILE__, __LINE__) -#if 0 -#define KEY_GETTUNNEL(osrc, odst, isrc, idst) \ - key_gettunnel(osrc, odst, isrc, idst, __FILE__, __LINE__) -#endif -#define KEY_FREESP(spp) \ - _key_freesp(spp, __FILE__, __LINE__) +struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); +struct secasvar *key_allocsa_tunnel(union sockaddr_union *, + union sockaddr_union *, uint8_t); +struct secasvar *key_allocsa_policy(struct secpolicy *, + const struct secasindex *, int *); +struct secasvar *key_allocsa_tcpmd5(struct secasindex *); +void key_freesav(struct secasvar **); -extern struct secasvar *key_allocsa(union sockaddr_union *, u_int, u_int32_t, - const char*, int); -extern struct secasvar *key_allocsa_tunnel(union sockaddr_union *, - union sockaddr_union *, u_int, const char*, int); -extern void key_addrefsa(struct secasvar *, const char*, int); -extern void key_freesav(struct secasvar **, const char*, int); +int key_sockaddrcmp(const struct sockaddr *, const struct sockaddr *, int); +int key_sockaddrcmp_withmask(const struct sockaddr *, const struct sockaddr *, + size_t); -#define KEY_ALLOCSA(dst, proto, spi) \ - key_allocsa(dst, proto, spi, __FILE__, __LINE__) -#define KEY_ALLOCSA_TUNNEL(src, dst, proto) \ - key_allocsa_tunnel(src, dst, proto, __FILE__, __LINE__) -#define KEY_ADDREFSA(sav) \ - key_addrefsa(sav, __FILE__, __LINE__) -#define KEY_FREESAV(psav) \ - key_freesav(psav, __FILE__, __LINE__) +int key_register_ifnet(struct secpolicy **, u_int); +void key_unregister_ifnet(struct secpolicy **, u_int); -extern void key_freeso(struct socket *); -extern int key_checktunnelsanity(struct secasvar *, u_int, - caddr_t, caddr_t); -extern int key_checkrequest(struct ipsecrequest *isr, - const struct secasindex *); -extern struct secpolicy *key_msg2sp(struct sadb_x_policy *, - size_t, int *); -extern struct mbuf *key_sp2msg(struct secpolicy *); -extern int key_ismyaddr(struct sockaddr *); -extern int key_spdacquire(struct secpolicy *); extern u_long key_random(void); extern void key_randomfill(void *, size_t); extern void key_freereg(struct socket *); extern int key_parse(struct mbuf *, struct socket *); extern void key_init(void); #ifdef VIMAGE extern void key_destroy(void); #endif extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); -#ifdef IPSEC_NAT_T -u_int16_t key_portfromsaddr(struct sockaddr *); -#define KEY_PORTFROMSADDR(saddr) \ - key_portfromsaddr((struct sockaddr *)(saddr)) -#endif +uint16_t key_portfromsaddr(struct sockaddr *); +void key_porttosaddr(struct sockaddr *, uint16_t port); #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); MALLOC_DECLARE(M_IPSEC_SAH); MALLOC_DECLARE(M_IPSEC_SP); MALLOC_DECLARE(M_IPSEC_SR); MALLOC_DECLARE(M_IPSEC_MISC); MALLOC_DECLARE(M_IPSEC_SAQ); MALLOC_DECLARE(M_IPSEC_SAR); MALLOC_DECLARE(M_IPSEC_INPCB); #endif /* MALLOC_DECLARE */ #endif /* defined(_KERNEL) */ #endif /* _NETIPSEC_KEY_H_ */ diff --git a/sys/netipsec/key_debug.c b/sys/netipsec/key_debug.c index 7ce0c79e9b6f..d3c57a88f4ec 100644 --- a/sys/netipsec/key_debug.c +++ b/sys/netipsec/key_debug.c @@ -1,738 +1,915 @@ /* $FreeBSD$ */ /* $KAME: key_debug.c,v 1.26 2001/06/27 10:46:50 sakane Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef _KERNEL #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #endif #include #ifdef _KERNEL #include #include #include #include #include #include #endif #include #include #include #include #include #include #ifdef _KERNEL #include +#include #endif #ifndef _KERNEL #include #include #include #endif /* !_KERNEL */ static void kdebug_sadb_prop(struct sadb_ext *); static void kdebug_sadb_identity(struct sadb_ext *); static void kdebug_sadb_supported(struct sadb_ext *); static void kdebug_sadb_lifetime(struct sadb_ext *); static void kdebug_sadb_sa(struct sadb_ext *); static void kdebug_sadb_address(struct sadb_ext *); static void kdebug_sadb_key(struct sadb_ext *); static void kdebug_sadb_x_sa2(struct sadb_ext *); #ifdef _KERNEL static void kdebug_secreplay(struct secreplay *); #endif #ifndef _KERNEL #define panic(fmt, ...) { printf(fmt, ## __VA_ARGS__); exit(-1); } #endif /* NOTE: host byte order */ /* %%%: about struct sadb_msg */ void kdebug_sadb(struct sadb_msg *base) { struct sadb_ext *ext; int tlen, extlen; /* sanity check */ if (base == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_msg{ version=%u type=%u errno=%u satype=%u\n", base->sadb_msg_version, base->sadb_msg_type, base->sadb_msg_errno, base->sadb_msg_satype); printf(" len=%u reserved=%u seq=%u pid=%u\n", base->sadb_msg_len, base->sadb_msg_reserved, base->sadb_msg_seq, base->sadb_msg_pid); tlen = PFKEY_UNUNIT64(base->sadb_msg_len) - sizeof(struct sadb_msg); ext = (struct sadb_ext *)((caddr_t)base + sizeof(struct sadb_msg)); while (tlen > 0) { printf("sadb_ext{ len=%u type=%u }\n", ext->sadb_ext_len, ext->sadb_ext_type); if (ext->sadb_ext_len == 0) { printf("%s: invalid ext_len=0 was passed.\n", __func__); return; } if (ext->sadb_ext_len > tlen) { printf("%s: ext_len too big (%u > %u).\n", __func__, ext->sadb_ext_len, tlen); return; } switch (ext->sadb_ext_type) { case SADB_EXT_SA: kdebug_sadb_sa(ext); break; case SADB_EXT_LIFETIME_CURRENT: case SADB_EXT_LIFETIME_HARD: case SADB_EXT_LIFETIME_SOFT: kdebug_sadb_lifetime(ext); break; case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: kdebug_sadb_address(ext); break; case SADB_EXT_KEY_AUTH: case SADB_EXT_KEY_ENCRYPT: kdebug_sadb_key(ext); break; case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: kdebug_sadb_identity(ext); break; case SADB_EXT_SENSITIVITY: break; case SADB_EXT_PROPOSAL: kdebug_sadb_prop(ext); break; case SADB_EXT_SUPPORTED_AUTH: case SADB_EXT_SUPPORTED_ENCRYPT: kdebug_sadb_supported(ext); break; case SADB_EXT_SPIRANGE: case SADB_X_EXT_KMPRIVATE: break; case SADB_X_EXT_POLICY: kdebug_sadb_x_policy(ext); break; case SADB_X_EXT_SA2: kdebug_sadb_x_sa2(ext); break; default: printf("%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type); return; } extlen = PFKEY_UNUNIT64(ext->sadb_ext_len); tlen -= extlen; ext = (struct sadb_ext *)((caddr_t)ext + extlen); } return; } static void kdebug_sadb_prop(struct sadb_ext *ext) { struct sadb_prop *prop = (struct sadb_prop *)ext; struct sadb_comb *comb; int len; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); len = (PFKEY_UNUNIT64(prop->sadb_prop_len) - sizeof(*prop)) / sizeof(*comb); comb = (struct sadb_comb *)(prop + 1); printf("sadb_prop{ replay=%u\n", prop->sadb_prop_replay); while (len--) { printf("sadb_comb{ auth=%u encrypt=%u " "flags=0x%04x reserved=0x%08x\n", comb->sadb_comb_auth, comb->sadb_comb_encrypt, comb->sadb_comb_flags, comb->sadb_comb_reserved); printf(" auth_minbits=%u auth_maxbits=%u " "encrypt_minbits=%u encrypt_maxbits=%u\n", comb->sadb_comb_auth_minbits, comb->sadb_comb_auth_maxbits, comb->sadb_comb_encrypt_minbits, comb->sadb_comb_encrypt_maxbits); printf(" soft_alloc=%u hard_alloc=%u " "soft_bytes=%lu hard_bytes=%lu\n", comb->sadb_comb_soft_allocations, comb->sadb_comb_hard_allocations, (unsigned long)comb->sadb_comb_soft_bytes, (unsigned long)comb->sadb_comb_hard_bytes); printf(" soft_alloc=%lu hard_alloc=%lu " "soft_bytes=%lu hard_bytes=%lu }\n", (unsigned long)comb->sadb_comb_soft_addtime, (unsigned long)comb->sadb_comb_hard_addtime, (unsigned long)comb->sadb_comb_soft_usetime, (unsigned long)comb->sadb_comb_hard_usetime); comb++; } printf("}\n"); return; } static void kdebug_sadb_identity(struct sadb_ext *ext) { struct sadb_ident *id = (struct sadb_ident *)ext; int len; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); len = PFKEY_UNUNIT64(id->sadb_ident_len) - sizeof(*id); printf("sadb_ident_%s{", id->sadb_ident_exttype == SADB_EXT_IDENTITY_SRC ? "src" : "dst"); switch (id->sadb_ident_type) { default: printf(" type=%d id=%lu", id->sadb_ident_type, (u_long)id->sadb_ident_id); if (len) { #ifdef _KERNEL ipsec_hexdump((caddr_t)(id + 1), len); /*XXX cast ?*/ #else char *p, *ep; printf("\n str=\""); p = (char *)(id + 1); ep = p + len; for (/*nothing*/; *p && p < ep; p++) { if (isprint(*p)) printf("%c", *p & 0xff); else printf("\\%03o", *p & 0xff); } #endif printf("\""); } break; } printf(" }\n"); return; } static void kdebug_sadb_supported(struct sadb_ext *ext) { struct sadb_supported *sup = (struct sadb_supported *)ext; struct sadb_alg *alg; int len; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); len = (PFKEY_UNUNIT64(sup->sadb_supported_len) - sizeof(*sup)) / sizeof(*alg); alg = (struct sadb_alg *)(sup + 1); printf("sadb_sup{\n"); while (len--) { printf(" { id=%d ivlen=%d min=%d max=%d }\n", alg->sadb_alg_id, alg->sadb_alg_ivlen, alg->sadb_alg_minbits, alg->sadb_alg_maxbits); alg++; } printf("}\n"); return; } static void kdebug_sadb_lifetime(struct sadb_ext *ext) { struct sadb_lifetime *lft = (struct sadb_lifetime *)ext; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_lifetime{ alloc=%u, bytes=%u\n", lft->sadb_lifetime_allocations, (u_int32_t)lft->sadb_lifetime_bytes); printf(" addtime=%u, usetime=%u }\n", (u_int32_t)lft->sadb_lifetime_addtime, (u_int32_t)lft->sadb_lifetime_usetime); return; } static void kdebug_sadb_sa(struct sadb_ext *ext) { struct sadb_sa *sa = (struct sadb_sa *)ext; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_sa{ spi=%u replay=%u state=%u\n", (u_int32_t)ntohl(sa->sadb_sa_spi), sa->sadb_sa_replay, sa->sadb_sa_state); printf(" auth=%u encrypt=%u flags=0x%08x }\n", sa->sadb_sa_auth, sa->sadb_sa_encrypt, sa->sadb_sa_flags); return; } static void kdebug_sadb_address(struct sadb_ext *ext) { struct sadb_address *addr = (struct sadb_address *)ext; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_address{ proto=%u prefixlen=%u reserved=0x%02x%02x }\n", addr->sadb_address_proto, addr->sadb_address_prefixlen, ((u_char *)&addr->sadb_address_reserved)[0], ((u_char *)&addr->sadb_address_reserved)[1]); kdebug_sockaddr((struct sockaddr *)((caddr_t)ext + sizeof(*addr))); return; } static void kdebug_sadb_key(struct sadb_ext *ext) { struct sadb_key *key = (struct sadb_key *)ext; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_key{ bits=%u reserved=%u\n", key->sadb_key_bits, key->sadb_key_reserved); printf(" key="); /* sanity check 2 */ if ((key->sadb_key_bits >> 3) > (PFKEY_UNUNIT64(key->sadb_key_len) - sizeof(struct sadb_key))) { printf("%s: key length mismatch, bit:%d len:%ld.\n", __func__, key->sadb_key_bits >> 3, (long)PFKEY_UNUNIT64(key->sadb_key_len) - sizeof(struct sadb_key)); } ipsec_hexdump((caddr_t)key + sizeof(struct sadb_key), key->sadb_key_bits >> 3); printf(" }\n"); return; } static void kdebug_sadb_x_sa2(struct sadb_ext *ext) { struct sadb_x_sa2 *sa2 = (struct sadb_x_sa2 *)ext; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_x_sa2{ mode=%u reqid=%u\n", sa2->sadb_x_sa2_mode, sa2->sadb_x_sa2_reqid); printf(" reserved1=%u reserved2=%u sequence=%u }\n", sa2->sadb_x_sa2_reserved1, sa2->sadb_x_sa2_reserved2, sa2->sadb_x_sa2_sequence); return; } void kdebug_sadb_x_policy(struct sadb_ext *ext) { struct sadb_x_policy *xpl = (struct sadb_x_policy *)ext; struct sockaddr *addr; /* sanity check */ if (ext == NULL) panic("%s: NULL pointer was passed.\n", __func__); printf("sadb_x_policy{ type=%u dir=%u id=%x }\n", xpl->sadb_x_policy_type, xpl->sadb_x_policy_dir, xpl->sadb_x_policy_id); if (xpl->sadb_x_policy_type == IPSEC_POLICY_IPSEC) { int tlen; struct sadb_x_ipsecrequest *xisr; tlen = PFKEY_UNUNIT64(xpl->sadb_x_policy_len) - sizeof(*xpl); xisr = (struct sadb_x_ipsecrequest *)(xpl + 1); while (tlen > 0) { printf(" { len=%u proto=%u mode=%u level=%u reqid=%u\n", xisr->sadb_x_ipsecrequest_len, xisr->sadb_x_ipsecrequest_proto, xisr->sadb_x_ipsecrequest_mode, xisr->sadb_x_ipsecrequest_level, xisr->sadb_x_ipsecrequest_reqid); if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { addr = (struct sockaddr *)(xisr + 1); kdebug_sockaddr(addr); addr = (struct sockaddr *)((caddr_t)addr + addr->sa_len); kdebug_sockaddr(addr); } printf(" }\n"); /* prevent infinite loop */ if (xisr->sadb_x_ipsecrequest_len <= 0) { printf("%s: wrong policy struct.\n", __func__); return; } /* prevent overflow */ if (xisr->sadb_x_ipsecrequest_len > tlen) { printf("%s: invalid ipsec policy length " "(%u > %u)\n", __func__, xisr->sadb_x_ipsecrequest_len, tlen); return; } tlen -= xisr->sadb_x_ipsecrequest_len; xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); } if (tlen != 0) panic("%s: wrong policy struct.\n", __func__); } return; } #ifdef _KERNEL /* %%%: about SPD and SAD */ -void -kdebug_secpolicy(struct secpolicy *sp) +const char* +kdebug_secpolicy_state(u_int state) { - /* sanity check */ - if (sp == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - printf("secpolicy{ refcnt=%u policy=%u\n", - sp->refcnt, sp->policy); + switch (state) { + case IPSEC_SPSTATE_DEAD: + return ("dead"); + case IPSEC_SPSTATE_LARVAL: + return ("larval"); + case IPSEC_SPSTATE_ALIVE: + return ("alive"); + case IPSEC_SPSTATE_PCB: + return ("pcb"); + case IPSEC_SPSTATE_IFNET: + return ("ifnet"); + } + return ("unknown"); +} - kdebug_secpolicyindex(&sp->spidx); +const char* +kdebug_secpolicy_policy(u_int policy) +{ - switch (sp->policy) { + switch (policy) { case IPSEC_POLICY_DISCARD: - printf(" type=discard }\n"); - break; + return ("discard"); case IPSEC_POLICY_NONE: - printf(" type=none }\n"); - break; + return ("none"); case IPSEC_POLICY_IPSEC: - { - struct ipsecrequest *isr; - for (isr = sp->req; isr != NULL; isr = isr->next) { - - printf(" level=%u\n", isr->level); - kdebug_secasindex(&isr->saidx); - - if (isr->sav != NULL) - kdebug_secasv(isr->sav); - } - printf(" }\n"); - } - break; - case IPSEC_POLICY_BYPASS: - printf(" type=bypass }\n"); - break; + return ("ipsec"); case IPSEC_POLICY_ENTRUST: - printf(" type=entrust }\n"); - break; - default: - printf("%s: Invalid policy found. %d\n", __func__, sp->policy); - break; + return ("entrust"); + case IPSEC_POLICY_BYPASS: + return ("bypass"); } - - return; + return ("unknown"); } -void -kdebug_secpolicyindex(struct secpolicyindex *spidx) +const char* +kdebug_secpolicyindex_dir(u_int dir) { - char buf[INET6_ADDRSTRLEN]; - /* sanity check */ - if (spidx == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + switch (dir) { + case IPSEC_DIR_ANY: + return ("any"); + case IPSEC_DIR_INBOUND: + return ("in"); + case IPSEC_DIR_OUTBOUND: + return ("out"); + } + return ("unknown"); +} - printf("secpolicyindex{ dir=%u prefs=%u prefd=%u ul_proto=%u\n", - spidx->dir, spidx->prefs, spidx->prefd, spidx->ul_proto); +const char* +kdebug_ipsecrequest_level(u_int level) +{ - printf("%s -> ", ipsec_address(&spidx->src, buf, sizeof(buf))); - printf("%s }\n", ipsec_address(&spidx->dst, buf, sizeof(buf))); + switch (level) { + case IPSEC_LEVEL_DEFAULT: + return ("default"); + case IPSEC_LEVEL_USE: + return ("use"); + case IPSEC_LEVEL_REQUIRE: + return ("require"); + case IPSEC_LEVEL_UNIQUE: + return ("unique"); + } + return ("unknown"); } -void -kdebug_secasindex(struct secasindex *saidx) +const char* +kdebug_secasindex_mode(u_int mode) { - char buf[INET6_ADDRSTRLEN]; - /* sanity check */ - if (saidx == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + switch (mode) { + case IPSEC_MODE_ANY: + return ("any"); + case IPSEC_MODE_TRANSPORT: + return ("transport"); + case IPSEC_MODE_TUNNEL: + return ("tunnel"); + case IPSEC_MODE_TCPMD5: + return ("tcp-md5"); + } + return ("unknown"); +} - printf("secasindex{ mode=%u proto=%u\n", - saidx->mode, saidx->proto); +const char* +kdebug_secasv_state(u_int state) +{ - printf("%s -> ", ipsec_address(&saidx->src, buf, sizeof(buf))); - printf("%s }\n", ipsec_address(&saidx->dst, buf, sizeof(buf))); + switch (state) { + case SADB_SASTATE_LARVAL: + return ("larval"); + case SADB_SASTATE_MATURE: + return ("mature"); + case SADB_SASTATE_DYING: + return ("dying"); + case SADB_SASTATE_DEAD: + return ("dead"); + } + return ("unknown"); } -static void -kdebug_sec_lifetime(struct seclifetime *lft) +static char* +kdebug_port2str(const struct sockaddr *sa, char *buf, size_t len) { - /* sanity check */ - if (lft == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - printf("sec_lifetime{ alloc=%u, bytes=%u\n", - lft->allocations, (u_int32_t)lft->bytes); - printf(" addtime=%u, usetime=%u }\n", - (u_int32_t)lft->addtime, (u_int32_t)lft->usetime); + uint16_t port; - return; + IPSEC_ASSERT(sa != NULL, ("null sa")); + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + port = ntohs(((const struct sockaddr_in *)sa)->sin_port); + break; +#endif +#ifdef INET6 + case AF_INET6: + port = ntohs(((const struct sockaddr_in6 *)sa)->sin6_port); + break; +#endif + default: + port = 0; + } + if (port == 0) + return ("*"); + snprintf(buf, len, "%u", port); + return (buf); } void -kdebug_secasv(struct secasvar *sav) +kdebug_secpolicy(struct secpolicy *sp) { - /* sanity check */ - if (sav == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + u_int idx; + + IPSEC_ASSERT(sp != NULL, ("null sp")); + printf("SP { refcnt=%u id=%u priority=%u state=%s policy=%s\n", + sp->refcnt, sp->id, sp->priority, + kdebug_secpolicy_state(sp->state), + kdebug_secpolicy_policy(sp->policy)); + kdebug_secpolicyindex(&sp->spidx, " "); + for (idx = 0; idx < sp->tcount; idx++) { + printf(" req[%u]{ level=%s ", idx, + kdebug_ipsecrequest_level(sp->req[idx]->level)); + kdebug_secasindex(&sp->req[idx]->saidx, NULL); + printf(" }\n"); + } + printf("}\n"); +} - printf("secas{"); - kdebug_secasindex(&sav->sah->saidx); +void +kdebug_secpolicyindex(struct secpolicyindex *spidx, const char *indent) +{ + char buf[IPSEC_ADDRSTRLEN]; + + IPSEC_ASSERT(spidx != NULL, ("null spidx")); + if (indent != NULL) + printf("%s", indent); + printf("spidx { dir=%s ul_proto=", + kdebug_secpolicyindex_dir(spidx->dir)); + if (spidx->ul_proto == IPSEC_ULPROTO_ANY) + printf("* "); + else + printf("%u ", spidx->ul_proto); + printf("%s/%u -> ", ipsec_address(&spidx->src, buf, sizeof(buf)), + spidx->prefs); + printf("%s/%u }\n", ipsec_address(&spidx->dst, buf, sizeof(buf)), + spidx->prefd); +} - printf(" refcnt=%u state=%u auth=%u enc=%u\n", - sav->refcnt, sav->state, sav->alg_auth, sav->alg_enc); - printf(" spi=%u flags=%u\n", - (u_int32_t)ntohl(sav->spi), sav->flags); +void +kdebug_secasindex(const struct secasindex *saidx, const char *indent) +{ + char buf[IPSEC_ADDRSTRLEN], port[6]; + + IPSEC_ASSERT(saidx != NULL, ("null saidx")); + if (indent != NULL) + printf("%s", indent); + printf("saidx { mode=%s proto=%u reqid=%u ", + kdebug_secasindex_mode(saidx->mode), saidx->proto, saidx->reqid); + printf("%s:%s -> ", ipsec_address(&saidx->src, buf, sizeof(buf)), + kdebug_port2str(&saidx->src.sa, port, sizeof(port))); + printf("%s:%s }\n", ipsec_address(&saidx->dst, buf, sizeof(buf)), + kdebug_port2str(&saidx->dst.sa, port, sizeof(port))); +} - if (sav->key_auth != NULL) - kdebug_sadb_key((struct sadb_ext *)sav->key_auth); - if (sav->key_enc != NULL) - kdebug_sadb_key((struct sadb_ext *)sav->key_enc); +static void +kdebug_sec_lifetime(struct seclifetime *lft, const char *indent) +{ - if (sav->replay != NULL) { - SECASVAR_LOCK(sav); - kdebug_secreplay(sav->replay); - SECASVAR_UNLOCK(sav); - } - if (sav->lft_c != NULL) - kdebug_sec_lifetime(sav->lft_c); - if (sav->lft_h != NULL) - kdebug_sec_lifetime(sav->lft_h); - if (sav->lft_s != NULL) - kdebug_sec_lifetime(sav->lft_s); + IPSEC_ASSERT(lft != NULL, ("null lft")); + if (indent != NULL) + printf("%s", indent); + printf("lifetime { alloc=%u, bytes=%ju addtime=%ju usetime=%ju }\n", + lft->allocations, (uintmax_t)lft->bytes, (uintmax_t)lft->addtime, + (uintmax_t)lft->usetime); +} -#ifdef notyet - /* XXX: misc[123] ? */ -#endif +void +kdebug_secash(struct secashead *sah, const char *indent) +{ - return; + IPSEC_ASSERT(sah != NULL, ("null sah")); + if (indent != NULL) + printf("%s", indent); + printf("SAH { refcnt=%u state=%s\n", sah->refcnt, + kdebug_secasv_state(sah->state)); + if (indent != NULL) + printf("%s", indent); + kdebug_secasindex(&sah->saidx, indent); + if (indent != NULL) + printf("%s", indent); + printf("}\n"); } static void kdebug_secreplay(struct secreplay *rpl) { int len, l; - /* sanity check */ - if (rpl == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - + IPSEC_ASSERT(rpl != NULL, ("null rpl")); printf(" secreplay{ count=%u bitmap_size=%u wsize=%u seq=%u lastseq=%u", rpl->count, rpl->bitmap_size, rpl->wsize, rpl->seq, rpl->lastseq); if (rpl->bitmap == NULL) { - printf(" }\n"); + printf(" }\n"); return; } - printf("\n bitmap { "); - + printf("\n bitmap { "); for (len = 0; len < rpl->bitmap_size*4; len++) { for (l = 7; l >= 0; l--) printf("%u", (((rpl->bitmap)[len] >> l) & 1) ? 1 : 0); } - printf(" }\n"); + printf(" }\n"); +} - return; +static void +kdebug_secnatt(struct secnatt *natt) +{ + char buf[IPSEC_ADDRSTRLEN]; + + IPSEC_ASSERT(natt != NULL, ("null natt")); + printf(" natt{ sport=%u dport=%u ", ntohs(natt->sport), + ntohs(natt->dport)); + if (natt->flags & IPSEC_NATT_F_OAI) + printf("oai=%s ", ipsec_address(&natt->oai, buf, sizeof(buf))); + if (natt->flags & IPSEC_NATT_F_OAR) + printf("oar=%s ", ipsec_address(&natt->oar, buf, sizeof(buf))); + printf("}\n"); +} + +void +kdebug_secasv(struct secasvar *sav) +{ + struct seclifetime lft_c; + + IPSEC_ASSERT(sav != NULL, ("null sav")); + + printf("SA { refcnt=%u spi=%u seq=%u pid=%u flags=0x%x state=%s\n", + sav->refcnt, ntohl(sav->spi), sav->seq, (uint32_t)sav->pid, + sav->flags, kdebug_secasv_state(sav->state)); + kdebug_secash(sav->sah, " "); + + lft_c.addtime = sav->created; + lft_c.allocations = (uint32_t)counter_u64_fetch( + sav->lft_c_allocations); + lft_c.bytes = counter_u64_fetch(sav->lft_c_bytes); + lft_c.usetime = sav->firstused; + kdebug_sec_lifetime(&lft_c, " c_"); + if (sav->lft_h != NULL) + kdebug_sec_lifetime(sav->lft_h, " h_"); + if (sav->lft_s != NULL) + kdebug_sec_lifetime(sav->lft_s, " s_"); + + if (sav->tdb_authalgxform != NULL) + printf(" alg_auth=%s\n", sav->tdb_authalgxform->name); + if (sav->key_auth != NULL) + KEYDBG(DUMP, + kdebug_sadb_key((struct sadb_ext *)sav->key_auth)); + if (sav->tdb_encalgxform != NULL) + printf(" alg_enc=%s\n", sav->tdb_encalgxform->name); + if (sav->key_enc != NULL) + KEYDBG(DUMP, + kdebug_sadb_key((struct sadb_ext *)sav->key_enc)); + if (sav->natt != NULL) + kdebug_secnatt(sav->natt); + if (sav->replay != NULL) { + KEYDBG(DUMP, + SECASVAR_LOCK(sav); + kdebug_secreplay(sav->replay); + SECASVAR_UNLOCK(sav)); + } + printf("}\n"); } void kdebug_mbufhdr(const struct mbuf *m) { /* sanity check */ if (m == NULL) return; printf("mbuf(%p){ m_next:%p m_nextpkt:%p m_data:%p " "m_len:%d m_type:0x%02x m_flags:0x%02x }\n", m, m->m_next, m->m_nextpkt, m->m_data, m->m_len, m->m_type, m->m_flags); if (m->m_flags & M_PKTHDR) { printf(" m_pkthdr{ len:%d rcvif:%p }\n", m->m_pkthdr.len, m->m_pkthdr.rcvif); } if (m->m_flags & M_EXT) { printf(" m_ext{ ext_buf:%p ext_free:%p " "ext_size:%u ext_cnt:%p }\n", m->m_ext.ext_buf, m->m_ext.ext_free, m->m_ext.ext_size, m->m_ext.ext_cnt); } return; } void kdebug_mbuf(const struct mbuf *m0) { const struct mbuf *m = m0; int i, j; for (j = 0; m; m = m->m_next) { kdebug_mbufhdr(m); printf(" m_data:\n"); for (i = 0; i < m->m_len; i++) { if (i && i % 32 == 0) printf("\n"); if (i % 4 == 0) printf(" "); printf("%02x", mtod(m, const u_char *)[i]); j++; } printf("\n"); } return; } + +/* Return a printable string for the address. */ +char * +ipsec_address(const union sockaddr_union* sa, char *buf, socklen_t size) +{ + + switch (sa->sa.sa_family) { +#ifdef INET + case AF_INET: + return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size)); +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6.sin6_addr)) { + snprintf(buf, size, "%s%%%u", inet_ntop(AF_INET6, + &sa->sin6.sin6_addr, buf, size), + sa->sin6.sin6_scope_id); + return (buf); + } else + return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, + buf, size)); +#endif /* INET6 */ + case 0: + return ("*"); + default: + return ("(unknown address family)"); + } +} + +char * +ipsec_sa2str(struct secasvar *sav, char *buf, size_t size) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + + snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)", + (u_long)ntohl(sav->spi), + ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf))); + return (buf); +} + #endif /* _KERNEL */ void kdebug_sockaddr(struct sockaddr *addr) { struct sockaddr_in *sin4; #ifdef INET6 struct sockaddr_in6 *sin6; #endif /* sanity check */ if (addr == NULL) panic("%s: NULL pointer was passed.\n", __func__); /* NOTE: We deal with port number as host byte order. */ printf("sockaddr{ len=%u family=%u", addr->sa_len, addr->sa_family); switch (addr->sa_family) { case AF_INET: sin4 = (struct sockaddr_in *)addr; printf(" port=%u\n", ntohs(sin4->sin_port)); ipsec_hexdump((caddr_t)&sin4->sin_addr, sizeof(sin4->sin_addr)); break; #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)addr; printf(" port=%u\n", ntohs(sin6->sin6_port)); printf(" flowinfo=0x%08x, scope_id=0x%08x\n", sin6->sin6_flowinfo, sin6->sin6_scope_id); ipsec_hexdump((caddr_t)&sin6->sin6_addr, sizeof(sin6->sin6_addr)); break; #endif } printf(" }\n"); return; } void ipsec_bindump(caddr_t buf, int len) { int i; for (i = 0; i < len; i++) printf("%c", (unsigned char)buf[i]); return; } void ipsec_hexdump(caddr_t buf, int len) { int i; for (i = 0; i < len; i++) { if (i != 0 && i % 32 == 0) printf("\n"); if (i % 4 == 0) printf(" "); printf("%02x", (unsigned char)buf[i]); } #if 0 if (i % 32 != 0) printf("\n"); #endif return; } diff --git a/sys/netipsec/key_debug.h b/sys/netipsec/key_debug.h index 09576bf34398..18150b5379e5 100644 --- a/sys/netipsec/key_debug.h +++ b/sys/netipsec/key_debug.h @@ -1,89 +1,102 @@ /* $FreeBSD$ */ /* $KAME: key_debug.h,v 1.10 2001/08/05 08:37:52 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETIPSEC_KEY_DEBUG_H_ #define _NETIPSEC_KEY_DEBUG_H_ #ifdef _KERNEL /* debug flags */ #define KEYDEBUG_STAMP 0x00000001 /* path */ #define KEYDEBUG_DATA 0x00000002 /* data */ #define KEYDEBUG_DUMP 0x00000004 /* dump */ #define KEYDEBUG_KEY 0x00000010 /* key processing */ #define KEYDEBUG_ALG 0x00000020 /* ciph & auth algorithm */ #define KEYDEBUG_IPSEC 0x00000040 /* ipsec processing */ #define KEYDEBUG_KEY_STAMP (KEYDEBUG_KEY | KEYDEBUG_STAMP) #define KEYDEBUG_KEY_DATA (KEYDEBUG_KEY | KEYDEBUG_DATA) #define KEYDEBUG_KEY_DUMP (KEYDEBUG_KEY | KEYDEBUG_DUMP) #define KEYDEBUG_ALG_STAMP (KEYDEBUG_ALG | KEYDEBUG_STAMP) #define KEYDEBUG_ALG_DATA (KEYDEBUG_ALG | KEYDEBUG_DATA) #define KEYDEBUG_ALG_DUMP (KEYDEBUG_ALG | KEYDEBUG_DUMP) #define KEYDEBUG_IPSEC_STAMP (KEYDEBUG_IPSEC | KEYDEBUG_STAMP) #define KEYDEBUG_IPSEC_DATA (KEYDEBUG_IPSEC | KEYDEBUG_DATA) #define KEYDEBUG_IPSEC_DUMP (KEYDEBUG_IPSEC | KEYDEBUG_DUMP) -#define KEYDEBUG(lev,arg) \ - do { if ((V_key_debug_level & (lev)) == (lev)) { arg; } } while (0) +#define KEYDBG(lev, arg) \ + if ((V_key_debug_level & (KEYDEBUG_ ## lev)) == (KEYDEBUG_ ## lev)) { \ + arg; \ + } -VNET_DECLARE(u_int32_t, key_debug_level); +VNET_DECLARE(uint32_t, key_debug_level); #define V_key_debug_level VNET(key_debug_level) #endif /*_KERNEL*/ struct sadb_msg; struct sadb_ext; extern void kdebug_sadb(struct sadb_msg *); extern void kdebug_sadb_x_policy(struct sadb_ext *); #ifdef _KERNEL struct secpolicy; struct secpolicyindex; struct secasindex; +struct secashead; struct secasvar; struct secreplay; struct mbuf; -extern void kdebug_secpolicy(struct secpolicy *); -extern void kdebug_secpolicyindex(struct secpolicyindex *); -extern void kdebug_secasindex(struct secasindex *); -extern void kdebug_secasv(struct secasvar *); -extern void kdebug_mbufhdr(const struct mbuf *); -extern void kdebug_mbuf(const struct mbuf *); +union sockaddr_union; +const char* kdebug_secpolicy_state(u_int); +const char* kdebug_secpolicy_policy(u_int); +const char* kdebug_secpolicyindex_dir(u_int); +const char* kdebug_ipsecrequest_level(u_int); +const char* kdebug_secasindex_mode(u_int); +const char* kdebug_secasv_state(u_int); +void kdebug_secpolicy(struct secpolicy *); +void kdebug_secpolicyindex(struct secpolicyindex *, const char *); +void kdebug_secasindex(const struct secasindex *, const char *); +void kdebug_secash(struct secashead *, const char *); +void kdebug_secasv(struct secasvar *); +void kdebug_mbufhdr(const struct mbuf *); +void kdebug_mbuf(const struct mbuf *); +char *ipsec_address(const union sockaddr_union *, char *, socklen_t); +char *ipsec_sa2str(struct secasvar *, char *, size_t); #endif /*_KERNEL*/ struct sockaddr; extern void kdebug_sockaddr(struct sockaddr *); extern void ipsec_hexdump(caddr_t, int); extern void ipsec_bindump(caddr_t, int); #endif /* _NETIPSEC_KEY_DEBUG_H_ */ diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h index 363b8f99d638..ffbbe3325502 100644 --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -1,232 +1,254 @@ /* $FreeBSD$ */ /* $KAME: keydb.h,v 1.14 2000/08/02 17:58:26 sakane Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETIPSEC_KEYDB_H_ #define _NETIPSEC_KEYDB_H_ #ifdef _KERNEL - +#include #include #include #include #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED /* * The union of all possible address formats we handle. */ union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ /* Security Assocciation Index */ /* NOTE: Ensure to be same address family */ struct secasindex { union sockaddr_union src; /* source address for SA */ union sockaddr_union dst; /* destination address for SA */ - u_int16_t proto; /* IPPROTO_ESP or IPPROTO_AH */ - u_int8_t mode; /* mode of protocol, see ipsec.h */ - u_int32_t reqid; /* reqid id who owned this SA */ + uint8_t proto; /* IPPROTO_ESP or IPPROTO_AH */ + uint8_t mode; /* mode of protocol, see ipsec.h */ + uint32_t reqid; /* reqid id who owned this SA */ /* see IPSEC_MANUAL_REQID_MAX. */ }; /* * In order to split out the keydb implementation from that of the * PF_KEY sockets we need to define a few structures that while they * may seem common are likely to diverge over time. */ /* sadb_identity */ struct secident { u_int16_t type; u_int64_t id; }; /* sadb_key */ struct seckey { u_int16_t bits; char *key_data; }; struct seclifetime { u_int32_t allocations; u_int64_t bytes; u_int64_t addtime; u_int64_t usetime; }; +struct secnatt { + union sockaddr_union oai; /* original addresses of initiator */ + union sockaddr_union oar; /* original address of responder */ + uint16_t sport; /* source port */ + uint16_t dport; /* destination port */ + uint16_t cksum; /* checksum delta */ + uint16_t flags; +#define IPSEC_NATT_F_OAI 0x0001 +#define IPSEC_NATT_F_OAR 0x0002 +}; + /* Security Association Data Base */ +TAILQ_HEAD(secasvar_queue, secasvar); struct secashead { - LIST_ENTRY(secashead) chain; + TAILQ_ENTRY(secashead) chain; + LIST_ENTRY(secashead) addrhash; /* hash by sproto+src+dst addresses */ + LIST_ENTRY(secashead) drainq; /* used ONLY by flush callout */ struct secasindex saidx; struct secident *idents; /* source identity */ struct secident *identd; /* destination identity */ /* XXX I don't know how to use them. */ - u_int8_t state; /* MATURE or DEAD. */ - LIST_HEAD(_satree, secasvar) savtree[SADB_SASTATE_MAX+1]; - /* SA chain */ - /* The first of this list is newer SA */ + volatile u_int refcnt; /* reference count */ + uint8_t state; /* MATURE or DEAD. */ + struct secasvar_queue savtree_alive; /* MATURE and DYING SA */ + struct secasvar_queue savtree_larval; /* LARVAL SA */ }; struct xformsw; struct enc_xform; struct auth_hash; struct comp_algo; -/* Security Association */ +/* + * Security Association + * + * For INBOUND packets we do SA lookup using SPI, thus only SPIHASH is used. + * For OUTBOUND packets there may be several SA suitable for packet. + * We use key_preferred_oldsa variable to choose better SA. First of we do + * lookup for suitable SAH using packet's saidx. Then we use SAH's savtree + * to search better candidate. The newer SA (by created time) are placed + * in the beginning of the savtree list. There is no preference between + * DYING and MATURE. + * + * NB: Fields with a tdb_ prefix are part of the "glue" used + * to interface to the OpenBSD crypto support. This was done + * to distinguish this code from the mainline KAME code. + * NB: Fields are sorted on the basis of the frequency of changes, i.e. + * constants and unchangeable fields are going first. + * NB: if you want to change this structure, check that this will not break + * key_updateaddresses(). + */ struct secasvar { - LIST_ENTRY(secasvar) chain; - struct mtx lock; /* update/access lock */ - - u_int refcnt; /* reference count */ - u_int8_t state; /* Status of this Association */ - - u_int8_t alg_auth; /* Authentication Algorithm Identifier*/ - u_int8_t alg_enc; /* Cipher Algorithm Identifier */ - u_int8_t alg_comp; /* Compression Algorithm Identifier */ - u_int32_t spi; /* SPI Value, network byte order */ - u_int32_t flags; /* holder for SADB_KEY_FLAGS */ + uint32_t spi; /* SPI Value, network byte order */ + uint32_t flags; /* holder for SADB_KEY_FLAGS */ + uint32_t seq; /* sequence number */ + pid_t pid; /* message's pid */ + u_int ivlen; /* length of IV */ + struct secashead *sah; /* back pointer to the secashead */ struct seckey *key_auth; /* Key for Authentication */ struct seckey *key_enc; /* Key for Encryption */ - u_int ivlen; /* length of IV */ - void *sched; /* intermediate encryption key */ - size_t schedlen; - uint64_t cntr; /* counter for GCM and CTR */ - struct secreplay *replay; /* replay prevention */ - time_t created; /* for lifetime */ - - struct seclifetime *lft_c; /* CURRENT lifetime, it's constant. */ + struct secnatt *natt; /* NAT-T config */ + struct mtx *lock; /* update/access lock */ + + const struct xformsw *tdb_xform; /* transform */ + const struct enc_xform *tdb_encalgxform;/* encoding algorithm */ + const struct auth_hash *tdb_authalgxform;/* authentication algorithm */ + const struct comp_algo *tdb_compalgxform;/* compression algorithm */ + uint64_t tdb_cryptoid; /* crypto session id */ + + uint8_t alg_auth; /* Authentication Algorithm Identifier*/ + uint8_t alg_enc; /* Cipher Algorithm Identifier */ + uint8_t alg_comp; /* Compression Algorithm Identifier */ + uint8_t state; /* Status of this SA (pfkeyv2.h) */ + + counter_u64_t lft_c; /* CURRENT lifetime */ +#define lft_c_allocations lft_c +#define lft_c_bytes lft_c + 1 struct seclifetime *lft_h; /* HARD lifetime */ struct seclifetime *lft_s; /* SOFT lifetime */ - u_int32_t seq; /* sequence number */ - pid_t pid; /* message's pid */ + uint64_t created; /* time when SA was created */ + uint64_t firstused; /* time when SA was first used */ - struct secashead *sah; /* back pointer to the secashead */ + TAILQ_ENTRY(secasvar) chain; + LIST_ENTRY(secasvar) spihash; + LIST_ENTRY(secasvar) drainq; /* used ONLY by flush callout */ - /* - * NB: Fields with a tdb_ prefix are part of the "glue" used - * to interface to the OpenBSD crypto support. This was done - * to distinguish this code from the mainline KAME code. - */ - struct xformsw *tdb_xform; /* transform */ - struct enc_xform *tdb_encalgxform; /* encoding algorithm */ - struct auth_hash *tdb_authalgxform; /* authentication algorithm */ - struct comp_algo *tdb_compalgxform; /* compression algorithm */ - u_int64_t tdb_cryptoid; /* crypto session id */ - - /* - * NAT-Traversal. - */ - u_int16_t natt_type; /* IKE/ESP-marker in output. */ - u_int16_t natt_esp_frag_len; /* MTU for payload fragmentation. */ + uint64_t cntr; /* counter for GCM and CTR */ + volatile u_int refcnt; /* reference count */ }; -#define SECASVAR_LOCK_INIT(_sav) \ - mtx_init(&(_sav)->lock, "ipsec association", NULL, MTX_DEF) -#define SECASVAR_LOCK(_sav) mtx_lock(&(_sav)->lock) -#define SECASVAR_UNLOCK(_sav) mtx_unlock(&(_sav)->lock) -#define SECASVAR_LOCK_DESTROY(_sav) mtx_destroy(&(_sav)->lock) -#define SECASVAR_LOCK_ASSERT(_sav) mtx_assert(&(_sav)->lock, MA_OWNED) +#define SECASVAR_LOCK(_sav) mtx_lock((_sav)->lock) +#define SECASVAR_UNLOCK(_sav) mtx_unlock((_sav)->lock) +#define SECASVAR_LOCK_ASSERT(_sav) mtx_assert((_sav)->lock, MA_OWNED) #define SAV_ISGCM(_sav) \ ((_sav)->alg_enc == SADB_X_EALG_AESGCM8 || \ (_sav)->alg_enc == SADB_X_EALG_AESGCM12 || \ (_sav)->alg_enc == SADB_X_EALG_AESGCM16) #define SAV_ISCTR(_sav) ((_sav)->alg_enc == SADB_X_EALG_AESCTR) #define SAV_ISCTRORGCM(_sav) (SAV_ISCTR((_sav)) || SAV_ISGCM((_sav))) /* Replay prevention, protected by SECASVAR_LOCK: * (m) locked by mtx * (c) read only except during creation / free */ struct secreplay { u_int32_t count; /* (m) */ u_int wsize; /* (c) window size, i.g. 4 bytes */ u_int32_t seq; /* (m) used by sender */ u_int32_t lastseq; /* (m) used by receiver */ u_int32_t *bitmap; /* (m) used by receiver */ u_int bitmap_size; /* (c) size of the bitmap array */ int overflow; /* (m) overflow flag */ }; /* socket table due to send PF_KEY messages. */ struct secreg { LIST_ENTRY(secreg) chain; struct socket *so; }; /* acquiring list table. */ struct secacq { LIST_ENTRY(secacq) chain; + LIST_ENTRY(secacq) addrhash; + LIST_ENTRY(secacq) seqhash; struct secasindex saidx; - - u_int32_t seq; /* sequence number */ + uint32_t seq; /* sequence number */ time_t created; /* for lifetime */ int count; /* for lifetime */ }; /* Sensitivity Level Specification */ /* nothing */ #define SADB_KILL_INTERVAL 600 /* six seconds */ /* secpolicy */ extern struct secpolicy *keydb_newsecpolicy(void); extern void keydb_delsecpolicy(struct secpolicy *); /* secashead */ extern struct secashead *keydb_newsecashead(void); extern void keydb_delsecashead(struct secashead *); /* secasvar */ extern struct secasvar *keydb_newsecasvar(void); extern void keydb_refsecasvar(struct secasvar *); extern void keydb_freesecasvar(struct secasvar *); /* secreplay */ extern struct secreplay *keydb_newsecreplay(size_t); extern void keydb_delsecreplay(struct secreplay *); /* secreg */ extern struct secreg *keydb_newsecreg(void); extern void keydb_delsecreg(struct secreg *); #endif /* _KERNEL */ #endif /* _NETIPSEC_KEYDB_H_ */ diff --git a/sys/netipsec/keysock.c b/sys/netipsec/keysock.c index fffdd60fdf1d..23bed2a36e6c 100644 --- a/sys/netipsec/keysock.c +++ b/sys/netipsec/keysock.c @@ -1,570 +1,570 @@ /* $FreeBSD$ */ /* $KAME: keysock.c,v 1.25 2001/08/13 20:07:41 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_ipsec.h" /* This code has derived from sys/net/rtsock.c on FreeBSD2.2.5 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct key_cb { int key_count; int any_count; }; static VNET_DEFINE(struct key_cb, key_cb); #define V_key_cb VNET(key_cb) static struct sockaddr key_src = { 2, PF_KEY, }; static int key_sendup0(struct rawcb *, struct mbuf *, int); VNET_PCPUSTAT_DEFINE(struct pfkeystat, pfkeystat); VNET_PCPUSTAT_SYSINIT(pfkeystat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(pfkeystat); #endif /* VIMAGE */ /* * key_output() */ int key_output(struct mbuf *m, struct socket *so, ...) { struct sadb_msg *msg; int len, error = 0; if (m == NULL) panic("%s: NULL pointer was passed.\n", __func__); PFKEYSTAT_INC(out_total); PFKEYSTAT_ADD(out_bytes, m->m_pkthdr.len); len = m->m_pkthdr.len; if (len < sizeof(struct sadb_msg)) { PFKEYSTAT_INC(out_tooshort); error = EINVAL; goto end; } if (m->m_len < sizeof(struct sadb_msg)) { if ((m = m_pullup(m, sizeof(struct sadb_msg))) == NULL) { PFKEYSTAT_INC(out_nomem); error = ENOBUFS; goto end; } } M_ASSERTPKTHDR(m); - KEYDEBUG(KEYDEBUG_KEY_DUMP, kdebug_mbuf(m)); + KEYDBG(KEY_DUMP, kdebug_mbuf(m)); msg = mtod(m, struct sadb_msg *); PFKEYSTAT_INC(out_msgtype[msg->sadb_msg_type]); if (len != PFKEY_UNUNIT64(msg->sadb_msg_len)) { PFKEYSTAT_INC(out_invlen); error = EINVAL; goto end; } error = key_parse(m, so); m = NULL; end: if (m) m_freem(m); return error; } /* * send message to the socket. */ static int key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc) { int error; if (promisc) { struct sadb_msg *pmsg; M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT); if (m == NULL) { PFKEYSTAT_INC(in_nomem); return (ENOBUFS); } pmsg = mtod(m, struct sadb_msg *); bzero(pmsg, sizeof(*pmsg)); pmsg->sadb_msg_version = PF_KEY_V2; pmsg->sadb_msg_type = SADB_X_PROMISC; pmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); /* pid and seq? */ PFKEYSTAT_INC(in_msgtype[pmsg->sadb_msg_type]); } if (!sbappendaddr(&rp->rcb_socket->so_rcv, (struct sockaddr *)&key_src, m, NULL)) { PFKEYSTAT_INC(in_nomem); m_freem(m); error = ENOBUFS; } else error = 0; sorwakeup(rp->rcb_socket); return error; } /* XXX this interface should be obsoleted. */ int key_sendup(struct socket *so, struct sadb_msg *msg, u_int len, int target) { struct mbuf *m, *n, *mprev; int tlen; /* sanity check */ if (so == NULL || msg == NULL) panic("%s: NULL pointer was passed.\n", __func__); - KEYDEBUG(KEYDEBUG_KEY_DUMP, - printf("%s: \n", __func__); - kdebug_sadb(msg)); + KEYDBG(KEY_DUMP, + printf("%s: \n", __func__); + kdebug_sadb(msg)); /* * we increment statistics here, just in case we have ENOBUFS * in this function. */ PFKEYSTAT_INC(in_total); PFKEYSTAT_ADD(in_bytes, len); PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]); /* * Get mbuf chain whenever possible (not clusters), * to save socket buffer. We'll be generating many SADB_ACQUIRE * messages to listening key sockets. If we simply allocate clusters, * sbappendaddr() will raise ENOBUFS due to too little sbspace(). * sbspace() computes # of actual data bytes AND mbuf region. * * TODO: SADB_ACQUIRE filters should be implemented. */ tlen = len; m = mprev = NULL; while (tlen > 0) { if (tlen == len) { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MHLEN; } else { MGET(n, M_NOWAIT, MT_DATA); if (n == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MLEN; } if (tlen >= MCLBYTES) { /*XXX better threshold? */ if (!(MCLGET(n, M_NOWAIT))) { m_free(n); m_freem(m); PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MCLBYTES; } if (tlen < n->m_len) n->m_len = tlen; n->m_next = NULL; if (m == NULL) m = mprev = n; else { mprev->m_next = n; mprev = n; } tlen -= n->m_len; n = NULL; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m_copyback(m, 0, len, (caddr_t)msg); /* avoid duplicated statistics */ PFKEYSTAT_ADD(in_total, -1); PFKEYSTAT_ADD(in_bytes, -len); PFKEYSTAT_ADD(in_msgtype[msg->sadb_msg_type], -1); return key_sendup_mbuf(so, m, target); } /* so can be NULL if target != KEY_SENDUP_ONE */ int key_sendup_mbuf(struct socket *so, struct mbuf *m, int target) { struct mbuf *n; struct keycb *kp; int sendup; struct rawcb *rp; int error = 0; if (m == NULL) panic("key_sendup_mbuf: NULL pointer was passed.\n"); if (so == NULL && target == KEY_SENDUP_ONE) panic("%s: NULL pointer was passed.\n", __func__); PFKEYSTAT_INC(in_total); PFKEYSTAT_ADD(in_bytes, m->m_pkthdr.len); if (m->m_len < sizeof(struct sadb_msg)) { m = m_pullup(m, sizeof(struct sadb_msg)); if (m == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } } if (m->m_len >= sizeof(struct sadb_msg)) { struct sadb_msg *msg; msg = mtod(m, struct sadb_msg *); PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]); } mtx_lock(&rawcb_mtx); LIST_FOREACH(rp, &V_rawcb_list, list) { if (rp->rcb_proto.sp_family != PF_KEY) continue; if (rp->rcb_proto.sp_protocol && rp->rcb_proto.sp_protocol != PF_KEY_V2) { continue; } kp = (struct keycb *)rp; /* * If you are in promiscuous mode, and when you get broadcasted * reply, you'll get two PF_KEY messages. * (based on pf_key@inner.net message on 14 Oct 1998) */ if (((struct keycb *)rp)->kp_promisc) { if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { (void)key_sendup0(rp, n, 1); n = NULL; } } /* the exact target will be processed later */ if (so && sotorawcb(so) == rp) continue; sendup = 0; switch (target) { case KEY_SENDUP_ONE: /* the statement has no effect */ if (so && sotorawcb(so) == rp) sendup++; break; case KEY_SENDUP_ALL: sendup++; break; case KEY_SENDUP_REGISTERED: if (kp->kp_registered) sendup++; break; } PFKEYSTAT_INC(in_msgtarget[target]); if (!sendup) continue; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { m_freem(m); PFKEYSTAT_INC(in_nomem); mtx_unlock(&rawcb_mtx); return ENOBUFS; } if ((error = key_sendup0(rp, n, 0)) != 0) { m_freem(m); mtx_unlock(&rawcb_mtx); return error; } n = NULL; } if (so) { error = key_sendup0(sotorawcb(so), m, 0); m = NULL; } else { error = 0; m_freem(m); } mtx_unlock(&rawcb_mtx); return error; } /* * key_abort() * derived from net/rtsock.c:rts_abort() */ static void key_abort(struct socket *so) { raw_usrreqs.pru_abort(so); } /* * key_attach() * derived from net/rtsock.c:rts_attach() */ static int key_attach(struct socket *so, int proto, struct thread *td) { struct keycb *kp; int error; KASSERT(so->so_pcb == NULL, ("key_attach: so_pcb != NULL")); if (td != NULL) { error = priv_check(td, PRIV_NET_RAW); if (error) return error; } /* XXX */ kp = malloc(sizeof *kp, M_PCB, M_WAITOK | M_ZERO); if (kp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)kp; error = raw_attach(so, proto); kp = (struct keycb *)sotorawcb(so); if (error) { free(kp, M_PCB); so->so_pcb = (caddr_t) 0; return error; } kp->kp_promisc = kp->kp_registered = 0; if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */ V_key_cb.key_count++; V_key_cb.any_count++; soisconnected(so); so->so_options |= SO_USELOOPBACK; return 0; } /* * key_bind() * derived from net/rtsock.c:rts_bind() */ static int key_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return EINVAL; } /* * key_close() * derived from net/rtsock.c:rts_close(). */ static void key_close(struct socket *so) { raw_usrreqs.pru_close(so); } /* * key_connect() * derived from net/rtsock.c:rts_connect() */ static int key_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return EINVAL; } /* * key_detach() * derived from net/rtsock.c:rts_detach() */ static void key_detach(struct socket *so) { struct keycb *kp = (struct keycb *)sotorawcb(so); KASSERT(kp != NULL, ("key_detach: kp == NULL")); if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */ V_key_cb.key_count--; V_key_cb.any_count--; key_freereg(so); raw_usrreqs.pru_detach(so); } /* * key_disconnect() * derived from net/rtsock.c:key_disconnect() */ static int key_disconnect(struct socket *so) { return(raw_usrreqs.pru_disconnect(so)); } /* * key_peeraddr() * derived from net/rtsock.c:rts_peeraddr() */ static int key_peeraddr(struct socket *so, struct sockaddr **nam) { return(raw_usrreqs.pru_peeraddr(so, nam)); } /* * key_send() * derived from net/rtsock.c:rts_send() */ static int key_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { return(raw_usrreqs.pru_send(so, flags, m, nam, control, td)); } /* * key_shutdown() * derived from net/rtsock.c:rts_shutdown() */ static int key_shutdown(struct socket *so) { return(raw_usrreqs.pru_shutdown(so)); } /* * key_sockaddr() * derived from net/rtsock.c:rts_sockaddr() */ static int key_sockaddr(struct socket *so, struct sockaddr **nam) { return(raw_usrreqs.pru_sockaddr(so, nam)); } struct pr_usrreqs key_usrreqs = { .pru_abort = key_abort, .pru_attach = key_attach, .pru_bind = key_bind, .pru_connect = key_connect, .pru_detach = key_detach, .pru_disconnect = key_disconnect, .pru_peeraddr = key_peeraddr, .pru_send = key_send, .pru_shutdown = key_shutdown, .pru_sockaddr = key_sockaddr, .pru_close = key_close, }; /* sysctl */ SYSCTL_NODE(_net, PF_KEY, key, CTLFLAG_RW, 0, "Key Family"); /* * Definitions of protocols supported in the KEY domain. */ extern struct domain keydomain; struct protosw keysw[] = { { .pr_type = SOCK_RAW, .pr_domain = &keydomain, .pr_protocol = PF_KEY_V2, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_output = key_output, .pr_ctlinput = raw_ctlinput, .pr_init = raw_init, .pr_usrreqs = &key_usrreqs } }; static void key_init0(void) { bzero((caddr_t)&V_key_cb, sizeof(V_key_cb)); key_init(); } struct domain keydomain = { .dom_family = PF_KEY, .dom_name = "key", .dom_init = key_init0, #ifdef VIMAGE .dom_destroy = key_destroy, #endif .dom_protosw = keysw, .dom_protoswNPROTOSW = &keysw[nitems(keysw)] }; VNET_DOMAIN_SET(key); diff --git a/sys/netipsec/subr_ipsec.c b/sys/netipsec/subr_ipsec.c new file mode 100644 index 000000000000..5bab2f1dc0d3 --- /dev/null +++ b/sys/netipsec/subr_ipsec.c @@ -0,0 +1,350 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +/* + * This file is build in the kernel only when 'options IPSEC' or + * 'options IPSEC_SUPPORT' is enabled. + */ + +#ifdef INET +void +ipsec4_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + static const struct sockaddr_in template = { + sizeof (struct sockaddr_in), + AF_INET, + 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + src->sin = template; + dst->sin = template; + + if (m->m_len < sizeof (struct ip)) { + m_copydata(m, offsetof(struct ip, ip_src), + sizeof (struct in_addr), + (caddr_t) &src->sin.sin_addr); + m_copydata(m, offsetof(struct ip, ip_dst), + sizeof (struct in_addr), + (caddr_t) &dst->sin.sin_addr); + } else { + const struct ip *ip = mtod(m, const struct ip *); + src->sin.sin_addr = ip->ip_src; + dst->sin.sin_addr = ip->ip_dst; + } +} +#endif +#ifdef INET6 +void +ipsec6_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + struct ip6_hdr ip6buf; + const struct ip6_hdr *ip6; + + if (m->m_len >= sizeof(*ip6)) + ip6 = mtod(m, const struct ip6_hdr *); + else { + m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); + ip6 = &ip6buf; + } + + bzero(&src->sin6, sizeof(struct sockaddr_in6)); + src->sin6.sin6_family = AF_INET6; + src->sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&ip6->ip6_src, &src->sin6.sin6_addr, sizeof(ip6->ip6_src)); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + src->sin6.sin6_addr.s6_addr16[1] = 0; + src->sin6.sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); + } + + bzero(&dst->sin6, sizeof(struct sockaddr_in6)); + dst->sin6.sin6_family = AF_INET6; + dst->sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&ip6->ip6_dst, &dst->sin6.sin6_addr, sizeof(ip6->ip6_dst)); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + dst->sin6.sin6_addr.s6_addr16[1] = 0; + dst->sin6.sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); + } +} +#endif + +#ifdef IPSEC_SUPPORT +/* + * Declare IPSEC_SUPPORT as module even if IPSEC is defined. + * tcpmd5.ko module depends from IPSEC_SUPPORT. + */ +#define IPSEC_MODULE_INCR 2 +static int +ipsec_kmod_enter(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + if ((old & IPSEC_MODULE_ENABLED) == 0) + return (ENXIO); + new = old + IPSEC_MODULE_INCR; + } while(atomic_cmpset_acq_int(cntr, old, new) == 0); + return (0); +} + +static void +ipsec_kmod_exit(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + new = old - IPSEC_MODULE_INCR; + } while (atomic_cmpset_rel_int(cntr, old, new) == 0); +} + +static void +ipsec_kmod_drain(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + new = old & ~IPSEC_MODULE_ENABLED; + } while (atomic_cmpset_acq_int(cntr, old, new) == 0); + while (atomic_cmpset_int(cntr, 0, 0) == 0) + pause("ipsecd", hz/2); +} + +#define METHOD_DECL(...) __VA_ARGS__ +#define METHOD_ARGS(...) __VA_ARGS__ +#define IPSEC_KMOD_METHOD(type, name, sc, method, decl, args) \ +type name (decl) \ +{ \ + type ret = (type)ipsec_kmod_enter(&sc->enabled); \ + if (ret == 0) { \ + ret = (*sc->methods->method)(args); \ + ipsec_kmod_exit(&sc->enabled); \ + } \ + return (ret); \ +} + +#ifndef TCP_SIGNATURE +/* Declare TCP-MD5 support as kernel module. */ +static struct tcpmd5_support tcpmd5_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec; + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_input, sc, + input, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m, + struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf) +) + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_output, sc, + output, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m, + struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf) +) + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_pcbctl, sc, + pcbctl, METHOD_DECL(struct tcpmd5_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) + +void +tcpmd5_support_enable(const struct tcpmd5_methods * const methods) +{ + + KASSERT(tcp_ipsec_support->enabled == 0, ("TCP-MD5 already enabled")); + tcp_ipsec_support->methods = methods; + tcp_ipsec_support->enabled |= IPSEC_MODULE_ENABLED; +} + +void +tcpmd5_support_disable(void) +{ + + if (tcp_ipsec_support->enabled & IPSEC_MODULE_ENABLED) { + ipsec_kmod_drain(&tcp_ipsec_support->enabled); + tcp_ipsec_support->methods = NULL; + } +} +#endif + +static int +ipsec_support_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + return (0); + case MOD_UNLOAD: + return (EBUSY); + default: + return (EOPNOTSUPP); + } +} + +static moduledata_t ipsec_support_mod = { + "ipsec_support", + ipsec_support_modevent, + 0 +}; +DECLARE_MODULE(ipsec_support, ipsec_support_mod, SI_SUB_PROTO_DOMAIN, + SI_ORDER_ANY); +MODULE_VERSION(ipsec_support, 1); + +#ifndef IPSEC +/* + * IPsec support is build as kernel module. + */ +#ifdef INET +static struct ipsec_support ipv4_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; + +IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_input, sc, + udp_input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + int off, int af), METHOD_ARGS(m, off, af) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_pcbctl, sc, + udp_pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) +#endif + +#ifdef INET6 +static struct ipsec_support ipv6_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; +#endif + +IPSEC_KMOD_METHOD(int, ipsec_kmod_input, sc, + input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + int offset, int proto), METHOD_ARGS(m, offset, proto) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_check_policy, sc, + check_policy, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + struct inpcb *inp), METHOD_ARGS(m, inp) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_forward, sc, + forward, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m), + (m) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, + output, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + struct inpcb *inp), METHOD_ARGS(m, inp) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, sc, + pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) + +IPSEC_KMOD_METHOD(size_t, ipsec_kmod_hdrsize, sc, + hdrsize, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp), + (inp) +) + +static IPSEC_KMOD_METHOD(int, ipsec_kmod_caps, sc, + capability, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + u_int cap), METHOD_ARGS(m, cap) +) + +int +ipsec_kmod_capability(struct ipsec_support * const sc, struct mbuf *m, + u_int cap) +{ + + /* + * Since PF_KEY is build in the kernel, we can directly + * call key_havesp() without additional synchronizations. + */ + if (cap == IPSEC_CAP_OPERABLE) + return (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0); + return (ipsec_kmod_caps(sc, m, cap)); +} + +void +ipsec_support_enable(struct ipsec_support * const sc, + const struct ipsec_methods * const methods) +{ + + KASSERT(sc->enabled == 0, ("IPsec already enabled")); + sc->methods = methods; + sc->enabled |= IPSEC_MODULE_ENABLED; +} + +void +ipsec_support_disable(struct ipsec_support * const sc) +{ + + if (sc->enabled & IPSEC_MODULE_ENABLED) { + ipsec_kmod_drain(&sc->enabled); + sc->methods = NULL; + } +} +#endif /* !IPSEC */ +#endif /* IPSEC_SUPPORT */ diff --git a/sys/netipsec/udpencap.c b/sys/netipsec/udpencap.c new file mode 100644 index 000000000000..f88a0c25ba74 --- /dev/null +++ b/sys/netipsec/udpencap.c @@ -0,0 +1,294 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Handle UDP_ENCAP socket option. Always return with released INP_WLOCK. + */ +int +udp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + struct udpcb *up; + int error, optval; + + INP_WLOCK_ASSERT(inp); + if (sopt->sopt_name != UDP_ENCAP) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + + up = intoudpcb(inp); + if (sopt->sopt_dir == SOPT_GET) { + if (up->u_flags & UF_ESPINUDP) + optval = UDP_ENCAP_ESPINUDP; + else + optval = 0; + INP_WUNLOCK(inp); + return (sooptcopyout(sopt, &optval, sizeof(optval))); + } + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); + if (error != 0) + return (error); + + INP_WLOCK(inp); + switch (optval) { + case 0: + up->u_flags &= ~UF_ESPINUDP; + break; + case UDP_ENCAP_ESPINUDP: + up->u_flags |= UF_ESPINUDP; + break; + default: + error = EINVAL; + } + INP_WUNLOCK(inp); + return (error); +} + +/* + * Potentially decap ESP in UDP frame. Check for an ESP header. + * If present, strip the UDP header and push the result through IPSec. + * + * Returns error if mbuf consumed and/or processed, otherwise 0. + */ +int +udp_ipsec_input(struct mbuf *m, int off, int af) +{ + union sockaddr_union dst; + struct secasvar *sav; + struct udphdr *udp; + struct ip *ip; + uint32_t spi; + int error, hlen; + + /* + * Just return if packet doesn't have enough data. + * We need at least [IP header + UDP header + ESP header]. + * NAT-Keepalive packet has only one byte of payload, so it + * by default will not be processed. + */ + if (m->m_pkthdr.len < off + sizeof(struct esp)) + return (0); + + m_copydata(m, off, sizeof(uint32_t), (caddr_t)&spi); + if (spi == 0) /* Non-ESP marker. */ + return (0); + + /* + * Find SA and check that it is configured for UDP + * encapsulation. + */ + bzero(&dst, sizeof(dst)); + dst.sa.sa_family = af; + switch (af) { +#ifdef INET + case AF_INET: + dst.sin.sin_len = sizeof(struct sockaddr_in); + ip = mtod(m, struct ip *); + ip->ip_p = IPPROTO_ESP; + off = offsetof(struct ip, ip_p); + hlen = ip->ip_hl << 2; + dst.sin.sin_addr = ip->ip_dst; + break; +#endif +#ifdef INET6 + case AF_INET6: + /* Not yet */ + /* FALLTHROUGH */ +#endif + default: + ESPSTAT_INC(esps_nopf); + m_freem(m); + return (EPFNOSUPPORT); + } + + sav = key_allocsa(&dst, IPPROTO_ESP, spi); + if (sav == NULL) { + ESPSTAT_INC(esps_notdb); + m_freem(m); + return (ENOENT); + } + udp = mtodo(m, hlen); + if (sav->natt == NULL || + sav->natt->sport != udp->uh_sport || + sav->natt->dport != udp->uh_dport) { + /* XXXAE: should we check source address? */ + ESPSTAT_INC(esps_notdb); + key_freesav(&sav); + m_freem(m); + return (ENOENT); + } + /* + * Remove the UDP header + * Before: + * <--- off ---> + * +----+------+-----+ + * | IP | UDP | ESP | + * +----+------+-----+ + * <-skip-> + * After: + * +----+-----+ + * | IP | ESP | + * +----+-----+ + * <-skip-> + */ + m_striphdr(m, hlen, sizeof(*udp)); + /* + * We cannot yet update the cksums so clear any h/w cksum flags + * as they are no longer valid. + */ + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) + m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + /* + * We can update ip_len and ip_sum here, but ipsec4_input_cb() + * will do this anyway, so don't touch them here. + */ + ESPSTAT_INC(esps_input); + error = (*sav->tdb_xform->xf_input)(m, sav, hlen, off); + if (error != 0) + key_freesav(&sav); + + return (EINPROGRESS); /* Consumed by IPsec. */ +} + +int +udp_ipsec_output(struct mbuf *m, struct secasvar *sav) +{ + struct udphdr *udp; + struct mbuf *n; + struct ip *ip; + int hlen, off; + + IPSEC_ASSERT(sav->natt != NULL, ("UDP encapsulation isn't required.")); + + if (sav->sah->saidx.dst.sa.sa_family == AF_INET6) + return (EAFNOSUPPORT); + + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + n = m_makespace(m, hlen, sizeof(*udp), &off); + if (n == NULL) { + DPRINTF(("%s: m_makespace for udphdr failed\n", __func__)); + return (ENOBUFS); + } + + udp = mtodo(n, off); + udp->uh_dport = sav->natt->dport; + udp->uh_sport = sav->natt->sport; + udp->uh_sum = 0; + udp->uh_ulen = htons(m->m_pkthdr.len - hlen); + + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_p = IPPROTO_UDP; + return (0); +} + +void +udp_ipsec_adjust_cksum(struct mbuf *m, struct secasvar *sav, int proto, + int skip) +{ + struct ip *ip; + uint16_t cksum, off; + + IPSEC_ASSERT(sav->natt != NULL, ("NAT-T isn't required")); + IPSEC_ASSERT(proto == IPPROTO_UDP || proto == IPPROTO_TCP, + ("unexpected protocol %u", proto)); + + if (proto == IPPROTO_UDP) + off = offsetof(struct udphdr, uh_sum); + else + off = offsetof(struct tcphdr, th_sum); + + if (V_natt_cksum_policy == 0) { /* auto */ + if (sav->natt->cksum != 0) { + /* Incrementally recompute. */ + m_copydata(m, skip + off, sizeof(cksum), + (caddr_t)&cksum); + cksum = in_addword(cksum, sav->natt->cksum); + } else { + /* No OA from IKEd. */ + if (proto == IPPROTO_TCP) { + /* Ignore for TCP. */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | + CSUM_PSEUDO_HDR); + return; + } + cksum = 0; /* Reset for UDP. */ + } + m_copyback(m, skip + off, sizeof(cksum), (caddr_t)&cksum); + } else { /* Fully recompute */ + ip = mtod(m, struct ip *); + cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(m->m_pkthdr.len - skip + proto)); + m_copyback(m, skip + off, sizeof(cksum), (caddr_t)&cksum); + m->m_pkthdr.csum_flags = + (proto == IPPROTO_UDP) ? CSUM_UDP: CSUM_TCP; + m->m_pkthdr.csum_data = off; + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +} + diff --git a/sys/netipsec/xform.h b/sys/netipsec/xform.h index fee457be8794..8e6f8bdbc004 100644 --- a/sys/netipsec/xform.h +++ b/sys/netipsec/xform.h @@ -1,126 +1,117 @@ /* $FreeBSD$ */ /* $OpenBSD: ip_ipsp.h,v 1.119 2002/03/14 01:27:11 millert Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr), * Niels Provos (provos@physnet.uni-hamburg.de) and * Niklas Hallqvist (niklas@appli.se). * * The original version of this code was written by John Ioannidis * for BSD/OS in Athens, Greece, in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis and Niklas Hallqvist. * * Copyright (c) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 1999 Niklas Hallqvist. * Copyright (c) 2001, Angelos D. Keromytis. * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #ifndef _NETIPSEC_XFORM_H_ #define _NETIPSEC_XFORM_H_ #include +#include #include #include #define AH_HMAC_HASHLEN 12 /* 96 bits of authenticator */ #define AH_HMAC_MAXHASHLEN (SHA2_512_HASH_LEN/2) /* Keep this updated */ #define AH_HMAC_INITIAL_RPL 1 /* replay counter initial value */ +#ifdef _KERNEL +struct secpolicy; +struct secasvar; + /* * Packet tag assigned on completion of IPsec processing; used - * to speedup processing when/if the packet comes back for more - * processing. + * to speedup security policy checking for INBOUND packets. */ -struct tdb_ident { - u_int32_t spi; - union sockaddr_union dst; - u_int8_t proto; - /* Cache those two for enc(4) in xform_ipip. */ - u_int8_t alg_auth; - u_int8_t alg_enc; +struct xform_history { + union sockaddr_union dst; /* destination address */ + uint32_t spi; /* Security Parameters Index */ + uint8_t proto; /* IPPROTO_ESP or IPPROTO_AH */ + uint8_t mode; /* transport or tunnel */ }; /* * Opaque data structure hung off a crypto operation descriptor. */ -struct tdb_crypto { - struct ipsecrequest *tc_isr; /* ipsec request state */ - u_int32_t tc_spi; /* associated SPI */ - union sockaddr_union tc_dst; /* dst addr of packet */ - u_int8_t tc_proto; /* current protocol, e.g. AH */ - u_int8_t tc_nxt; /* next protocol, e.g. IPV4 */ - int tc_protoff; /* current protocol offset */ - int tc_skip; /* data offset */ - caddr_t tc_ptr; /* associated crypto data */ - struct secasvar *tc_sav; /* related SA */ +struct xform_data { + struct secpolicy *sp; /* security policy */ + struct secasvar *sav; /* related SA */ + uint64_t cryptoid; /* used crypto session id */ + u_int idx; /* IPsec request index */ + int protoff; /* current protocol offset */ + int skip; /* data offset */ + uint8_t nxt; /* next protocol, e.g. IPV4 */ }; -struct secasvar; -struct ipescrequest; - -struct xformsw { - u_short xf_type; /* xform ID */ #define XF_IP4 1 /* unused */ #define XF_AH 2 /* AH */ #define XF_ESP 3 /* ESP */ #define XF_TCPSIGNATURE 5 /* TCP MD5 Signature option, RFC 2358 */ #define XF_IPCOMP 6 /* IPCOMP */ - u_short xf_flags; -#define XFT_AUTH 0x0001 -#define XFT_CONF 0x0100 -#define XFT_COMP 0x1000 - char *xf_name; /* human-readable name */ + +struct xformsw { + u_short xf_type; /* xform ID */ + char *xf_name; /* human-readable name */ int (*xf_init)(struct secasvar*, struct xformsw*); /* setup */ int (*xf_zeroize)(struct secasvar*); /* cleanup */ int (*xf_input)(struct mbuf*, struct secasvar*, /* input */ int, int); - int (*xf_output)(struct mbuf*, /* output */ - struct ipsecrequest *, struct mbuf **, int, int); - struct xformsw *xf_next; /* list of registered xforms */ + int (*xf_output)(struct mbuf*, /* output */ + struct secpolicy *, struct secasvar *, u_int, int, int); + LIST_ENTRY(xformsw) chain; }; -#ifdef _KERNEL -extern void xform_register(struct xformsw*); -extern int xform_init(struct secasvar *sav, int xftype); -extern int xform_ah_authsize(struct auth_hash *esph); +const struct enc_xform * enc_algorithm_lookup(int); +const struct auth_hash * auth_algorithm_lookup(int); +const struct comp_algo * comp_algorithm_lookup(int); -struct cryptoini; +void xform_attach(void *); +void xform_detach(void *); +struct cryptoini; /* XF_AH */ +int xform_ah_authsize(const struct auth_hash *); extern int ah_init0(struct secasvar *, struct xformsw *, struct cryptoini *); extern int ah_zeroize(struct secasvar *sav); -extern struct auth_hash *ah_algorithm_lookup(int alg); extern size_t ah_hdrsiz(struct secasvar *); /* XF_ESP */ -extern struct enc_xform *esp_algorithm_lookup(int alg); extern size_t esp_hdrsiz(struct secasvar *sav); -/* XF_COMP */ -extern struct comp_algo *ipcomp_algorithm_lookup(int alg); - #endif /* _KERNEL */ #endif /* _NETIPSEC_XFORM_H_ */ diff --git a/sys/netipsec/xform_ah.c b/sys/netipsec/xform_ah.c index 6fcca719a696..e5f80311b96b 100644 --- a/sys/netipsec/xform_ah.c +++ b/sys/netipsec/xform_ah.c @@ -1,1204 +1,1143 @@ /* $FreeBSD$ */ /* $OpenBSD: ip_ah.c,v 1.63 2001/06/26 06:18:58 angelos Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * The original version of this code was written by John Ioannidis * for BSD/OS in Athens, Greece, in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis and Niklas Hallqvist. * * Copyright (c) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 1999 Niklas Hallqvist. * Copyright (c) 2001 Angelos D. Keromytis. * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #include #include /* * Return header size in bytes. The old protocol did not support * the replay counter; the new protocol always includes the counter. */ #define HDRSIZE(sav) \ (((sav)->flags & SADB_X_EXT_OLD) ? \ sizeof (struct ah) : sizeof (struct ah) + sizeof (u_int32_t)) /* * Return authenticator size in bytes, based on a field in the * algorithm descriptor. */ #define AUTHSIZE(sav) ((sav->flags & SADB_X_EXT_OLD) ? 16 : \ xform_ah_authsize((sav)->tdb_authalgxform)) VNET_DEFINE(int, ah_enable) = 1; /* control flow of packets with AH */ VNET_DEFINE(int, ah_cleartos) = 1; /* clear ip_tos when doing AH calc */ VNET_PCPUSTAT_DEFINE(struct ahstat, ahstat); VNET_PCPUSTAT_SYSINIT(ahstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ahstat); #endif /* VIMAGE */ #ifdef INET SYSCTL_DECL(_net_inet_ah); SYSCTL_INT(_net_inet_ah, OID_AUTO, ah_enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_enable), 0, ""); SYSCTL_INT(_net_inet_ah, OID_AUTO, ah_cleartos, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, ""); SYSCTL_VNET_PCPUSTAT(_net_inet_ah, IPSECCTL_STATS, stats, struct ahstat, ahstat, "AH statistics (struct ahstat, netipsec/ah_var.h)"); #endif static unsigned char ipseczeroes[256]; /* larger than an ip6 extension hdr */ static int ah_input_cb(struct cryptop*); static int ah_output_cb(struct cryptop*); int -xform_ah_authsize(struct auth_hash *esph) +xform_ah_authsize(const struct auth_hash *esph) { int alen; if (esph == NULL) return 0; switch (esph->type) { case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: alen = esph->hashsize / 2; /* RFC4868 2.3 */ break; case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: alen = esph->hashsize; break; default: alen = AH_HMAC_HASHLEN; break; } return alen; } -/* - * NB: this is public for use by the PF_KEY support. - */ -struct auth_hash * -ah_algorithm_lookup(int alg) -{ - if (alg > SADB_AALG_MAX) - return NULL; - switch (alg) { - case SADB_X_AALG_NULL: - return &auth_hash_null; - case SADB_AALG_MD5HMAC: - return &auth_hash_hmac_md5; - case SADB_AALG_SHA1HMAC: - return &auth_hash_hmac_sha1; - case SADB_X_AALG_RIPEMD160HMAC: - return &auth_hash_hmac_ripemd_160; - case SADB_X_AALG_MD5: - return &auth_hash_key_md5; - case SADB_X_AALG_SHA: - return &auth_hash_key_sha1; - case SADB_X_AALG_SHA2_256: - return &auth_hash_hmac_sha2_256; - case SADB_X_AALG_SHA2_384: - return &auth_hash_hmac_sha2_384; - case SADB_X_AALG_SHA2_512: - return &auth_hash_hmac_sha2_512; - case SADB_X_AALG_AES128GMAC: - return &auth_hash_nist_gmac_aes_128; - case SADB_X_AALG_AES192GMAC: - return &auth_hash_nist_gmac_aes_192; - case SADB_X_AALG_AES256GMAC: - return &auth_hash_nist_gmac_aes_256; - } - return NULL; -} - size_t ah_hdrsiz(struct secasvar *sav) { size_t size; if (sav != NULL) { int authsize; IPSEC_ASSERT(sav->tdb_authalgxform != NULL, ("null xform")); /*XXX not right for null algorithm--does it matter??*/ authsize = AUTHSIZE(sav); size = roundup(authsize, sizeof (u_int32_t)) + HDRSIZE(sav); } else { /* default guess */ size = sizeof (struct ah) + sizeof (u_int32_t) + 16; } return size; } /* * NB: public for use by esp_init. */ int ah_init0(struct secasvar *sav, struct xformsw *xsp, struct cryptoini *cria) { - struct auth_hash *thash; + const struct auth_hash *thash; int keylen; - thash = ah_algorithm_lookup(sav->alg_auth); + thash = auth_algorithm_lookup(sav->alg_auth); if (thash == NULL) { DPRINTF(("%s: unsupported authentication algorithm %u\n", __func__, sav->alg_auth)); return EINVAL; } /* * Verify the replay state block allocation is consistent with * the protocol type. We check here so we can make assumptions * later during protocol processing. */ /* NB: replay state is setup elsewhere (sigh) */ if (((sav->flags&SADB_X_EXT_OLD) == 0) ^ (sav->replay != NULL)) { DPRINTF(("%s: replay state block inconsistency, " "%s algorithm %s replay state\n", __func__, (sav->flags & SADB_X_EXT_OLD) ? "old" : "new", sav->replay == NULL ? "without" : "with")); return EINVAL; } if (sav->key_auth == NULL) { DPRINTF(("%s: no authentication key for %s algorithm\n", __func__, thash->name)); return EINVAL; } keylen = _KEYLEN(sav->key_auth); if (keylen != thash->keysize && thash->keysize != 0) { DPRINTF(("%s: invalid keylength %d, algorithm %s requires " "keysize %d\n", __func__, keylen, thash->name, thash->keysize)); return EINVAL; } sav->tdb_xform = xsp; sav->tdb_authalgxform = thash; /* Initialize crypto session. */ bzero(cria, sizeof (*cria)); cria->cri_alg = sav->tdb_authalgxform->type; cria->cri_klen = _KEYBITS(sav->key_auth); cria->cri_key = sav->key_auth->key_data; cria->cri_mlen = AUTHSIZE(sav); return 0; } /* * ah_init() is called when an SPI is being set up. */ static int ah_init(struct secasvar *sav, struct xformsw *xsp) { struct cryptoini cria; int error; error = ah_init0(sav, xsp, &cria); return error ? error : crypto_newsession(&sav->tdb_cryptoid, &cria, V_crypto_support); } /* * Paranoia. * * NB: public for use by esp_zeroize (XXX). */ int ah_zeroize(struct secasvar *sav) { int err; if (sav->key_auth) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); err = crypto_freesession(sav->tdb_cryptoid); sav->tdb_cryptoid = 0; sav->tdb_authalgxform = NULL; sav->tdb_xform = NULL; return err; } /* * Massage IPv4/IPv6 headers for AH processing. */ static int ah_massage_headers(struct mbuf **m0, int proto, int skip, int alg, int out) { struct mbuf *m = *m0; unsigned char *ptr; int off, count; #ifdef INET struct ip *ip; #endif /* INET */ #ifdef INET6 struct ip6_ext *ip6e; struct ip6_hdr ip6; int alloc, len, ad; #endif /* INET6 */ switch (proto) { #ifdef INET case AF_INET: /* * This is the least painful way of dealing with IPv4 header * and option processing -- just make sure they're in * contiguous memory. */ *m0 = m = m_pullup(m, skip); if (m == NULL) { DPRINTF(("%s: m_pullup failed\n", __func__)); return ENOBUFS; } /* Fix the IP header */ ip = mtod(m, struct ip *); if (V_ah_cleartos) ip->ip_tos = 0; ip->ip_ttl = 0; ip->ip_sum = 0; if (alg == CRYPTO_MD5_KPDK || alg == CRYPTO_SHA1_KPDK) ip->ip_off &= htons(IP_DF); else ip->ip_off = htons(0); ptr = mtod(m, unsigned char *) + sizeof(struct ip); /* IPv4 option processing */ for (off = sizeof(struct ip); off < skip;) { if (ptr[off] == IPOPT_EOL || ptr[off] == IPOPT_NOP || off + 1 < skip) ; else { DPRINTF(("%s: illegal IPv4 option length for " "option %d\n", __func__, ptr[off])); m_freem(m); return EINVAL; } switch (ptr[off]) { case IPOPT_EOL: off = skip; /* End the loop. */ break; case IPOPT_NOP: off++; break; case IPOPT_SECURITY: /* 0x82 */ case 0x85: /* Extended security. */ case 0x86: /* Commercial security. */ case 0x94: /* Router alert */ case 0x95: /* RFC1770 */ /* Sanity check for option length. */ if (ptr[off + 1] < 2) { DPRINTF(("%s: illegal IPv4 option " "length for option %d\n", __func__, ptr[off])); m_freem(m); return EINVAL; } off += ptr[off + 1]; break; case IPOPT_LSRR: case IPOPT_SSRR: /* Sanity check for option length. */ if (ptr[off + 1] < 2) { DPRINTF(("%s: illegal IPv4 option " "length for option %d\n", __func__, ptr[off])); m_freem(m); return EINVAL; } /* * On output, if we have either of the * source routing options, we should * swap the destination address of the * IP header with the last address * specified in the option, as that is * what the destination's IP header * will look like. */ if (out) bcopy(ptr + off + ptr[off + 1] - sizeof(struct in_addr), &(ip->ip_dst), sizeof(struct in_addr)); /* Fall through */ default: /* Sanity check for option length. */ if (ptr[off + 1] < 2) { DPRINTF(("%s: illegal IPv4 option " "length for option %d\n", __func__, ptr[off])); m_freem(m); return EINVAL; } /* Zeroize all other options. */ count = ptr[off + 1]; bcopy(ipseczeroes, ptr, count); off += count; break; } /* Sanity check. */ if (off > skip) { DPRINTF(("%s: malformed IPv4 options header\n", __func__)); m_freem(m); return EINVAL; } } break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* Ugly... */ /* Copy and "cook" the IPv6 header. */ m_copydata(m, 0, sizeof(ip6), (caddr_t) &ip6); /* We don't do IPv6 Jumbograms. */ if (ip6.ip6_plen == 0) { DPRINTF(("%s: unsupported IPv6 jumbogram\n", __func__)); m_freem(m); return EMSGSIZE; } ip6.ip6_flow = 0; ip6.ip6_hlim = 0; ip6.ip6_vfc &= ~IPV6_VERSION_MASK; ip6.ip6_vfc |= IPV6_VERSION; /* Scoped address handling. */ if (IN6_IS_SCOPE_LINKLOCAL(&ip6.ip6_src)) ip6.ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&ip6.ip6_dst)) ip6.ip6_dst.s6_addr16[1] = 0; /* Done with IPv6 header. */ m_copyback(m, 0, sizeof(struct ip6_hdr), (caddr_t) &ip6); /* Let's deal with the remaining headers (if any). */ if (skip - sizeof(struct ip6_hdr) > 0) { if (m->m_len <= skip) { ptr = (unsigned char *) malloc( skip - sizeof(struct ip6_hdr), M_XDATA, M_NOWAIT); if (ptr == NULL) { DPRINTF(("%s: failed to allocate memory" "for IPv6 headers\n",__func__)); m_freem(m); return ENOBUFS; } /* * Copy all the protocol headers after * the IPv6 header. */ m_copydata(m, sizeof(struct ip6_hdr), skip - sizeof(struct ip6_hdr), ptr); alloc = 1; } else { /* No need to allocate memory. */ ptr = mtod(m, unsigned char *) + sizeof(struct ip6_hdr); alloc = 0; } } else break; off = ip6.ip6_nxt & 0xff; /* Next header type. */ for (len = 0; len < skip - sizeof(struct ip6_hdr);) switch (off) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: ip6e = (struct ip6_ext *) (ptr + len); /* * Process the mutable/immutable * options -- borrows heavily from the * KAME code. */ for (count = len + sizeof(struct ip6_ext); count < len + ((ip6e->ip6e_len + 1) << 3);) { if (ptr[count] == IP6OPT_PAD1) { count++; continue; /* Skip padding. */ } /* Sanity check. */ if (count > len + ((ip6e->ip6e_len + 1) << 3)) { m_freem(m); /* Free, if we allocated. */ if (alloc) free(ptr, M_XDATA); return EINVAL; } ad = ptr[count + 1]; /* If mutable option, zeroize. */ if (ptr[count] & IP6OPT_MUTABLE) bcopy(ipseczeroes, ptr + count, ptr[count + 1]); count += ad; /* Sanity check. */ if (count > skip - sizeof(struct ip6_hdr)) { m_freem(m); /* Free, if we allocated. */ if (alloc) free(ptr, M_XDATA); return EINVAL; } } /* Advance. */ len += ((ip6e->ip6e_len + 1) << 3); off = ip6e->ip6e_nxt; break; case IPPROTO_ROUTING: /* * Always include routing headers in * computation. */ ip6e = (struct ip6_ext *) (ptr + len); len += ((ip6e->ip6e_len + 1) << 3); off = ip6e->ip6e_nxt; break; default: DPRINTF(("%s: unexpected IPv6 header type %d", __func__, off)); if (alloc) free(ptr, M_XDATA); m_freem(m); return EINVAL; } /* Copyback and free, if we allocated. */ if (alloc) { m_copyback(m, sizeof(struct ip6_hdr), skip - sizeof(struct ip6_hdr), ptr); free(ptr, M_XDATA); } break; #endif /* INET6 */ } return 0; } /* * ah_input() gets called to verify that an input packet * passes authentication. */ static int ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { char buf[128]; - struct auth_hash *ahx; - struct tdb_crypto *tc; - struct newah *ah; - int hl, rplen, authsize, error; - + const struct auth_hash *ahx; struct cryptodesc *crda; struct cryptop *crp; + struct xform_data *xd; + struct newah *ah; + uint64_t cryptoid; + int hl, rplen, authsize, error; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->key_auth != NULL, ("null authentication key")); IPSEC_ASSERT(sav->tdb_authalgxform != NULL, ("null authentication xform")); /* Figure out header size. */ rplen = HDRSIZE(sav); /* XXX don't pullup, just copy header */ IP6_EXTHDR_GET(ah, struct newah *, m, skip, rplen); if (ah == NULL) { DPRINTF(("ah_input: cannot pullup header\n")); AHSTAT_INC(ahs_hdrops); /*XXX*/ m_freem(m); return ENOBUFS; } /* Check replay window, if applicable. */ - if (sav->replay && !ipsec_chkreplay(ntohl(ah->ah_seq), sav)) { + SECASVAR_LOCK(sav); + if (sav->replay != NULL && sav->replay->wsize != 0 && + ipsec_chkreplay(ntohl(ah->ah_seq), sav) == 0) { + SECASVAR_UNLOCK(sav); AHSTAT_INC(ahs_replay); DPRINTF(("%s: packet replay failure: %s\n", __func__, - ipsec_logsastr(sav, buf, sizeof(buf)))); + ipsec_sa2str(sav, buf, sizeof(buf)))); m_freem(m); - return ENOBUFS; + return (EACCES); } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Verify AH header length. */ hl = ah->ah_len * sizeof (u_int32_t); ahx = sav->tdb_authalgxform; authsize = AUTHSIZE(sav); if (hl != authsize + rplen - sizeof (struct ah)) { DPRINTF(("%s: bad authenticator length %u (expecting %lu)" " for packet in SA %s/%08lx\n", __func__, hl, (u_long) (authsize + rplen - sizeof (struct ah)), ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_badauthl); m_freem(m); return EACCES; } AHSTAT_ADD(ahs_ibytes, m->m_pkthdr.len - skip - hl); /* Get crypto descriptors. */ crp = crypto_getreq(1); if (crp == NULL) { - DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__)); + DPRINTF(("%s: failed to acquire crypto descriptor\n", + __func__)); AHSTAT_INC(ahs_crypto); m_freem(m); return ENOBUFS; } crda = crp->crp_desc; IPSEC_ASSERT(crda != NULL, ("null crypto descriptor")); crda->crd_skip = 0; crda->crd_len = m->m_pkthdr.len; crda->crd_inject = skip + rplen; /* Authentication operation. */ crda->crd_alg = ahx->type; crda->crd_klen = _KEYBITS(sav->key_auth); crda->crd_key = sav->key_auth->key_data; /* Allocate IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto) + - skip + rplen + authsize, M_XDATA, M_NOWAIT | M_ZERO); - if (tc == NULL) { - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd) + skip + rplen + authsize, M_XDATA, + M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); AHSTAT_INC(ahs_crypto); crypto_freereq(crp); m_freem(m); return ENOBUFS; } /* * Save the authenticator, the skipped portion of the packet, * and the AH header. */ - m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(tc+1)); + m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(xd + 1)); /* Zeroize the authenticator on the packet. */ m_copyback(m, skip + rplen, authsize, ipseczeroes); /* "Massage" the packet headers for crypto processing. */ error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, skip, ahx->type, 0); if (error != 0) { /* NB: mbuf is free'd by ah_massage_headers */ AHSTAT_INC(ahs_hdrops); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); return (error); } /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback. */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_nxt = ah->ah_nxt; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->nxt = ah->ah_nxt; + xd->protoff = protoff; + xd->skip = skip; + xd->cryptoid = cryptoid; return (crypto_dispatch(crp)); } /* * AH input callback from the crypto driver. */ static int ah_input_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; - int rplen, error, skip, protoff; + char buf[IPSEC_ADDRSTRLEN]; unsigned char calc[AH_ALEN_MAX]; + const struct auth_hash *ahx; struct mbuf *m; struct cryptodesc *crd; - struct auth_hash *ahx; - struct tdb_crypto *tc; + struct xform_data *xd; struct secasvar *sav; struct secasindex *saidx; - u_int8_t nxt; caddr_t ptr; - int authsize; + uint64_t cryptoid; + int authsize, rplen, error, skip, protoff; + uint8_t nxt; crd = crp->crp_desc; - - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - nxt = tc->tc_nxt; - protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + nxt = xd->nxt; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, ("unexpected protocol family %u", saidx->dst.sa.sa_family)); - ahx = (struct auth_hash *) sav->tdb_authalgxform; + ahx = sav->tdb_authalgxform; /* Check for crypto errors. */ if (crp->crp_etype) { - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - - if (crp->crp_etype == EAGAIN) + if (crp->crp_etype == EAGAIN) { + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); - + } AHSTAT_INC(ahs_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } else { AHSTAT_INC(ahs_hist[sav->alg_auth]); crypto_freereq(crp); /* No longer needed. */ crp = NULL; } /* Shouldn't happen... */ if (m == NULL) { AHSTAT_INC(ahs_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } /* Figure out header size. */ rplen = HDRSIZE(sav); authsize = AUTHSIZE(sav); /* Copy authenticator off the packet. */ m_copydata(m, skip + rplen, authsize, calc); /* Verify authenticator. */ - ptr = (caddr_t) (tc + 1); + ptr = (caddr_t) (xd + 1); if (timingsafe_bcmp(ptr + skip + rplen, calc, authsize)) { DPRINTF(("%s: authentication hash mismatch for packet " "in SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_badauth); error = EACCES; goto bad; } /* Fix the Next Protocol field. */ - ((u_int8_t *) ptr)[protoff] = nxt; + ((uint8_t *) ptr)[protoff] = nxt; /* Copyback the saved (uncooked) network headers. */ m_copyback(m, 0, skip, ptr); - free(tc, M_XDATA), tc = NULL; /* No longer needed */ + free(xd, M_XDATA), xd = NULL; /* No longer needed */ /* * Header is now authenticated. */ m->m_flags |= M_AUTHIPHDR|M_AUTHIPDGM; /* * Update replay sequence number, if appropriate. */ if (sav->replay) { u_int32_t seq; m_copydata(m, skip + offsetof(struct newah, ah_seq), sizeof (seq), (caddr_t) &seq); + SECASVAR_LOCK(sav); if (ipsec_updatereplay(ntohl(seq), sav)) { + SECASVAR_UNLOCK(sav); AHSTAT_INC(ahs_replay); - error = ENOBUFS; /*XXX as above*/ + error = EACCES; goto bad; } + SECASVAR_UNLOCK(sav); } /* * Remove the AH header and authenticator from the mbuf. */ error = m_striphdr(m, skip, rplen + authsize); if (error) { DPRINTF(("%s: mangled mbuf chain for SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); goto bad; } switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: if (sav) - KEY_FREESAV(&sav); + key_freesav(&sav); if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); + if (xd != NULL) + free(xd, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } /* - * AH output routine, called by ipsec[46]_process_packet(). + * AH output routine, called by ipsec[46]_perform_request(). */ static int -ah_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, - int skip, int protoff) +ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; - struct secasvar *sav; - struct auth_hash *ahx; + char buf[IPSEC_ADDRSTRLEN]; + const struct auth_hash *ahx; struct cryptodesc *crda; - struct tdb_crypto *tc; + struct xform_data *xd; struct mbuf *mi; struct cryptop *crp; - u_int16_t iplen; - int error, rplen, authsize, maxpacketsize, roff; - u_int8_t prot; struct newah *ah; + uint64_t cryptoid; + uint16_t iplen; + int error, rplen, authsize, maxpacketsize, roff; + uint8_t prot; - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); ahx = sav->tdb_authalgxform; IPSEC_ASSERT(ahx != NULL, ("null authentication xform")); AHSTAT_INC(ahs_output); /* Figure out header size. */ rplen = HDRSIZE(sav); /* Check for maximum packet size violations. */ switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: maxpacketsize = IP_MAXPACKET; break; #endif /* INET */ #ifdef INET6 case AF_INET6: maxpacketsize = IPV6_MAXPACKET; break; #endif /* INET6 */ default: DPRINTF(("%s: unknown/unsupported protocol family %u, " "SA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_nopf); error = EPFNOSUPPORT; goto bad; } authsize = AUTHSIZE(sav); if (rplen + authsize + m->m_pkthdr.len > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), rplen + authsize + m->m_pkthdr.len, maxpacketsize)); AHSTAT_INC(ahs_toobig); error = EMSGSIZE; goto bad; } /* Update the counters. */ AHSTAT_ADD(ahs_obytes, m->m_pkthdr.len - skip); m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); error = ENOBUFS; goto bad; } /* Inject AH header. */ mi = m_makespace(m, skip, rplen + authsize, &roff); if (mi == NULL) { DPRINTF(("%s: failed to inject %u byte AH header for SA " "%s/%08lx\n", __func__, rplen + authsize, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); /*XXX differs from openbsd */ error = ENOBUFS; goto bad; } /* * The AH header is guaranteed by m_makespace() to be in * contiguous memory, at roff bytes offset into the returned mbuf. */ ah = (struct newah *)(mtod(mi, caddr_t) + roff); /* Initialize the AH header. */ m_copydata(m, protoff, sizeof(u_int8_t), (caddr_t) &ah->ah_nxt); ah->ah_len = (rplen + authsize - sizeof(struct ah)) / sizeof(u_int32_t); ah->ah_reserve = 0; ah->ah_spi = sav->spi; /* Zeroize authenticator. */ m_copyback(m, skip + rplen, authsize, ipseczeroes); /* Insert packet replay counter, as requested. */ + SECASVAR_LOCK(sav); if (sav->replay) { - SECASVAR_LOCK(sav); - if (sav->replay->count == ~0 && (sav->flags & SADB_X_EXT_CYCSEQ) == 0) { SECASVAR_UNLOCK(sav); DPRINTF(("%s: replay counter wrapped for SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_wrap); - error = EINVAL; + error = EACCES; goto bad; } #ifdef REGRESSION /* Emulate replay attack when ipsec_replay is TRUE. */ if (!V_ipsec_replay) #endif sav->replay->count++; ah->ah_seq = htonl(sav->replay->count); - - SECASVAR_UNLOCK(sav); } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Get crypto descriptors. */ crp = crypto_getreq(1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); AHSTAT_INC(ahs_crypto); error = ENOBUFS; goto bad; } crda = crp->crp_desc; - crda->crd_skip = 0; crda->crd_inject = skip + rplen; crda->crd_len = m->m_pkthdr.len; /* Authentication operation. */ crda->crd_alg = ahx->type; crda->crd_key = sav->key_auth->key_data; crda->crd_klen = _KEYBITS(sav->key_auth); /* Allocate IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc( - sizeof(struct tdb_crypto) + skip, M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data) + skip, M_XDATA, + M_NOWAIT | M_ZERO); + if (xd == NULL) { crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); AHSTAT_INC(ahs_crypto); error = ENOBUFS; goto bad; } /* Save the skipped portion of the packet. */ - m_copydata(m, 0, skip, (caddr_t) (tc + 1)); + m_copydata(m, 0, skip, (caddr_t) (xd + 1)); /* * Fix IP header length on the header used for * authentication. We don't need to fix the original * header length as it will be fixed by our caller. */ switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: - bcopy(((caddr_t)(tc + 1)) + + bcopy(((caddr_t)(xd + 1)) + offsetof(struct ip, ip_len), (caddr_t) &iplen, sizeof(u_int16_t)); iplen = htons(ntohs(iplen) + rplen + authsize); m_copyback(m, offsetof(struct ip, ip_len), sizeof(u_int16_t), (caddr_t) &iplen); break; #endif /* INET */ #ifdef INET6 case AF_INET6: - bcopy(((caddr_t)(tc + 1)) + + bcopy(((caddr_t)(xd + 1)) + offsetof(struct ip6_hdr, ip6_plen), - (caddr_t) &iplen, sizeof(u_int16_t)); + (caddr_t) &iplen, sizeof(uint16_t)); iplen = htons(ntohs(iplen) + rplen + authsize); m_copyback(m, offsetof(struct ip6_hdr, ip6_plen), - sizeof(u_int16_t), (caddr_t) &iplen); + sizeof(uint16_t), (caddr_t) &iplen); break; #endif /* INET6 */ } /* Fix the Next Header field in saved header. */ - ((u_int8_t *) (tc + 1))[protoff] = IPPROTO_AH; + ((uint8_t *) (xd + 1))[protoff] = IPPROTO_AH; /* Update the Next Protocol field in the IP header. */ prot = IPPROTO_AH; - m_copyback(m, protoff, sizeof(u_int8_t), (caddr_t) &prot); + m_copyback(m, protoff, sizeof(uint8_t), (caddr_t) &prot); /* "Massage" the packet headers for crypto processing. */ error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, skip, ahx->type, 1); if (error != 0) { m = NULL; /* mbuf was free'd by ah_massage_headers. */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); goto bad; } /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_output_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback. */ - key_addref(isr->sp); - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_skip = skip; - tc->tc_protoff = protoff; + xd->sp = sp; + xd->sav = sav; + xd->skip = skip; + xd->idx = idx; + xd->cryptoid = cryptoid; return crypto_dispatch(crp); bad: if (m) m_freem(m); return (error); } /* * AH output callback from the crypto driver. */ static int ah_output_cb(struct cryptop *crp) { - int skip, protoff, error; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; + uint64_t cryptoid; caddr_t ptr; + u_int idx; + int skip, error; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; - ptr = (caddr_t) (tc + 1); m = (struct mbuf *) crp->crp_buf; - - isr = tc->tc_isr; - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ - if (sav != isr->sav) { - AHSTAT_INC(ahs_notdb); - DPRINTF(("%s: SA expired while in crypto\n", __func__)); - error = ENOBUFS; /*XXX*/ - goto bad; - } + xd = (struct xform_data *) crp->crp_opaque; + sp = xd->sp; + sav = xd->sav; + skip = xd->skip; + idx = xd->idx; + cryptoid = xd->cryptoid; + ptr = (caddr_t) (xd + 1); /* Check for crypto errors. */ if (crp->crp_etype) { - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); } - AHSTAT_INC(ahs_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; + m_freem(m); goto bad; } /* Shouldn't happen... */ if (m == NULL) { AHSTAT_INC(ahs_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } - AHSTAT_INC(ahs_hist[sav->alg_auth]); - /* * Copy original headers (with the new protocol number) back * in place. */ m_copyback(m, 0, skip, ptr); - /* No longer needed. */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); - + AHSTAT_INC(ahs_hist[sav->alg_auth]); #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { int alen; /* * Corrupt HMAC if we want to test integrity verification of * the other side. */ alen = AUTHSIZE(sav); m_copyback(m, m->m_pkthdr.len - alen, alen, ipseczeroes); } #endif /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); + error = ipsec_process_done(m, sp, sav, idx); return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); - if (m) - m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); + key_freesav(&sav); + key_freesp(&sp); return (error); } static struct xformsw ah_xformsw = { - XF_AH, XFT_AUTH, "IPsec AH", - ah_init, ah_zeroize, ah_input, ah_output, + .xf_type = XF_AH, + .xf_name = "IPsec AH", + .xf_init = ah_init, + .xf_zeroize = ah_zeroize, + .xf_input = ah_input, + .xf_output = ah_output, }; -static void -ah_attach(void) -{ - - xform_register(&ah_xformsw); -} - -SYSINIT(ah_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ah_attach, NULL); +SYSINIT(ah_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_attach, &ah_xformsw); +SYSUNINIT(ah_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_detach, &ah_xformsw); diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c index bae8571f5bfd..3c5ca6b65268 100644 --- a/sys/netipsec/xform_esp.c +++ b/sys/netipsec/xform_esp.c @@ -1,1022 +1,957 @@ /* $FreeBSD$ */ /* $OpenBSD: ip_esp.c,v 1.69 2001/06/26 06:18:59 angelos Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * The original version of this code was written by John Ioannidis * for BSD/OS in Athens, Greece, in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001 Angelos D. Keromytis. * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #include #include #include VNET_DEFINE(int, esp_enable) = 1; VNET_PCPUSTAT_DEFINE(struct espstat, espstat); VNET_PCPUSTAT_SYSINIT(espstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(espstat); #endif /* VIMAGE */ SYSCTL_DECL(_net_inet_esp); SYSCTL_INT(_net_inet_esp, OID_AUTO, esp_enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(esp_enable), 0, ""); SYSCTL_VNET_PCPUSTAT(_net_inet_esp, IPSECCTL_STATS, stats, struct espstat, espstat, "ESP statistics (struct espstat, netipsec/esp_var.h"); static int esp_input_cb(struct cryptop *op); static int esp_output_cb(struct cryptop *crp); -/* - * NB: this is public for use by the PF_KEY support. - * NB: if you add support here; be sure to add code to esp_attach below! - */ -struct enc_xform * -esp_algorithm_lookup(int alg) -{ - if (alg >= ESP_ALG_MAX) - return NULL; - switch (alg) { - case SADB_EALG_DESCBC: - return &enc_xform_des; - case SADB_EALG_3DESCBC: - return &enc_xform_3des; - case SADB_X_EALG_AES: - return &enc_xform_rijndael128; - case SADB_X_EALG_BLOWFISHCBC: - return &enc_xform_blf; - case SADB_X_EALG_CAST128CBC: - return &enc_xform_cast5; - case SADB_EALG_NULL: - return &enc_xform_null; - case SADB_X_EALG_CAMELLIACBC: - return &enc_xform_camellia; - case SADB_X_EALG_AESCTR: - return &enc_xform_aes_icm; - case SADB_X_EALG_AESGCM16: - return &enc_xform_aes_nist_gcm; - case SADB_X_EALG_AESGMAC: - return &enc_xform_aes_nist_gmac; - } - return NULL; -} - size_t esp_hdrsiz(struct secasvar *sav) { size_t size; if (sav != NULL) { /*XXX not right for null algorithm--does it matter??*/ IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("SA with null xform")); if (sav->flags & SADB_X_EXT_OLD) size = sizeof (struct esp); else size = sizeof (struct newesp); size += sav->tdb_encalgxform->blocksize + 9; /*XXX need alg check???*/ if (sav->tdb_authalgxform != NULL && sav->replay) size += ah_hdrsiz(sav); } else { /* * base header size * + max iv length for CBC mode * + max pad length * + sizeof (pad length field) * + sizeof (next header field) * + max icv supported. */ size = sizeof (struct newesp) + EALG_MAX_BLOCK_LEN + 9 + 16; } return size; } /* * esp_init() is called when an SPI is being set up. */ static int esp_init(struct secasvar *sav, struct xformsw *xsp) { - struct enc_xform *txform; + const struct enc_xform *txform; struct cryptoini cria, crie; int keylen; int error; - txform = esp_algorithm_lookup(sav->alg_enc); + txform = enc_algorithm_lookup(sav->alg_enc); if (txform == NULL) { DPRINTF(("%s: unsupported encryption algorithm %d\n", __func__, sav->alg_enc)); return EINVAL; } if (sav->key_enc == NULL) { DPRINTF(("%s: no encoding key for %s algorithm\n", __func__, txform->name)); return EINVAL; } if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_IV4B)) == SADB_X_EXT_IV4B) { DPRINTF(("%s: 4-byte IV not supported with protocol\n", __func__)); return EINVAL; } /* subtract off the salt, RFC4106, 8.1 and RFC3686, 5.1 */ keylen = _KEYLEN(sav->key_enc) - SAV_ISCTRORGCM(sav) * 4; if (txform->minkey > keylen || keylen > txform->maxkey) { DPRINTF(("%s: invalid key length %u, must be in the range " "[%u..%u] for algorithm %s\n", __func__, keylen, txform->minkey, txform->maxkey, txform->name)); return EINVAL; } if (SAV_ISCTRORGCM(sav)) sav->ivlen = 8; /* RFC4106 3.1 and RFC3686 3.1 */ else sav->ivlen = txform->ivsize; /* * Setup AH-related state. */ if (sav->alg_auth != 0) { error = ah_init0(sav, xsp, &cria); if (error) return error; } /* NB: override anything set in ah_init0 */ sav->tdb_xform = xsp; sav->tdb_encalgxform = txform; /* * Whenever AES-GCM is used for encryption, one * of the AES authentication algorithms is chosen * as well, based on the key size. */ if (sav->alg_enc == SADB_X_EALG_AESGCM16) { switch (keylen) { case AES_128_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES128GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_128; break; case AES_192_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES192GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_192; break; case AES_256_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES256GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_256; break; default: DPRINTF(("%s: invalid key length %u" "for algorithm %s\n", __func__, keylen, txform->name)); return EINVAL; } bzero(&cria, sizeof(cria)); cria.cri_alg = sav->tdb_authalgxform->type; cria.cri_key = sav->key_enc->key_data; cria.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISGCM(sav) * 32; } /* Initialize crypto session. */ bzero(&crie, sizeof(crie)); crie.cri_alg = sav->tdb_encalgxform->type; crie.cri_key = sav->key_enc->key_data; crie.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISCTRORGCM(sav) * 32; if (sav->tdb_authalgxform && sav->tdb_encalgxform) { /* init both auth & enc */ crie.cri_next = &cria; error = crypto_newsession(&sav->tdb_cryptoid, &crie, V_crypto_support); } else if (sav->tdb_encalgxform) { error = crypto_newsession(&sav->tdb_cryptoid, &crie, V_crypto_support); } else if (sav->tdb_authalgxform) { error = crypto_newsession(&sav->tdb_cryptoid, &cria, V_crypto_support); } else { /* XXX cannot happen? */ DPRINTF(("%s: no encoding OR authentication xform!\n", __func__)); error = EINVAL; } return error; } /* * Paranoia. */ static int esp_zeroize(struct secasvar *sav) { /* NB: ah_zerorize free's the crypto session state */ int error = ah_zeroize(sav); if (sav->key_enc) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); sav->tdb_encalgxform = NULL; sav->tdb_xform = NULL; return error; } /* * ESP input processing, called (eventually) through the protocol switch. */ static int esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { char buf[128]; - struct auth_hash *esph; - struct enc_xform *espx; - struct tdb_crypto *tc; - uint8_t *ivp; - int plen, alen, hlen; - struct newesp *esp; + const struct auth_hash *esph; + const struct enc_xform *espx; + struct xform_data *xd; struct cryptodesc *crde; struct cryptop *crp; + struct newesp *esp; + uint8_t *ivp; + uint64_t cryptoid; + int plen, alen, hlen; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("null encoding xform")); /* Valid IP Packet length ? */ if ( (skip&3) || (m->m_pkthdr.len&3) ){ DPRINTF(("%s: misaligned packet, skip %u pkt len %u", __func__, skip, m->m_pkthdr.len)); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; } /* XXX don't pullup, just copy header */ IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Determine the ESP header and auth length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; alen = xform_ah_authsize(esph); /* * Verify payload length is multiple of encryption algorithm * block size. * * NB: This works for the null algorithm because the blocksize * is 4 and all packets must be 4-byte aligned regardless * of the algorithm. */ plen = m->m_pkthdr.len - (skip + hlen + alen); if ((plen & (espx->blocksize - 1)) || (plen <= 0)) { DPRINTF(("%s: payload of %d octets not a multiple of %d octets," " SA %s/%08lx\n", __func__, plen, espx->blocksize, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long)ntohl(sav->spi))); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; } /* * Check sequence number. */ - if (esph != NULL && sav->replay != NULL && - !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { - DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav, buf, sizeof(buf)))); /*XXX*/ - ESPSTAT_INC(esps_replay); - m_freem(m); - return ENOBUFS; /*XXX*/ + SECASVAR_LOCK(sav); + if (esph != NULL && sav->replay != NULL && sav->replay->wsize != 0) { + if (ipsec_chkreplay(ntohl(esp->esp_seq), sav) == 0) { + SECASVAR_UNLOCK(sav); + DPRINTF(("%s: packet replay check for %s\n", __func__, + ipsec_sa2str(sav, buf, sizeof(buf)))); + ESPSTAT_INC(esps_replay); + m_freem(m); + return (EACCES); + } } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Update the counters */ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen)); /* Get crypto descriptors */ crp = crypto_getreq(esph && espx ? 2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); ESPSTAT_INC(esps_crypto); m_freem(m); return ENOBUFS; } /* Get IPsec-specific opaque pointer */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, - M_XDATA, M_NOWAIT | M_ZERO); - if (tc == NULL) { - crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd) + alen, M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); ESPSTAT_INC(esps_crypto); + crypto_freereq(crp); m_freem(m); return ENOBUFS; } if (esph != NULL) { struct cryptodesc *crda = crp->crp_desc; IPSEC_ASSERT(crda != NULL, ("null ah crypto descriptor")); /* Authentication descriptor */ crda->crd_skip = skip; if (SAV_ISGCM(sav)) crda->crd_len = 8; /* RFC4106 5, SPI + SN */ else crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; crda->crd_alg = esph->type; /* Copy the authenticator */ m_copydata(m, m->m_pkthdr.len - alen, alen, - (caddr_t) (tc + 1)); + (caddr_t) (xd + 1)); /* Chain authentication request */ crde = crda->crd_next; } else { crde = crp->crp_desc; } /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->protoff = protoff; + xd->skip = skip; + xd->cryptoid = cryptoid; /* Decryption descriptor */ IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); crde->crd_skip = skip + hlen; crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); crde->crd_inject = skip + hlen - sav->ivlen; if (SAV_ISCTRORGCM(sav)) { ivp = &crde->crd_iv[0]; /* GCM IV Format: RFC4106 4 */ /* CTR IV Format: RFC3686 4 */ /* Salt is last four bytes of key, RFC4106 8.1 */ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ be32enc(&ivp[sav->ivlen + 4], 1); } m_copydata(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); crde->crd_flags |= CRD_F_IV_EXPLICIT; } crde->crd_alg = espx->type; return (crypto_dispatch(crp)); } /* * ESP input callback from the crypto driver. */ static int esp_input_cb(struct cryptop *crp) { char buf[128]; u_int8_t lastthree[3], aalg[AH_HMAC_MAXHASHLEN]; - int hlen, skip, protoff, error, alen; + const struct auth_hash *esph; + const struct enc_xform *espx; struct mbuf *m; struct cryptodesc *crd; - struct auth_hash *esph; - struct enc_xform *espx; - struct tdb_crypto *tc; + struct xform_data *xd; struct secasvar *sav; struct secasindex *saidx; caddr_t ptr; + uint64_t cryptoid; + int hlen, skip, protoff, error, alen; crd = crp->crp_desc; IPSEC_ASSERT(crd != NULL, ("null crypto descriptor!")); - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; - IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || - saidx->dst.sa.sa_family == AF_INET6, - ("unexpected protocol family %u", saidx->dst.sa.sa_family)); - esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - - if (crp->crp_etype == EAGAIN) + if (crp->crp_etype == EAGAIN) { + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); - + } ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... */ if (m == NULL) { ESPSTAT_INC(esps_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } ESPSTAT_INC(esps_hist[sav->alg_enc]); /* If authentication was performed, check now. */ if (esph != NULL) { alen = xform_ah_authsize(esph); AHSTAT_INC(ahs_hist[sav->alg_auth]); /* Copy the authenticator from the packet */ m_copydata(m, m->m_pkthdr.len - alen, alen, aalg); - ptr = (caddr_t) (tc + 1); + ptr = (caddr_t) (xd + 1); /* Verify authenticator */ if (timingsafe_bcmp(ptr, aalg, alen) != 0) { DPRINTF(("%s: authentication hash mismatch for " "packet in SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_badauth); error = EACCES; goto bad; } - + m->m_flags |= M_AUTHIPDGM; /* Remove trailing authenticator */ m_adj(m, -alen); } /* Release the crypto descriptors */ - free(tc, M_XDATA), tc = NULL; + free(xd, M_XDATA), xd = NULL; crypto_freereq(crp), crp = NULL; /* * Packet is now decrypted. */ m->m_flags |= M_DECRYPTED; /* * Update replay sequence number, if appropriate. */ if (sav->replay) { u_int32_t seq; m_copydata(m, skip + offsetof(struct newesp, esp_seq), sizeof (seq), (caddr_t) &seq); + SECASVAR_LOCK(sav); if (ipsec_updatereplay(ntohl(seq), sav)) { + SECASVAR_UNLOCK(sav); DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav, buf, sizeof(buf)))); + ipsec_sa2str(sav, buf, sizeof(buf)))); ESPSTAT_INC(esps_replay); - error = ENOBUFS; + error = EACCES; goto bad; } + SECASVAR_UNLOCK(sav); } /* Determine the ESP header length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; /* Remove the ESP header and IV from the mbuf. */ error = m_striphdr(m, skip, hlen); if (error) { ESPSTAT_INC(esps_hdrops); DPRINTF(("%s: bad mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); goto bad; } /* Save the last three bytes of decrypted data */ m_copydata(m, m->m_pkthdr.len - 3, 3, lastthree); /* Verify pad length */ if (lastthree[1] + 2 > m->m_pkthdr.len - skip) { ESPSTAT_INC(esps_badilen); DPRINTF(("%s: invalid padding length %d for %u byte packet " "in SA %s/%08lx\n", __func__, lastthree[1], m->m_pkthdr.len - skip, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } /* Verify correct decryption by checking the last padding bytes */ if ((sav->flags & SADB_X_EXT_PMASK) != SADB_X_EXT_PRAND) { if (lastthree[1] != lastthree[0] && lastthree[1] != 0) { ESPSTAT_INC(esps_badenc); DPRINTF(("%s: decryption failed for packet in " "SA %s/%08lx\n", __func__, ipsec_address( &sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } } /* Trim the mbuf chain to remove trailing authenticator and padding */ m_adj(m, -(lastthree[1] + 2)); /* Restore the Next Protocol field */ m_copyback(m, protoff, sizeof (u_int8_t), lastthree + 2); switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: - if (sav) - KEY_FREESAV(&sav); + if (sav != NULL) + key_freesav(&sav); if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); + if (xd != NULL) + free(xd, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } - /* - * ESP output routine, called by ipsec[46]_process_packet(). + * ESP output routine, called by ipsec[46]_perform_request(). */ static int -esp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, - int skip, int protoff) +esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; - struct enc_xform *espx; - struct auth_hash *esph; - uint8_t *ivp; - uint64_t cntr; - int hlen, rlen, padding, blks, alen, i, roff; - struct mbuf *mo = (struct mbuf *) NULL; - struct tdb_crypto *tc; - struct secasvar *sav; + char buf[IPSEC_ADDRSTRLEN]; + struct cryptodesc *crde = NULL, *crda = NULL; + struct cryptop *crp; + const struct auth_hash *esph; + const struct enc_xform *espx; + struct mbuf *mo = NULL; + struct xform_data *xd; struct secasindex *saidx; unsigned char *pad; - u_int8_t prot; + uint8_t *ivp; + uint64_t cntr, cryptoid; + int hlen, rlen, padding, blks, alen, i, roff; int error, maxpacketsize; + uint8_t prot; - struct cryptodesc *crde = NULL, *crda = NULL; - struct cryptop *crp; - - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; IPSEC_ASSERT(espx != NULL, ("null encoding xform")); if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; rlen = m->m_pkthdr.len - skip; /* Raw payload length. */ /* * RFC4303 2.4 Requires 4 byte alignment. */ blks = MAX(4, espx->blocksize); /* Cipher blocksize */ /* XXX clamp padding length a la KAME??? */ padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; alen = xform_ah_authsize(esph); ESPSTAT_INC(esps_output); saidx = &sav->sah->saidx; /* Check for maximum packet size violations. */ switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: maxpacketsize = IP_MAXPACKET; break; #endif /* INET */ #ifdef INET6 case AF_INET6: maxpacketsize = IPV6_MAXPACKET; break; #endif /* INET6 */ default: DPRINTF(("%s: unknown/unsupported protocol " "family %d, SA %s/%08lx\n", __func__, saidx->dst.sa.sa_family, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_nopf); error = EPFNOSUPPORT; goto bad; } + /* DPRINTF(("%s: skip %d hlen %d rlen %d padding %d alen %d blksd %d\n", - __func__, skip, hlen, rlen, padding, alen, blks)); + __func__, skip, hlen, rlen, padding, alen, blks)); */ if (skip + hlen + rlen + padding + alen > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), skip + hlen + rlen + padding + alen, maxpacketsize)); ESPSTAT_INC(esps_toobig); error = EMSGSIZE; goto bad; } /* Update the counters. */ ESPSTAT_ADD(esps_obytes, m->m_pkthdr.len - skip); m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); error = ENOBUFS; goto bad; } /* Inject ESP header. */ mo = m_makespace(m, skip, hlen, &roff); if (mo == NULL) { DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n", __func__, hlen, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); - ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ + ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ error = ENOBUFS; goto bad; } /* Initialize ESP header. */ - bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, sizeof(u_int32_t)); + bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, + sizeof(uint32_t)); + SECASVAR_LOCK(sav); if (sav->replay) { - u_int32_t replay; + uint32_t replay; - SECASVAR_LOCK(sav); #ifdef REGRESSION /* Emulate replay attack when ipsec_replay is TRUE. */ if (!V_ipsec_replay) #endif sav->replay->count++; replay = htonl(sav->replay->count); - SECASVAR_UNLOCK(sav); - bcopy((caddr_t) &replay, - mtod(mo, caddr_t) + roff + sizeof(u_int32_t), - sizeof(u_int32_t)); + + bcopy((caddr_t) &replay, mtod(mo, caddr_t) + roff + + sizeof(uint32_t), sizeof(uint32_t)); } + cryptoid = sav->tdb_cryptoid; + if (SAV_ISCTRORGCM(sav)) + cntr = sav->cntr++; + SECASVAR_UNLOCK(sav); /* * Add padding -- better to do it ourselves than use the crypto engine, * although if/when we support compression, we'd have to do that. */ pad = (u_char *) m_pad(m, padding + alen); if (pad == NULL) { DPRINTF(("%s: m_pad failed for SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); m = NULL; /* NB: free'd by m_pad */ error = ENOBUFS; goto bad; } /* * Add padding: random, zero, or self-describing. * XXX catch unexpected setting */ switch (sav->flags & SADB_X_EXT_PMASK) { case SADB_X_EXT_PRAND: (void) read_random(pad, padding - 2); break; case SADB_X_EXT_PZERO: bzero(pad, padding - 2); break; case SADB_X_EXT_PSEQ: for (i = 0; i < padding - 2; i++) pad[i] = i+1; break; } /* Fix padding length and Next Protocol in padding itself. */ pad[padding - 2] = padding - 2; m_copydata(m, protoff, sizeof(u_int8_t), pad + padding - 1); /* Fix Next Protocol in IPv4/IPv6 header. */ prot = IPPROTO_ESP; m_copyback(m, protoff, sizeof(u_int8_t), (u_char *) &prot); /* Get crypto descriptors. */ crp = crypto_getreq(esph != NULL ? 2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } /* IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } crde = crp->crp_desc; crda = crde->crd_next; /* Encryption descriptor. */ crde->crd_skip = skip + hlen; crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); crde->crd_flags = CRD_F_ENCRYPT; crde->crd_inject = skip + hlen - sav->ivlen; /* Encryption operation. */ crde->crd_alg = espx->type; if (SAV_ISCTRORGCM(sav)) { ivp = &crde->crd_iv[0]; /* GCM IV Format: RFC4106 4 */ /* CTR IV Format: RFC3686 4 */ /* Salt is last four bytes of key, RFC4106 8.1 */ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); - SECASVAR_LOCK(sav); - cntr = sav->cntr++; - SECASVAR_UNLOCK(sav); be64enc(&ivp[4], cntr); - if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ + /* XXXAE: should we use this only for first packet? */ be32enc(&ivp[sav->ivlen + 4], 1); } m_copyback(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); crde->crd_flags |= CRD_F_IV_EXPLICIT|CRD_F_IV_PRESENT; } /* Callback parameters */ - key_addref(isr->sp); - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = saidx->dst; - tc->tc_proto = saidx->proto; + xd->sp = sp; + xd->sav = sav; + xd->idx = idx; + xd->cryptoid = cryptoid; /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_output_cb; - crp->crp_opaque = (caddr_t) tc; - crp->crp_sid = sav->tdb_cryptoid; + crp->crp_opaque = (caddr_t) xd; + crp->crp_sid = cryptoid; if (esph) { /* Authentication descriptor. */ crda->crd_alg = esph->type; crda->crd_skip = skip; if (SAV_ISGCM(sav)) crda->crd_len = 8; /* RFC4106 5, SPI + SN */ else crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; } return crypto_dispatch(crp); bad: if (m) m_freem(m); return (error); } - /* * ESP output callback from the crypto driver. */ static int esp_output_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; + uint64_t cryptoid; + u_int idx; int error; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); + xd = (struct xform_data *) crp->crp_opaque; m = (struct mbuf *) crp->crp_buf; - - isr = tc->tc_isr; - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - - /* With the isr lock released, SA pointer may have changed. */ - if (sav != isr->sav) { - ESPSTAT_INC(esps_notdb); - DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n", - __func__, ipsec_address(&tc->tc_dst, buf, sizeof(buf)), - (u_long) ntohl(tc->tc_spi), tc->tc_proto)); - error = ENOBUFS; /*XXX*/ - goto bad; - } + sp = xd->sp; + sav = xd->sav; + idx = xd->idx; + cryptoid = xd->cryptoid; /* Check for crypto errors. */ if (crp->crp_etype) { - /* Reset session ID. */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); } - ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; + m_freem(m); goto bad; } /* Shouldn't happen... */ if (m == NULL) { ESPSTAT_INC(esps_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } + free(xd, M_XDATA); + crypto_freereq(crp); ESPSTAT_INC(esps_hist[sav->alg_enc]); if (sav->tdb_authalgxform != NULL) AHSTAT_INC(ahs_hist[sav->alg_auth]); - /* Release crypto descriptors. */ - free(tc, M_XDATA); - crypto_freereq(crp); - #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { static unsigned char ipseczeroes[AH_HMAC_MAXHASHLEN]; - struct auth_hash *esph; + const struct auth_hash *esph; /* * Corrupt HMAC if we want to test integrity verification of * the other side. */ esph = sav->tdb_authalgxform; if (esph != NULL) { int alen; alen = xform_ah_authsize(esph); m_copyback(m, m->m_pkthdr.len - alen, alen, ipseczeroes); } } #endif /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); + error = ipsec_process_done(m, sp, sav, idx); return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); - if (m) - m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); + key_freesav(&sav); + key_freesp(&sp); return (error); } static struct xformsw esp_xformsw = { - XF_ESP, XFT_CONF|XFT_AUTH, "IPsec ESP", - esp_init, esp_zeroize, esp_input, - esp_output + .xf_type = XF_ESP, + .xf_name = "IPsec ESP", + .xf_init = esp_init, + .xf_zeroize = esp_zeroize, + .xf_input = esp_input, + .xf_output = esp_output, }; -static void -esp_attach(void) -{ - - xform_register(&esp_xformsw); -} -SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, esp_attach, NULL); +SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_attach, &esp_xformsw); +SYSUNINIT(esp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_detach, &esp_xformsw); diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c index 066f048ba40d..eb68f2aa444e 100644 --- a/sys/netipsec/xform_ipcomp.c +++ b/sys/netipsec/xform_ipcomp.c @@ -1,787 +1,772 @@ /* $FreeBSD$ */ /* $OpenBSD: ip_ipcomp.c,v 1.1 2001/07/05 12:08:52 jjbg Exp $ */ /*- * Copyright (c) 2001 Jean-Jacques Bernard-Gundol (jj@wabbitt.org) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* IP payload compression protocol (IPComp), see RFC 2393 */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #include #include +#include #include #include #include VNET_DEFINE(int, ipcomp_enable) = 1; VNET_PCPUSTAT_DEFINE(struct ipcompstat, ipcompstat); VNET_PCPUSTAT_SYSINIT(ipcompstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipcompstat); #endif /* VIMAGE */ SYSCTL_DECL(_net_inet_ipcomp); SYSCTL_INT(_net_inet_ipcomp, OID_AUTO, ipcomp_enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipcomp_enable), 0, ""); SYSCTL_VNET_PCPUSTAT(_net_inet_ipcomp, IPSECCTL_STATS, stats, struct ipcompstat, ipcompstat, "IPCOMP statistics (struct ipcompstat, netipsec/ipcomp_var.h"); static int ipcomp_input_cb(struct cryptop *crp); static int ipcomp_output_cb(struct cryptop *crp); -struct comp_algo * -ipcomp_algorithm_lookup(int alg) -{ - if (alg >= IPCOMP_ALG_MAX) - return NULL; - switch (alg) { - case SADB_X_CALG_DEFLATE: - return &comp_algo_deflate; - } - return NULL; -} - /* * RFC 3173 p 2.2. Non-Expansion Policy: * If the total size of a compressed payload and the IPComp header, as * defined in section 3, is not smaller than the size of the original * payload, the IP datagram MUST be sent in the original non-compressed * form. * * When we use IPComp in tunnel mode, for small packets we will receive * encapsulated IP-IP datagrams without any compression and without IPComp * header. */ static int ipcomp_encapcheck(union sockaddr_union *src, union sockaddr_union *dst) { struct secasvar *sav; - sav = KEY_ALLOCSA_TUNNEL(src, dst, IPPROTO_IPCOMP); + sav = key_allocsa_tunnel(src, dst, IPPROTO_IPCOMP); if (sav == NULL) return (0); - KEY_FREESAV(&sav); + key_freesav(&sav); if (src->sa.sa_family == AF_INET) return (sizeof(struct in_addr) << 4); else return (sizeof(struct in6_addr) << 4); } static int ipcomp_nonexp_input(struct mbuf **mp, int *offp, int proto) { int isr; switch (proto) { #ifdef INET case IPPROTO_IPV4: isr = NETISR_IP; break; #endif #ifdef INET6 case IPPROTO_IPV6: isr = NETISR_IPV6; break; #endif default: IPCOMPSTAT_INC(ipcomps_nopf); m_freem(*mp); return (IPPROTO_DONE); } m_adj(*mp, *offp); IPCOMPSTAT_ADD(ipcomps_ibytes, (*mp)->m_pkthdr.len); IPCOMPSTAT_INC(ipcomps_input); netisr_dispatch(isr, *mp); return (IPPROTO_DONE); } /* * ipcomp_init() is called when an CPI is being set up. */ static int ipcomp_init(struct secasvar *sav, struct xformsw *xsp) { - struct comp_algo *tcomp; + const struct comp_algo *tcomp; struct cryptoini cric; /* NB: algorithm really comes in alg_enc and not alg_comp! */ - tcomp = ipcomp_algorithm_lookup(sav->alg_enc); + tcomp = comp_algorithm_lookup(sav->alg_enc); if (tcomp == NULL) { DPRINTF(("%s: unsupported compression algorithm %d\n", __func__, sav->alg_comp)); return EINVAL; } sav->alg_comp = sav->alg_enc; /* set for doing histogram */ sav->tdb_xform = xsp; sav->tdb_compalgxform = tcomp; /* Initialize crypto session */ bzero(&cric, sizeof (cric)); cric.cri_alg = sav->tdb_compalgxform->type; return crypto_newsession(&sav->tdb_cryptoid, &cric, V_crypto_support); } /* * ipcomp_zeroize() used when IPCA is deleted */ static int ipcomp_zeroize(struct secasvar *sav) { int err; err = crypto_freesession(sav->tdb_cryptoid); sav->tdb_cryptoid = 0; return err; } /* * ipcomp_input() gets called to uncompress an input packet */ static int ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - struct tdb_crypto *tc; + struct xform_data *xd; struct cryptodesc *crdc; struct cryptop *crp; struct ipcomp *ipcomp; caddr_t addr; int hlen = IPCOMP_HLENGTH; /* * Check that the next header of the IPComp is not IPComp again, before * doing any real work. Given it is not possible to do double * compression it means someone is playing tricks on us. */ if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == NULL) { IPCOMPSTAT_INC(ipcomps_hdrops); /*XXX*/ DPRINTF(("%s: m_pullup failed\n", __func__)); return (ENOBUFS); } addr = (caddr_t) mtod(m, struct ip *) + skip; ipcomp = (struct ipcomp *)addr; if (ipcomp->comp_nxt == IPPROTO_IPCOMP) { m_freem(m); IPCOMPSTAT_INC(ipcomps_pdrops); /* XXX have our own stats? */ DPRINTF(("%s: recursive compression detected\n", __func__)); return (EINVAL); } /* Get crypto descriptors */ crp = crypto_getreq(1); if (crp == NULL) { m_freem(m); DPRINTF(("%s: no crypto descriptors\n", __func__)); IPCOMPSTAT_INC(ipcomps_crypto); return ENOBUFS; } /* Get IPsec-specific opaque pointer */ - tc = (struct tdb_crypto *) malloc(sizeof (*tc), M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { - m_freem(m); - crypto_freereq(crp); - DPRINTF(("%s: cannot allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: cannot allocate xform_data\n", __func__)); IPCOMPSTAT_INC(ipcomps_crypto); + crypto_freereq(crp); + m_freem(m); return ENOBUFS; } crdc = crp->crp_desc; crdc->crd_skip = skip + hlen; crdc->crd_len = m->m_pkthdr.len - (skip + hlen); crdc->crd_inject = skip; - tc->tc_ptr = 0; - /* Decompression operation */ crdc->crd_alg = sav->tdb_compalgxform->type; + /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len - (skip + hlen); crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ipcomp_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->protoff = protoff; + xd->skip = skip; + + SECASVAR_LOCK(sav); + crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); return crypto_dispatch(crp); } /* * IPComp input callback from the crypto driver. */ static int ipcomp_input_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; struct cryptodesc *crd; - struct tdb_crypto *tc; - int skip, protoff; + struct xform_data *xd; struct mbuf *m; struct secasvar *sav; struct secasindex *saidx; - int hlen = IPCOMP_HLENGTH, error, clen; - u_int8_t nproto; caddr_t addr; + uint64_t cryptoid; + int hlen = IPCOMP_HLENGTH, error, clen; + int skip, protoff; + uint8_t nproto; crd = crp->crp_desc; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, ("unexpected protocol family %u", saidx->dst.sa.sa_family)); /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - return crypto_dispatch(crp); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } IPCOMPSTAT_INC(ipcomps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... */ if (m == NULL) { IPCOMPSTAT_INC(ipcomps_crypto); DPRINTF(("%s: null mbuf returned from crypto\n", __func__)); error = EINVAL; goto bad; } IPCOMPSTAT_INC(ipcomps_hist[sav->alg_comp]); clen = crp->crp_olen; /* Length of data after processing */ /* Release the crypto descriptors */ - free(tc, M_XDATA), tc = NULL; + free(xd, M_XDATA), xd = NULL; crypto_freereq(crp), crp = NULL; /* In case it's not done already, adjust the size of the mbuf chain */ m->m_pkthdr.len = clen + hlen + skip; if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == NULL) { IPCOMPSTAT_INC(ipcomps_hdrops); /*XXX*/ DPRINTF(("%s: m_pullup failed\n", __func__)); error = EINVAL; /*XXX*/ goto bad; } /* Keep the next protocol field */ addr = (caddr_t) mtod(m, struct ip *) + skip; nproto = ((struct ipcomp *) addr)->comp_nxt; /* Remove the IPCOMP header */ error = m_striphdr(m, skip, hlen); if (error) { IPCOMPSTAT_INC(ipcomps_hdrops); DPRINTF(("%s: bad mbuf chain, IPCA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); goto bad; } /* Restore the Next Protocol field */ m_copyback(m, protoff, sizeof (u_int8_t), (u_int8_t *) &nproto); switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: - if (sav) - KEY_FREESAV(&sav); - if (m) + if (sav != NULL) + key_freesav(&sav); + if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); - if (crp) + if (xd != NULL) + free(xd, M_XDATA); + if (crp != NULL) crypto_freereq(crp); return error; } /* - * IPComp output routine, called by ipsec[46]_process_packet() + * IPComp output routine, called by ipsec[46]_perform_request() */ static int -ipcomp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, - int skip, int protoff) +ipcomp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; - struct secasvar *sav; - struct comp_algo *ipcompx; - int error, ralen, maxpacketsize; + char buf[IPSEC_ADDRSTRLEN]; + const struct comp_algo *ipcompx; struct cryptodesc *crdc; struct cryptop *crp; - struct tdb_crypto *tc; + struct xform_data *xd; + int error, ralen, maxpacketsize; - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); ipcompx = sav->tdb_compalgxform; IPSEC_ASSERT(ipcompx != NULL, ("null compression xform")); /* * Do not touch the packet in case our payload to compress * is lower than the minimal threshold of the compression * alogrithm. We will just send out the data uncompressed. * See RFC 3173, 2.2. Non-Expansion Policy. */ if (m->m_pkthdr.len <= ipcompx->minlen) { IPCOMPSTAT_INC(ipcomps_threshold); - return ipsec_process_done(m, isr); + return ipsec_process_done(m, sp, sav, idx); } ralen = m->m_pkthdr.len - skip; /* Raw payload length before comp. */ IPCOMPSTAT_INC(ipcomps_output); /* Check for maximum packet size violations. */ switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: maxpacketsize = IP_MAXPACKET; break; #endif /* INET */ #ifdef INET6 case AF_INET6: maxpacketsize = IPV6_MAXPACKET; break; #endif /* INET6 */ default: IPCOMPSTAT_INC(ipcomps_nopf); DPRINTF(("%s: unknown/unsupported protocol family %d, " "IPCA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EPFNOSUPPORT; goto bad; } if (ralen + skip + IPCOMP_HLENGTH > maxpacketsize) { IPCOMPSTAT_INC(ipcomps_toobig); DPRINTF(("%s: packet in IPCA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), ralen + skip + IPCOMP_HLENGTH, maxpacketsize)); error = EMSGSIZE; goto bad; } /* Update the counters */ IPCOMPSTAT_ADD(ipcomps_obytes, m->m_pkthdr.len - skip); m = m_unshare(m, M_NOWAIT); if (m == NULL) { IPCOMPSTAT_INC(ipcomps_hdrops); DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; goto bad; } /* Ok now, we can pass to the crypto processing. */ /* Get crypto descriptors */ crp = crypto_getreq(1); if (crp == NULL) { IPCOMPSTAT_INC(ipcomps_crypto); DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__)); error = ENOBUFS; goto bad; } crdc = crp->crp_desc; /* Compression descriptor */ crdc->crd_skip = skip; crdc->crd_len = ralen; crdc->crd_flags = CRD_F_COMP; crdc->crd_inject = skip; /* Compression operation */ crdc->crd_alg = ipcompx->type; /* IPsec-specific opaque crypto info */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { IPCOMPSTAT_INC(ipcomps_crypto); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); crypto_freereq(crp); error = ENOBUFS; goto bad; } - key_addref(isr->sp); - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; + xd->sp = sp; + xd->sav = sav; + xd->idx = idx; + xd->skip = skip; + xd->protoff = protoff; /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ipcomp_output_cb; - crp->crp_opaque = (caddr_t) tc; - crp->crp_sid = sav->tdb_cryptoid; + crp->crp_opaque = (caddr_t) xd; + + SECASVAR_LOCK(sav); + crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); return crypto_dispatch(crp); bad: if (m) m_freem(m); return (error); } /* * IPComp output callback from the crypto driver. */ static int ipcomp_output_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + char buf[IPSEC_ADDRSTRLEN]; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; - int error, skip; + uint64_t cryptoid; + u_int idx; + int error, skip, protoff; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); m = (struct mbuf *) crp->crp_buf; - skip = tc->tc_skip; - - isr = tc->tc_isr; - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ - if (sav != isr->sav) { - IPCOMPSTAT_INC(ipcomps_notdb); - DPRINTF(("%s: SA expired while in crypto\n", __func__)); - error = ENOBUFS; /*XXX*/ - goto bad; - } + xd = (struct xform_data *) crp->crp_opaque; + idx = xd->idx; + sp = xd->sp; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); - return crypto_dispatch(crp); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } IPCOMPSTAT_INC(ipcomps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... */ if (m == NULL) { IPCOMPSTAT_INC(ipcomps_crypto); DPRINTF(("%s: bogus return buffer from crypto\n", __func__)); error = EINVAL; goto bad; } IPCOMPSTAT_INC(ipcomps_hist[sav->alg_comp]); if (crp->crp_ilen - skip > crp->crp_olen) { struct mbuf *mo; struct ipcomp *ipcomp; int roff; uint8_t prot; /* Compression helped, inject IPCOMP header. */ mo = m_makespace(m, skip, IPCOMP_HLENGTH, &roff); if (mo == NULL) { IPCOMPSTAT_INC(ipcomps_wrap); - DPRINTF(("%s: IPCOMP header inject failed for IPCA %s/%08lx\n", + DPRINTF(("%s: IPCOMP header inject failed " + "for IPCA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; goto bad; } ipcomp = (struct ipcomp *)(mtod(mo, caddr_t) + roff); /* Initialize the IPCOMP header */ /* XXX alignment always correct? */ switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: ipcomp->comp_nxt = mtod(m, struct ip *)->ip_p; break; #endif /* INET */ #ifdef INET6 case AF_INET6: ipcomp->comp_nxt = mtod(m, struct ip6_hdr *)->ip6_nxt; break; #endif } ipcomp->comp_flags = 0; ipcomp->comp_cpi = htons((u_int16_t) ntohl(sav->spi)); /* Fix Next Protocol in IPv4/IPv6 header */ prot = IPPROTO_IPCOMP; - m_copyback(m, tc->tc_protoff, sizeof(u_int8_t), + m_copyback(m, protoff, sizeof(u_int8_t), (u_char *)&prot); /* Adjust the length in the IP header */ switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len); break; #endif /* INET */ #ifdef INET6 case AF_INET6: mtod(m, struct ip6_hdr *)->ip6_plen = htons(m->m_pkthdr.len) - sizeof(struct ip6_hdr); break; #endif /* INET6 */ default: IPCOMPSTAT_INC(ipcomps_nopf); DPRINTF(("%s: unknown/unsupported protocol " "family %d, IPCA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EPFNOSUPPORT; goto bad; } } else { /* Compression was useless, we have lost time. */ IPCOMPSTAT_INC(ipcomps_uncompr); DPRINTF(("%s: compressions was useless %d - %d <= %d\n", __func__, crp->crp_ilen, skip, crp->crp_olen)); /* XXX remember state to not compress the next couple * of packets, RFC 3173, 2.2. Non-Expansion Policy */ } /* Release the crypto descriptor */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); + error = ipsec_process_done(m, sp, sav, idx); return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); if (m) m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); + key_freesav(&sav); + key_freesp(&sp); return (error); } -static struct xformsw ipcomp_xformsw = { - XF_IPCOMP, XFT_COMP, "IPcomp", - ipcomp_init, ipcomp_zeroize, ipcomp_input, - ipcomp_output -}; - #ifdef INET static const struct encaptab *ipe4_cookie = NULL; extern struct domain inetdomain; static struct protosw ipcomp4_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = 0 /* IPPROTO_IPV[46] */, .pr_flags = PR_ATOMIC | PR_ADDR | PR_LASTHDR, .pr_input = ipcomp_nonexp_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; static int ipcomp4_nonexp_encapcheck(const struct mbuf *m, int off, int proto, void *arg __unused) { union sockaddr_union src, dst; const struct ip *ip; if (V_ipcomp_enable == 0) return (0); if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6) return (0); bzero(&src, sizeof(src)); bzero(&dst, sizeof(dst)); src.sa.sa_family = dst.sa.sa_family = AF_INET; src.sin.sin_len = dst.sin.sin_len = sizeof(struct sockaddr_in); ip = mtod(m, const struct ip *); src.sin.sin_addr = ip->ip_src; dst.sin.sin_addr = ip->ip_dst; return (ipcomp_encapcheck(&src, &dst)); } #endif #ifdef INET6 static const struct encaptab *ipe6_cookie = NULL; extern struct domain inet6domain; static struct protosw ipcomp6_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = 0 /* IPPROTO_IPV[46] */, .pr_flags = PR_ATOMIC | PR_ADDR | PR_LASTHDR, .pr_input = ipcomp_nonexp_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }; static int ipcomp6_nonexp_encapcheck(const struct mbuf *m, int off, int proto, void *arg __unused) { union sockaddr_union src, dst; const struct ip6_hdr *ip6; if (V_ipcomp_enable == 0) return (0); if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6) return (0); bzero(&src, sizeof(src)); bzero(&dst, sizeof(dst)); src.sa.sa_family = dst.sa.sa_family = AF_INET; src.sin6.sin6_len = dst.sin6.sin6_len = sizeof(struct sockaddr_in6); ip6 = mtod(m, const struct ip6_hdr *); src.sin6.sin6_addr = ip6->ip6_src; dst.sin6.sin6_addr = ip6->ip6_dst; if (IN6_IS_SCOPE_LINKLOCAL(&src.sin6.sin6_addr)) { /* XXX: sa6_recoverscope() */ src.sin6.sin6_scope_id = ntohs(src.sin6.sin6_addr.s6_addr16[1]); src.sin6.sin6_addr.s6_addr16[1] = 0; } if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6.sin6_addr)) { /* XXX: sa6_recoverscope() */ dst.sin6.sin6_scope_id = ntohs(dst.sin6.sin6_addr.s6_addr16[1]); dst.sin6.sin6_addr.s6_addr16[1] = 0; } return (ipcomp_encapcheck(&src, &dst)); } #endif +static struct xformsw ipcomp_xformsw = { + .xf_type = XF_IPCOMP, + .xf_name = "IPcomp", + .xf_init = ipcomp_init, + .xf_zeroize = ipcomp_zeroize, + .xf_input = ipcomp_input, + .xf_output = ipcomp_output, +}; + static void ipcomp_attach(void) { #ifdef INET ipe4_cookie = encap_attach_func(AF_INET, -1, ipcomp4_nonexp_encapcheck, &ipcomp4_protosw, NULL); #endif #ifdef INET6 ipe6_cookie = encap_attach_func(AF_INET6, -1, ipcomp6_nonexp_encapcheck, &ipcomp6_protosw, NULL); #endif - xform_register(&ipcomp_xformsw); + xform_attach(&ipcomp_xformsw); +} + +static void +ipcomp_detach(void) +{ + +#ifdef INET + encap_detach(ipe4_cookie); +#endif +#ifdef INET6 + encap_detach(ipe6_cookie); +#endif + xform_detach(&ipcomp_xformsw); } SYSINIT(ipcomp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipcomp_attach, NULL); +SYSUNINIT(ipcomp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + ipcomp_detach, NULL); diff --git a/sys/netipsec/xform_tcp.c b/sys/netipsec/xform_tcp.c index 6a55ed27f050..96019ce3dd31 100644 --- a/sys/netipsec/xform_tcp.c +++ b/sys/netipsec/xform_tcp.c @@ -1,170 +1,435 @@ -/* $FreeBSD$ */ - /*- * Copyright (c) 2003 Bruce M. Simpson + * Copyright (c) 2016 Andrey V. Elsukov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* TCP MD5 Signature Option (RFC2385) */ +#include +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" #include #include #include #include +#include +#include #include +#include #include +#include #include -#include #include +#include #include #include #include #include #include #include #include +#include #include #ifdef INET6 #include #include #endif #include #include +#define TCP_SIGLEN 16 /* length of computed digest in bytes */ +#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ +#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ + +static inline void +tcp_fields_to_net(struct tcphdr *th) +{ + + th->th_seq = htonl(th->th_seq); + th->th_ack = htonl(th->th_ack); + th->th_win = htons(th->th_win); + th->th_urp = htons(th->th_urp); +} + +static int +tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + struct tcpcb *tp; + int error, optval; + + INP_WLOCK_ASSERT(inp); + if (sopt->sopt_name != TCP_MD5SIG) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + + tp = intotcpcb(inp); + if (sopt->sopt_dir == SOPT_GET) { + optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; + INP_WUNLOCK(inp); + + /* On success return with released INP_WLOCK */ + return (sooptcopyout(sopt, &optval, sizeof(optval))); + } + + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); + if (error != 0) + return (error); + + /* INP_WLOCK_RECHECK */ + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(inp); + return (ECONNRESET); + } + if (optval > 0) + tp->t_flags |= TF_SIGNATURE; + else + tp->t_flags &= ~TF_SIGNATURE; + + /* On success return with acquired INP_WLOCK */ + return (error); +} + +/* + * Callback function invoked by m_apply() to digest TCP segment data + * contained within an mbuf chain. + */ +static int +tcp_signature_apply(void *fstate, void *data, u_int len) +{ + + MD5Update(fstate, (u_char *)data, len); + return (0); +} + +#ifdef INET +static int +ip_pseudo_compute(struct mbuf *m, MD5_CTX *ctx) +{ + struct ippseudo ipp; + struct ip *ip; + + ip = mtod(m, struct ip *); + ipp.ippseudo_src.s_addr = ip->ip_src.s_addr; + ipp.ippseudo_dst.s_addr = ip->ip_dst.s_addr; + ipp.ippseudo_p = IPPROTO_TCP; + ipp.ippseudo_pad = 0; + ipp.ippseudo_len = htons(m->m_pkthdr.len - (ip->ip_hl << 2)); + MD5Update(ctx, (char *)&ipp, sizeof(ipp)); + return (ip->ip_hl << 2); +} +#endif + +#ifdef INET6 +static int +ip6_pseudo_compute(struct mbuf *m, MD5_CTX *ctx) +{ + struct ip6_pseudo { + struct in6_addr src, dst; + uint32_t len; + uint32_t nxt; + } ip6p __aligned(4); + struct ip6_hdr *ip6; + + ip6 = mtod(m, struct ip6_hdr *); + ip6p.src = ip6->ip6_src; + ip6p.dst = ip6->ip6_dst; + ip6p.len = htonl(m->m_pkthdr.len - sizeof(*ip6)); /* XXX: ext headers */ + ip6p.nxt = htonl(IPPROTO_TCP); + MD5Update(ctx, (char *)&ip6p, sizeof(ip6p)); + return (sizeof(*ip6)); +} +#endif + +static int +tcp_signature_compute(struct mbuf *m, struct tcphdr *th, + struct secasvar *sav, u_char *buf) +{ + MD5_CTX ctx; + int len; + u_short csum; + + MD5Init(&ctx); + /* Step 1: Update MD5 hash with IP(v6) pseudo-header. */ + switch (sav->sah->saidx.dst.sa.sa_family) { +#ifdef INET + case AF_INET: + len = ip_pseudo_compute(m, &ctx); + break; +#endif +#ifdef INET6 + case AF_INET6: + len = ip6_pseudo_compute(m, &ctx); + break; +#endif + default: + return (EAFNOSUPPORT); + } + /* + * Step 2: Update MD5 hash with TCP header, excluding options. + * The TCP checksum must be set to zero. + */ + csum = th->th_sum; + th->th_sum = 0; + MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); + th->th_sum = csum; + /* + * Step 3: Update MD5 hash with TCP segment data. + * Use m_apply() to avoid an early m_pullup(). + */ + len += (th->th_off << 2); + if (m->m_pkthdr.len - len > 0) + m_apply(m, len, m->m_pkthdr.len - len, + tcp_signature_apply, &ctx); + /* + * Step 4: Update MD5 hash with shared secret. + */ + MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); + MD5Final(buf, &ctx); + key_sa_recordxfer(sav, m); + return (0); +} + +static void +setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + struct ip *ip; + + IPSEC_ASSERT(m->m_len >= sizeof(*ip), ("unexpected mbuf len")); + + ip = mtod(m, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + ipsec4_setsockaddrs(m, src, dst); + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + ipsec6_setsockaddrs(m, src, dst); + break; +#endif + default: + bzero(src, sizeof(*src)); + bzero(dst, sizeof(*dst)); + } +} + +/* + * Compute TCP-MD5 hash of an *INBOUND* TCP segment. + * Parameters: + * m pointer to head of mbuf chain + * th pointer to TCP header + * buf pointer to storage for computed MD5 digest + * + * Return 0 if successful, otherwise return -1. + */ +static int +tcp_ipsec_input(struct mbuf *m, struct tcphdr *th, u_char *buf) +{ + char tmpdigest[TCP_SIGLEN]; + struct secasindex saidx; + struct secasvar *sav; + + setsockaddrs(m, &saidx.src, &saidx.dst); + saidx.proto = IPPROTO_TCP; + saidx.mode = IPSEC_MODE_TCPMD5; + saidx.reqid = 0; + sav = key_allocsa_tcpmd5(&saidx); + if (sav == NULL) { + KMOD_TCPSTAT_INC(tcps_sig_err_buildsig); + return (EACCES); + } + /* + * tcp_input() operates with TCP header fields in host + * byte order. We expect them in network byte order. + */ + tcp_fields_to_net(th); + tcp_signature_compute(m, th, sav, tmpdigest); + tcp_fields_to_host(th); + key_freesav(&sav); + if (bcmp(buf, tmpdigest, TCP_SIGLEN) != 0) { + KMOD_TCPSTAT_INC(tcps_sig_rcvbadsig); + return (EACCES); + } + KMOD_TCPSTAT_INC(tcps_sig_rcvgoodsig); + return (0); +} + +/* + * Compute TCP-MD5 hash of an *OUTBOUND* TCP segment. + * Parameters: + * m pointer to head of mbuf chain + * th pointer to TCP header + * buf pointer to storage for computed MD5 digest + * + * Return 0 if successful, otherwise return error code. + */ +static int +tcp_ipsec_output(struct mbuf *m, struct tcphdr *th, u_char *buf) +{ + struct secasindex saidx; + struct secasvar *sav; + + setsockaddrs(m, &saidx.src, &saidx.dst); + saidx.proto = IPPROTO_TCP; + saidx.mode = IPSEC_MODE_TCPMD5; + saidx.reqid = 0; + sav = key_allocsa_tcpmd5(&saidx); + if (sav == NULL) { + KMOD_TCPSTAT_INC(tcps_sig_err_buildsig); + return (EACCES); + } + tcp_signature_compute(m, th, sav, buf); + key_freesav(&sav); + return (0); +} + /* * Initialize a TCP-MD5 SA. Called when the SA is being set up. * * We don't need to set up the tdb prefixed fields, as we don't use the * opencrypto code; we just perform a key length check. * - * XXX: Currently we only allow a single 'magic' SPI to be used. + * XXX: Currently we have used single 'magic' SPI and need to still + * support this. * * This allows per-host granularity without affecting the userland * interface, which is a simple socket option toggle switch, * TCP_SIGNATURE_ENABLE. * * To allow per-service granularity requires that we have a means * of mapping port to SPI. The mandated way of doing this is to * use SPD entries to specify packet flows which get the TCP-MD5 * treatment, however the code to do this is currently unstable * and unsuitable for production use. * * Therefore we use this compromise in the meantime. */ static int tcpsignature_init(struct secasvar *sav, struct xformsw *xsp) { int keylen; - if (sav->spi != htonl(TCP_SIG_SPI)) { - DPRINTF(("%s: SPI must be TCP_SIG_SPI (0x1000)\n", - __func__)); - return (EINVAL); - } if (sav->alg_auth != SADB_X_AALG_TCP_MD5) { DPRINTF(("%s: unsupported authentication algorithm %u\n", __func__, sav->alg_auth)); return (EINVAL); } if (sav->key_auth == NULL) { DPRINTF(("%s: no authentication key present\n", __func__)); return (EINVAL); } keylen = _KEYLEN(sav->key_auth); if ((keylen < TCP_KEYLEN_MIN) || (keylen > TCP_KEYLEN_MAX)) { DPRINTF(("%s: invalid key length %u\n", __func__, keylen)); return (EINVAL); } - + sav->tdb_xform = xsp; return (0); } /* - * Paranoia. - * * Called when the SA is deleted. */ static int tcpsignature_zeroize(struct secasvar *sav) { - if (sav->key_auth) + if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); - - sav->tdb_cryptoid = 0; - sav->tdb_authalgxform = NULL; sav->tdb_xform = NULL; - return (0); } -/* - * Verify that an input packet passes authentication. - * Called from the ipsec layer. - * We do this from within tcp itself, so this routine is just a stub. - */ -static int -tcpsignature_input(struct mbuf *m, struct secasvar *sav, int skip, - int protoff) -{ +static struct xformsw tcpsignature_xformsw = { + .xf_type = XF_TCPSIGNATURE, + .xf_name = "TCP-MD5", + .xf_init = tcpsignature_init, + .xf_zeroize = tcpsignature_zeroize, +}; - return (0); -} +static const struct tcpmd5_methods tcpmd5_methods = { + .input = tcp_ipsec_input, + .output = tcp_ipsec_output, + .pcbctl = tcp_ipsec_pcbctl, +}; + +#ifndef KLD_MODULE +/* TCP-MD5 support is build in the kernel */ +static const struct tcpmd5_support tcpmd5_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &tcpmd5_methods +}; +const struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec; +#endif /* !KLD_MODULE */ -/* - * Prepend the authentication header. - * Called from the ipsec layer. - * We do this from within tcp itself, so this routine is just a stub. - */ static int -tcpsignature_output(struct mbuf *m, struct ipsecrequest *isr, - struct mbuf **mp, int skip, int protoff) +tcpmd5_modevent(module_t mod, int type, void *data) { - return (EINVAL); + switch (type) { + case MOD_LOAD: + xform_attach(&tcpsignature_xformsw); +#ifdef KLD_MODULE + tcpmd5_support_enable(&tcpmd5_methods); +#endif + break; + case MOD_UNLOAD: +#ifdef KLD_MODULE + tcpmd5_support_disable(); +#endif + xform_detach(&tcpsignature_xformsw); + break; + default: + return (EOPNOTSUPP); + } + return (0); } -static struct xformsw tcpsignature_xformsw = { - XF_TCPSIGNATURE, XFT_AUTH, "TCPMD5", - tcpsignature_init, tcpsignature_zeroize, - tcpsignature_input, tcpsignature_output +static moduledata_t tcpmd5_mod = { + "tcpmd5", + tcpmd5_modevent, + 0 }; -static void -tcpsignature_attach(void) -{ - - xform_register(&tcpsignature_xformsw); -} - -SYSINIT(tcpsignature_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, - tcpsignature_attach, NULL); +DECLARE_MODULE(tcpmd5, tcpmd5_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(tcpmd5, 1); +#ifdef KLD_MODULE +MODULE_DEPEND(tcpmd5, ipsec_support, 1, 1, 1); +#endif diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index 71dc9f6ab72b..bf995d282bbf 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -1,1494 +1,1507 @@ /*- * Copyright (c) 1983, 1988, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" #include "nl_defs.h" void inetprint(const char *, struct in_addr *, int, const char *, int, const int); #ifdef INET6 static int udp_done, tcp_done, sdp_done; #endif /* INET6 */ static int pcblist_sysctl(int proto, const char *name, char **bufp, int istcp __unused) { const char *mibvar; char *buf; size_t len; switch (proto) { case IPPROTO_TCP: mibvar = "net.inet.tcp.pcblist"; break; case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; case IPPROTO_DIVERT: mibvar = "net.inet.divert.pcblist"; break; default: mibvar = "net.inet.raw.pcblist"; break; } if (strncmp(name, "sdp", 3) == 0) mibvar = "net.inet.sdp.pcblist"; len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) xo_warn("sysctl: %s", mibvar); return (0); } if ((buf = malloc(len)) == NULL) { xo_warnx("malloc %lu bytes", (u_long)len); return (0); } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { xo_warn("sysctl: %s", mibvar); free(buf); return (0); } *bufp = buf; return (1); } /* * Copied directly from uipc_socket2.c. We leave out some fields that are in * nested structures that aren't used to avoid extra work. */ static void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } int sotoxsocket(struct socket *so, struct xsocket *xso) { struct protosw proto; struct domain domain; bzero(xso, sizeof *xso); xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0) return (-1); xso->xso_protocol = proto.pr_protocol; if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0) return (-1); xso->xso_family = domain.dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); return (0); } static int pcblist_kvm(u_long off, char **bufp, int istcp) { struct inpcbinfo pcbinfo; struct inpcbhead listhead; struct inpcb *inp; struct xinpcb xi; struct xinpgen xig; struct xtcpcb xt; struct socket so; struct xsocket *xso; char *buf, *p; size_t len; if (off == 0) return (0); kread(off, &pcbinfo, sizeof(pcbinfo)); if (istcp) len = 2 * sizeof(xig) + (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * sizeof(struct xtcpcb); else len = 2 * sizeof(xig) + (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * sizeof(struct xinpcb); if ((buf = malloc(len)) == NULL) { xo_warnx("malloc %lu bytes", (u_long)len); return (0); } p = buf; #define COPYOUT(obj, size) do { \ if (len < (size)) { \ xo_warnx("buffer size exceeded"); \ goto fail; \ } \ bcopy((obj), p, (size)); \ len -= (size); \ p += (size); \ } while (0) #define KREAD(off, buf, len) do { \ if (kread((uintptr_t)(off), (buf), (len)) != 0) \ goto fail; \ } while (0) /* Write out header. */ xig.xig_len = sizeof xig; xig.xig_count = pcbinfo.ipi_count; xig.xig_gen = pcbinfo.ipi_gencnt; xig.xig_sogen = 0; COPYOUT(&xig, sizeof xig); /* Walk the PCB list. */ xt.xt_len = sizeof xt; xi.xi_len = sizeof xi; if (istcp) xso = &xt.xt_socket; else xso = &xi.xi_socket; KREAD(pcbinfo.ipi_listhead, &listhead, sizeof(listhead)); LIST_FOREACH(inp, &listhead, inp_list) { if (istcp) { KREAD(inp, &xt.xt_inp, sizeof(*inp)); inp = &xt.xt_inp; } else { KREAD(inp, &xi.xi_inp, sizeof(*inp)); inp = &xi.xi_inp; } if (inp->inp_gencnt > pcbinfo.ipi_gencnt) continue; if (istcp) { if (inp->inp_ppcb == NULL) bzero(&xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero(&xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else KREAD(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); } if (inp->inp_socket) { KREAD(inp->inp_socket, &so, sizeof(so)); if (sotoxsocket(&so, xso) != 0) goto fail; } else { bzero(xso, sizeof(*xso)); if (istcp) xso->xso_protocol = IPPROTO_TCP; } if (istcp) COPYOUT(&xt, sizeof xt); else COPYOUT(&xi, sizeof xi); } /* Reread the pcbinfo and write out the footer. */ kread(off, &pcbinfo, sizeof(pcbinfo)); xig.xig_count = pcbinfo.ipi_count; xig.xig_gen = pcbinfo.ipi_gencnt; COPYOUT(&xig, sizeof xig); *bufp = buf; return (1); fail: free(buf); return (0); #undef COPYOUT #undef KREAD } /* * Print a summary of connections related to an Internet * protocol. For TCP, also give state of connection. * Listening processes (aflag) are suppressed unless the * -a (all) flag is specified. */ void protopr(u_long off, const char *name, int af1, int proto) { int istcp; static int first = 1; char *buf; const char *vchar; struct tcpcb *tp = NULL; struct inpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; struct xtcp_timer *timer; istcp = 0; switch (proto) { case IPPROTO_TCP: #ifdef INET6 if (strncmp(name, "sdp", 3) != 0) { if (tcp_done != 0) return; else tcp_done = 1; } else { if (sdp_done != 0) return; else sdp_done = 1; } #endif istcp = 1; break; case IPPROTO_UDP: #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif break; } if (live) { if (!pcblist_sysctl(proto, name, &buf, istcp)) return; } else { if (!pcblist_kvm(off, &buf, istcp)) return; } oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { timer = &((struct xtcpcb *)xig)->xt_timer; tp = &((struct xtcpcb *)xig)->xt_tp; inp = &((struct xtcpcb *)xig)->xt_inp; so = &((struct xtcpcb *)xig)->xt_socket; } else { inp = &((struct xinpcb *)xig)->xi_inp; so = &((struct xinpcb *)xig)->xi_socket; timer = NULL; } /* Ignore sockets for protocols other than the desired one. */ if (so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. */ if (inp->inp_gencnt > oxig->xig_gen) continue; if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0) #ifdef INET6 || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0) #endif /* INET6 */ || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0 #ifdef INET6 && (inp->inp_vflag & INP_IPV6) == 0 #endif /* INET6 */ )) ) continue; if (!aflag && ( (istcp && tp->t_state == TCPS_LISTEN) || (af1 == AF_INET && inet_lnaof(inp->inp_laddr) == INADDR_ANY) #ifdef INET6 || (af1 == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif /* INET6 */ || (af1 == AF_UNSPEC && (((inp->inp_vflag & INP_IPV4) != 0 && inet_lnaof(inp->inp_laddr) == INADDR_ANY) #ifdef INET6 || ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif )) )) continue; if (first) { if (!Lflag) { xo_emit("Active Internet connections"); if (aflag) xo_emit(" (including servers)"); } else xo_emit( "Current listen queue sizes (qlen/incqlen/maxqlen)"); xo_emit("\n"); if (Aflag) xo_emit("{T:/%-*s} ", 2 * (int)sizeof(void *), "Tcpcb"); if (Lflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-32.32s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-32.32s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-32.32s} {T:/%-45.45s}"), "Proto", "Listen", "Local Address"); else if (Tflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%s}"), "Proto", "Rexmit", "OOORcv", "0-win", "Local Address", "Foreign Address"); else { xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%-45.45s}"), "Proto", "Recv-Q", "Send-Q", "Local Address", "Foreign Address"); if (!xflag && !Rflag) xo_emit(" (state)"); } if (xflag) { xo_emit(" {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s}", "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS", "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA", "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX"); xo_emit(" {T:/%7.7s} {T:/%7.7s} {T:/%7.7s} " "{T:/%7.7s} {T:/%7.7s} {T:/%7.7s}", "rexmt", "persist", "keep", "2msl", "delack", "rcvtime"); } else if (Rflag) { xo_emit(" {T:/%8.8s} {T:/%5.5s}", "flowid", "ftype"); } xo_emit("\n"); first = 0; } if (Lflag && so->so_qlimit == 0) continue; xo_open_instance("socket"); if (Aflag) { if (istcp) xo_emit("{q:address/%*lx} ", 2 * (int)sizeof(void *), (u_long)inp->inp_ppcb); else xo_emit("{q:adddress/%*lx} ", 2 * (int)sizeof(void *), (u_long)so->so_pcb); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "46" : "6"; else #endif vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "4" : ""; if (istcp && (tp->t_flags & TF_TOE) != 0) xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", "toe", vchar); else xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", name, vchar); if (Lflag) { char buf1[33]; snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen, so->so_incqlen, so->so_qlimit); xo_emit("{:listen-queue-sizes/%-32.32s} ", buf1); } else if (Tflag) { if (istcp) xo_emit("{:sent-retransmit-packets/%6u} " "{:received-out-of-order-packets/%6u} " "{:sent-zero-window/%6u} ", tp->t_sndrexmitpack, tp->t_rcvoopack, tp->t_sndzerowin); else xo_emit("{P:/%21s}", ""); } else { xo_emit("{:receive-bytes-waiting/%6u} " "{:send-bytes-waiting/%6u} ", so->so_rcv.sb_cc, so->so_snd.sb_cc); } if (numeric_port) { if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 1, af1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 1); } /* else nothing printed now */ #endif /* INET6 */ } else if (inp->inp_flags & INP_ANONPORT) { if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 0, af1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 0); } /* else nothing printed now */ #endif /* INET6 */ } else { if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 0, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport, af1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 0); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport); } /* else nothing printed now */ #endif /* INET6 */ } if (xflag) { xo_emit("{:receive-mbufs/%6u} {:send-mbufs/%6u} " "{:receive-clusters/%6u} {:send-clusters/%6u} " "{:receive-high-water/%6u} {:send-high-water/%6u} " "{:receive-low-water/%6u} {:send-low-water/%6u} " "{:receive-mbuf-bytes/%6u} {:send-mbuf-bytes/%6u} " "{:receive-mbuf-bytes-max/%6u} " "{:send-mbuf-bytes-max/%6u}", so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt, so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt, so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat, so->so_rcv.sb_lowat, so->so_snd.sb_lowat, so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt, so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax); if (timer != NULL) xo_emit(" {:retransmit-timer/%4d.%02d} " "{:persist-timer/%4d.%02d} " "{:keepalive-timer/%4d.%02d} " "{:msl2-timer/%4d.%02d} " "{:delay-ack-timer/%4d.%02d} " "{:inactivity-timer/%4d.%02d}", timer->tt_rexmt / 1000, (timer->tt_rexmt % 1000) / 10, timer->tt_persist / 1000, (timer->tt_persist % 1000) / 10, timer->tt_keep / 1000, (timer->tt_keep % 1000) / 10, timer->tt_2msl / 1000, (timer->tt_2msl % 1000) / 10, timer->tt_delack / 1000, (timer->tt_delack % 1000) / 10, timer->t_rcvtime / 1000, (timer->t_rcvtime % 1000) / 10); } if (istcp && !Lflag && !xflag && !Tflag && !Rflag) { if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES) xo_emit("{:tcp-state/%d}", tp->t_state); else { xo_emit("{:tcp-state/%s}", tcpstates[tp->t_state]); #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN) /* Show T/TCP `hidden state' */ if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) xo_emit("{:need-syn-or-fin/*}"); #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */ } } if (Rflag) { /* XXX: is this right Alfred */ xo_emit(" {:flow-id/%08x} {:flow-type/%5d}", inp->inp_flowid, inp->inp_flowtype); } xo_emit("\n"); xo_close_instance("socket"); } if (xig != oxig && xig->xig_gen != oxig->xig_gen) { if (oxig->xig_count > xig->xig_count) { xo_emit("Some {d:lost/%s} sockets may have been " "deleted.\n", name); } else if (oxig->xig_count < xig->xig_count) { xo_emit("Some {d:created/%s} sockets may have been " "created.\n", name); } else { xo_emit("Some {d:changed/%s} sockets may have been " "created or deleted.\n", name); } } free(buf); } /* * Dump TCP statistics structure. */ void tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct tcpstat tcpstat; uint64_t tcps_states[TCP_NSTATES]; #ifdef INET6 if (tcp_done != 0) return; else tcp_done = 1; #endif if (fetch_stats("net.inet.tcp.stats", off, &tcpstat, sizeof(tcpstat), kread_counters) != 0) return; if (fetch_stats_ro("net.inet.tcp.states", nl[N_TCPS_STATES].n_value, &tcps_states, sizeof(tcps_states), kread_counters) != 0) return; xo_open_container("tcp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, plural(tcpstat.f)) #define p1a(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f) #define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2, plural(tcpstat.f2)) #define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2) #define p3(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, pluralies(tcpstat.f)) p(tcps_sndtotal, "\t{:sent-packets/%ju} {N:/packet%s sent}\n"); p2(tcps_sndpack,tcps_sndbyte, "\t\t{:sent-data-packets/%ju} " "{N:/data packet%s} ({:sent-data-bytes/%ju} {N:/byte%s})\n"); p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t" "{:sent-retransmitted-packets/%ju} {N:/data packet%s} " "({:sent-retransmitted-bytes/%ju} {N:/byte%s}) " "{N:retransmitted}\n"); p(tcps_sndrexmitbad, "\t\t" "{:sent-unnecessary-retransmitted-packets/%ju} " "{N:/data packet%s unnecessarily retransmitted}\n"); p(tcps_mturesent, "\t\t{:sent-resends-by-mtu-discovery/%ju} " "{N:/resend%s initiated by MTU discovery}\n"); p2a(tcps_sndacks, tcps_delack, "\t\t{:sent-ack-only-packets/%ju} " "{N:/ack-only packet%s/} ({:sent-packets-delayed/%ju} " "{N:delayed})\n"); p(tcps_sndurg, "\t\t{:sent-urg-only-packets/%ju} " "{N:/URG only packet%s}\n"); p(tcps_sndprobe, "\t\t{:sent-window-probe-packets/%ju} " "{N:/window probe packet%s}\n"); p(tcps_sndwinup, "\t\t{:sent-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_sndctrl, "\t\t{:sent-control-packets/%ju} " "{N:/control packet%s}\n"); p(tcps_rcvtotal, "\t{:received-packets/%ju} " "{N:/packet%s received}\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t" "{:received-ack-packets/%ju} {N:/ack%s} " "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n"); p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} " "{N:/duplicate ack%s}\n"); p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} " "{N:/ack%s for unsent data}\n"); p2(tcps_rcvpack, tcps_rcvbyte, "\t\t" "{:received-in-sequence-packets/%ju} {N:/packet%s} " "({:received-in-sequence-bytes/%ju} {N:/byte%s}) " "{N:received in-sequence}\n"); p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t" "{:received-completely-duplicate-packets/%ju} " "{N:/completely duplicate packet%s} " "({:received-completely-duplicate-bytes/%ju} {N:/byte%s})\n"); p(tcps_pawsdrop, "\t\t{:received-old-duplicate-packets/%ju} " "{N:/old duplicate packet%s}\n"); p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t" "{:received-some-duplicate-packets/%ju} " "{N:/packet%s with some dup. data} " "({:received-some-duplicate-bytes/%ju} {N:/byte%s duped/})\n"); p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t{:received-out-of-order/%ju} " "{N:/out-of-order packet%s} " "({:received-out-of-order-bytes/%ju} {N:/byte%s})\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t" "{:received-after-window-packets/%ju} {N:/packet%s} " "({:received-after-window-bytes/%ju} {N:/byte%s}) " "{N:of data after window}\n"); p(tcps_rcvwinprobe, "\t\t{:received-window-probes/%ju} " "{N:/window probe%s}\n"); p(tcps_rcvwinupd, "\t\t{:receive-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_rcvafterclose, "\t\t{:received-after-close-packets/%ju} " "{N:/packet%s received after close}\n"); p(tcps_rcvbadsum, "\t\t{:discard-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(tcps_rcvbadoff, "\t\t{:discard-bad-header-offset/%ju} " "{N:/discarded for bad header offset field%s}\n"); p1a(tcps_rcvshort, "\t\t{:discard-too-short/%ju} " "{N:discarded because packet too short}\n"); p1a(tcps_rcvmemdrop, "\t\t{:discard-memory-problems/%ju} " "{N:discarded due to memory problems}\n"); p(tcps_connattempt, "\t{:connection-requests/%ju} " "{N:/connection request%s}\n"); p(tcps_accepts, "\t{:connections-accepts/%ju} " "{N:/connection accept%s}\n"); p(tcps_badsyn, "\t{:bad-connection-attempts/%ju} " "{N:/bad connection attempt%s}\n"); p(tcps_listendrop, "\t{:listen-queue-overflows/%ju} " "{N:/listen queue overflow%s}\n"); p(tcps_badrst, "\t{:ignored-in-window-resets/%ju} " "{N:/ignored RSTs in the window%s}\n"); p(tcps_connects, "\t{:connections-established/%ju} " "{N:/connection%s established (including accepts)}\n"); p(tcps_usedrtt, "\t\t{:connections-hostcache-rtt/%ju} " "{N:/time%s used RTT from hostcache}\n"); p(tcps_usedrttvar, "\t\t{:connections-hostcache-rttvar/%ju} " "{N:/time%s used RTT variance from hostcache}\n"); p(tcps_usedssthresh, "\t\t{:connections-hostcache-ssthresh/%ju} " "{N:/time%s used slow-start threshold from hostcache}\n"); p2(tcps_closed, tcps_drops, "\t{:connections-closed/%ju} " "{N:/connection%s closed (including} " "{:connection-drops/%ju} {N:/drop%s})\n"); p(tcps_cachedrtt, "\t\t{:connections-updated-rtt-on-close/%ju} " "{N:/connection%s updated cached RTT on close}\n"); p(tcps_cachedrttvar, "\t\t" "{:connections-updated-variance-on-close/%ju} " "{N:/connection%s updated cached RTT variance on close}\n"); p(tcps_cachedssthresh, "\t\t" "{:connections-updated-ssthresh-on-close/%ju} " "{N:/connection%s updated cached ssthresh on close}\n"); p(tcps_conndrops, "\t{:embryonic-connections-dropped/%ju} " "{N:/embryonic connection%s dropped}\n"); p2(tcps_rttupdated, tcps_segstimed, "\t{:segments-updated-rtt/%ju} " "{N:/segment%s updated rtt (of} " "{:segment-update-attempts/%ju} {N:/attempt%s})\n"); p(tcps_rexmttimeo, "\t{:retransmit-timeouts/%ju} " "{N:/retransmit timeout%s}\n"); p(tcps_timeoutdrop, "\t\t" "{:connections-dropped-by-retransmit-timeout/%ju} " "{N:/connection%s dropped by rexmit timeout}\n"); p(tcps_persisttimeo, "\t{:persist-timeout/%ju} " "{N:/persist timeout%s}\n"); p(tcps_persistdrop, "\t\t" "{:connections-dropped-by-persist-timeout/%ju} " "{N:/connection%s dropped by persist timeout}\n"); p(tcps_finwait2_drops, "\t" "{:connections-dropped-by-finwait2-timeout/%ju} " "{N:/Connection%s (fin_wait_2) dropped because of timeout}\n"); p(tcps_keeptimeo, "\t{:keepalive-timeout/%ju} " "{N:/keepalive timeout%s}\n"); p(tcps_keepprobe, "\t\t{:keepalive-probes/%ju} " "{N:/keepalive probe%s sent}\n"); p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} " "{N:/connection%s dropped by keepalive}\n"); p(tcps_predack, "\t{:ack-header-predictions/%ju} " "{N:/correct ACK header prediction%s}\n"); p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} " "{N:/correct data packet header prediction%s}\n"); xo_open_container("syncache"); p3(tcps_sc_added, "\t{:entries-added/%ju} " "{N:/syncache entr%s added}\n"); p1a(tcps_sc_retransmitted, "\t\t{:retransmitted/%ju} " "{N:/retransmitted}\n"); p1a(tcps_sc_dupsyn, "\t\t{:duplicates/%ju} {N:/dupsyn}\n"); p1a(tcps_sc_dropped, "\t\t{:dropped/%ju} {N:/dropped}\n"); p1a(tcps_sc_completed, "\t\t{:completed/%ju} {N:/completed}\n"); p1a(tcps_sc_bucketoverflow, "\t\t{:bucket-overflow/%ju} " "{N:/bucket overflow}\n"); p1a(tcps_sc_cacheoverflow, "\t\t{:cache-overflow/%ju} " "{N:/cache overflow}\n"); p1a(tcps_sc_reset, "\t\t{:reset/%ju} {N:/reset}\n"); p1a(tcps_sc_stale, "\t\t{:stale/%ju} {N:/stale}\n"); p1a(tcps_sc_aborted, "\t\t{:aborted/%ju} {N:/aborted}\n"); p1a(tcps_sc_badack, "\t\t{:bad-ack/%ju} {N:/badack}\n"); p1a(tcps_sc_unreach, "\t\t{:unreachable/%ju} {N:/unreach}\n"); p(tcps_sc_zonefail, "\t\t{:zone-failures/%ju} {N:/zone failure%s}\n"); p(tcps_sc_sendcookie, "\t{:sent-cookies/%ju} {N:/cookie%s sent}\n"); p(tcps_sc_recvcookie, "\t{:receivd-cookies/%ju} " "{N:/cookie%s received}\n"); xo_close_container("syncache"); xo_open_container("hostcache"); p3(tcps_hc_added, "\t{:entries-added/%ju} " "{N:/hostcache entr%s added}\n"); p1a(tcps_hc_bucketoverflow, "\t\t{:buffer-overflows/%ju} " "{N:/bucket overflow}\n"); xo_close_container("hostcache"); xo_open_container("sack"); p(tcps_sack_recovery_episode, "\t{:recovery-episodes/%ju} " "{N:/SACK recovery episode%s}\n"); p(tcps_sack_rexmits, "\t{:segment-retransmits/%ju} " "{N:/segment rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rexmit_bytes, "\t{:byte-retransmits/%ju} " "{N:/byte rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rcv_blocks, "\t{:received-blocks/%ju} " "{N:/SACK option%s (SACK blocks) received}\n"); p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} " "{N:/SACK option%s (SACK blocks) sent}\n"); p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} " "{N:/SACK scoreboard overflow}\n"); xo_close_container("sack"); xo_open_container("ecn"); p(tcps_ecn_ce, "\t{:ce-packets/%ju} " "{N:/packet%s with ECN CE bit set}\n"); p(tcps_ecn_ect0, "\t{:ect0-packets/%ju} " "{N:/packet%s with ECN ECT(0) bit set}\n"); p(tcps_ecn_ect1, "\t{:ect1-packets/%ju} " "{N:/packet%s with ECN ECT(1) bit set}\n"); p(tcps_ecn_shs, "\t{:handshakes/%ju} " "{N:/successful ECN handshake%s}\n"); p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} " "{N:/time%s ECN reduced the congestion window}\n"); + + xo_close_container("ecn"); + xo_open_container("tcp-signature"); + p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} " + "{N:/packet%s with matching signature received}\n"); + p(tcps_sig_rcvbadsig, "\t{:received-bad-signature/%ju} " + "{N:/packet%s with bad signature received}\n"); + p(tcps_sig_err_buildsig, "\t{:failed-make-signature/%ju} " + "{N:/time%s failed to make signature due to no SA}\n"); + p(tcps_sig_err_sigopt, "\t{:no-signature-expected/%ju} " + "{N:/time%s unexpected signature received}\n"); + p(tcps_sig_err_nosigopt, "\t{:no-signature-provided/%ju} " + "{N:/time%s no signature provided by segment}\n"); #undef p #undef p1a #undef p2 #undef p2a #undef p3 - xo_close_container("ecn"); + xo_close_container("tcp-signature"); xo_open_container("TCP connection count by state"); xo_emit("{T:/TCP connection count by state}:\n"); for (int i = 0; i < TCP_NSTATES; i++) { /* * XXXGL: is there a way in libxo to use %s * in the "content string" of a format * string? I failed to do that, that's why * a temporary buffer is used to construct * format string for xo_emit(). */ char fmtbuf[80]; if (sflag > 1 && tcps_states[i] == 0) continue; snprintf(fmtbuf, sizeof(fmtbuf), "\t{:%s/%%ju} " "{Np:/connection ,connections} in %s state\n", tcpstates[i], tcpstates[i]); xo_emit(fmtbuf, (uintmax_t )tcps_states[i]); } xo_close_container("TCP connection count by state"); xo_close_container("tcp"); } /* * Dump UDP statistics structure. */ void udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct udpstat udpstat; uint64_t delivered; #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif if (fetch_stats("net.inet.udp.stats", off, &udpstat, sizeof(udpstat), kread_counters) != 0) return; xo_open_container("udp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f, plural(udpstat.f)) #define p1a(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f) p(udps_ipackets, "{:received-datagrams/%ju} " "{N:/datagram%s received}\n"); p1a(udps_hdrops, "{:dropped-incomplete-headers/%ju} " "{N:/with incomplete header}\n"); p1a(udps_badlen, "{:dropped-bad-data-length/%ju} " "{N:/with bad data length field}\n"); p1a(udps_badsum, "{:dropped-bad-checksum/%ju} " "{N:/with bad checksum}\n"); p1a(udps_nosum, "{:dropped-no-checksum/%ju} " "{N:/with no checksum}\n"); p1a(udps_noport, "{:dropped-no-socket/%ju} " "{N:/dropped due to no socket}\n"); p(udps_noportbcast, "{:dropped-broadcast-multicast/%ju} " "{N:/broadcast\\/multicast datagram%s undelivered}\n"); p1a(udps_fullsock, "{:dropped-full-socket-buffer/%ju} " "{N:/dropped due to full socket buffers}\n"); p1a(udpps_pcbhashmiss, "{:not-for-hashed-pcb/%ju} " "{N:/not for hashed pcb}\n"); delivered = udpstat.udps_ipackets - udpstat.udps_hdrops - udpstat.udps_badlen - udpstat.udps_badsum - udpstat.udps_noport - udpstat.udps_noportbcast - udpstat.udps_fullsock; if (delivered || sflag <= 1) xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n", (uint64_t)delivered); p(udps_opackets, "{:output-packets/%ju} {N:/datagram%s output}\n"); /* the next statistic is cumulative in udps_noportbcast */ p(udps_filtermcast, "{:multicast-source-filter-matches/%ju} " "{N:/time%s multicast source filter matched}\n"); #undef p #undef p1a xo_close_container("udp"); } /* * Dump CARP statistics structure. */ void carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct carpstats carpstat; if (fetch_stats("net.inet.carp.stats", off, &carpstat, sizeof(carpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f, plural(carpstat.f)) #define p2(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f) p(carps_ipackets, "\t{:received-inet-packets/%ju} " "{N:/packet%s received (IPv4)}\n"); p(carps_ipackets6, "\t{:received-inet6-packets/%ju} " "{N:/packet%s received (IPv6)}\n"); p(carps_badttl, "\t\t{:dropped-wrong-ttl/%ju} " "{N:/packet%s discarded for wrong TTL}\n"); p(carps_hdrops, "\t\t{:dropped-short-header/%ju} " "{N:/packet%s shorter than header}\n"); p(carps_badsum, "\t\t{:dropped-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(carps_badver, "\t\t{:dropped-bad-version/%ju} " "{N:/discarded packet%s with a bad version}\n"); p2(carps_badlen, "\t\t{:dropped-short-packet/%ju} " "{N:/discarded because packet too short}\n"); p2(carps_badauth, "\t\t{:dropped-bad-authentication/%ju} " "{N:/discarded for bad authentication}\n"); p2(carps_badvhid, "\t\t{:dropped-bad-vhid/%ju} " "{N:/discarded for bad vhid}\n"); p2(carps_badaddrs, "\t\t{:dropped-bad-address-list/%ju} " "{N:/discarded because of a bad address list}\n"); p(carps_opackets, "\t{:sent-inet-packets/%ju} " "{N:/packet%s sent (IPv4)}\n"); p(carps_opackets6, "\t{:sent-inet6-packets/%ju} " "{N:/packet%s sent (IPv6)}\n"); p2(carps_onomem, "\t\t{:send-failed-memory-error/%ju} " "{N:/send failed due to mbuf memory error}\n"); #if notyet p(carps_ostates, "\t\t{:send-state-updates/%s} " "{N:/state update%s sent}\n"); #endif #undef p #undef p2 xo_close_container(name); } /* * Dump IP statistics structure. */ void ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct ipstat ipstat; if (fetch_stats("net.inet.ip.stats", off, &ipstat, sizeof(ipstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f, plural(ipstat.f)) #define p1a(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f) p(ips_total, "\t{:received-packets/%ju} " "{N:/total packet%s received}\n"); p(ips_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/bad header checksum%s}\n"); p1a(ips_toosmall, "\t{:dropped-below-minimum-size/%ju} " "{N:/with size smaller than minimum}\n"); p1a(ips_tooshort, "\t{:dropped-short-packets/%ju} " "{N:/with data size < data length}\n"); p1a(ips_toolong, "\t{:dropped-too-long/%ju} " "{N:/with ip length > max ip packet size}\n"); p1a(ips_badhlen, "\t{:dropped-short-header-length/%ju} " "{N:/with header length < data size}\n"); p1a(ips_badlen, "\t{:dropped-short-data/%ju} " "{N:/with data length < header length}\n"); p1a(ips_badoptions, "\t{:dropped-bad-options/%ju} " "{N:/with bad options}\n"); p1a(ips_badvers, "\t{:dropped-bad-version/%ju} " "{N:/with incorrect version number}\n"); p(ips_fragments, "\t{:received-fragments/%ju} " "{N:/fragment%s received}\n"); p(ips_fragdropped, "\t{:dropped-fragments/%ju} " "{N:/fragment%s dropped (dup or out of space)}\n"); p(ips_fragtimeout, "\t{:dropped-fragments-after-timeout/%ju} " "{N:/fragment%s dropped after timeout}\n"); p(ips_reassembled, "\t{:reassembled-packets/%ju} " "{N:/packet%s reassembled ok}\n"); p(ips_delivered, "\t{:received-local-packets/%ju} " "{N:/packet%s for this host}\n"); p(ips_noproto, "\t{:dropped-unknown-protocol/%ju} " "{N:/packet%s for unknown\\/unsupported protocol}\n"); p(ips_forward, "\t{:forwarded-packets/%ju} " "{N:/packet%s forwarded}"); p(ips_fastforward, " ({:fast-forwarded-packets/%ju} " "{N:/packet%s fast forwarded})"); if (ipstat.ips_forward || sflag <= 1) xo_emit("\n"); p(ips_cantforward, "\t{:packets-cannot-forward/%ju} " "{N:/packet%s not forwardable}\n"); p(ips_notmember, "\t{:received-unknown-multicast-group/%ju} " "{N:/packet%s received for unknown multicast group}\n"); p(ips_redirectsent, "\t{:redirects-sent/%ju} " "{N:/redirect%s sent}\n"); p(ips_localout, "\t{:sent-packets/%ju} " "{N:/packet%s sent from this host}\n"); p(ips_rawout, "\t{:send-packets-fabricated-header/%ju} " "{N:/packet%s sent with fabricated ip header}\n"); p(ips_odropped, "\t{:discard-no-mbufs/%ju} " "{N:/output packet%s dropped due to no bufs, etc.}\n"); p(ips_noroute, "\t{:discard-no-route/%ju} " "{N:/output packet%s discarded due to no route}\n"); p(ips_fragmented, "\t{:sent-fragments/%ju} " "{N:/output datagram%s fragmented}\n"); p(ips_ofragments, "\t{:fragments-created/%ju} " "{N:/fragment%s created}\n"); p(ips_cantfrag, "\t{:discard-cannot-fragment/%ju} " "{N:/datagram%s that can't be fragmented}\n"); p(ips_nogif, "\t{:discard-tunnel-no-gif/%ju} " "{N:/tunneling packet%s that can't find gif}\n"); p(ips_badaddr, "\t{:discard-bad-address/%ju} " "{N:/datagram%s with bad address in header}\n"); #undef p #undef p1a xo_close_container(name); } /* * Dump ARP statistics structure. */ void arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct arpstat arpstat; if (fetch_stats("net.link.ether.arp.stats", off, &arpstat, sizeof(arpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, plural(arpstat.f)) #define p2(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, pluralies(arpstat.f)) p(txrequests, "{:sent-requests/%ju} {N:/ARP request%s sent}\n"); p2(txreplies, "{:sent-replies/%ju} {N:/ARP repl%s sent}\n"); p(rxrequests, "{:received-requests/%ju} " "{N:/ARP request%s received}\n"); p2(rxreplies, "{:received-replies/%ju} " "{N:/ARP repl%s received}\n"); p(received, "{:received-packers/%ju} " "{N:/ARP packet%s received}\n"); p(dropped, "{:dropped-no-entry/%ju} " "{N:/total packet%s dropped due to no ARP entry}\n"); p(timeouts, "{:entries-timeout/%ju} " "{N:/ARP entry%s timed out}\n"); p(dupips, "{:dropped-duplicate-address/%ju} " "{N:/Duplicate IP%s seen}\n"); #undef p #undef p2 xo_close_container(name); } static const char *icmpnames[ICMP_MAXTYPE + 1] = { "echo reply", /* RFC 792 */ "#1", "#2", "destination unreachable", /* RFC 792 */ "source quench", /* RFC 792 */ "routing redirect", /* RFC 792 */ "#6", "#7", "echo", /* RFC 792 */ "router advertisement", /* RFC 1256 */ "router solicitation", /* RFC 1256 */ "time exceeded", /* RFC 792 */ "parameter problem", /* RFC 792 */ "time stamp", /* RFC 792 */ "time stamp reply", /* RFC 792 */ "information request", /* RFC 792 */ "information request reply", /* RFC 792 */ "address mask request", /* RFC 950 */ "address mask reply", /* RFC 950 */ "#19", "#20", "#21", "#22", "#23", "#24", "#25", "#26", "#27", "#28", "#29", "icmp traceroute", /* RFC 1393 */ "datagram conversion error", /* RFC 1475 */ "mobile host redirect", "IPv6 where-are-you", "IPv6 i-am-here", "mobile registration req", "mobile registration reply", "domain name request", /* RFC 1788 */ "domain name reply", /* RFC 1788 */ "icmp SKIP", "icmp photuris", /* RFC 2521 */ }; /* * Dump ICMP statistics. */ void icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct icmpstat icmpstat; size_t len; int i, first; if (fetch_stats("net.inet.icmp.stats", off, &icmpstat, sizeof(icmpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plural(icmpstat.f)) #define p1a(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f) #define p2(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plurales(icmpstat.f)) p(icps_error, "\t{:icmp-calls/%lu} " "{N:/call%s to icmp_error}\n"); p(icps_oldicmp, "\t{:errors-not-from-message/%lu} " "{N:/error%s not generated in response to an icmp message}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_outhist[i] != 0) { if (first) { xo_open_list("output-histogram"); xo_emit("\tOutput histogram:\n"); first = 0; } xo_open_instance("output-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_outhist[i]); else xo_emit("\t\tunknown ICMP #{k:name/%d}: " "{:count/%lu}\n", i, icmpstat.icps_outhist[i]); xo_close_instance("output-histogram"); } } if (!first) xo_close_list("output-histogram"); p(icps_badcode, "\t{:dropped-bad-code/%lu} " "{N:/message%s with bad code fields}\n"); p(icps_tooshort, "\t{:dropped-too-short/%lu} " "{N:/message%s less than the minimum length}\n"); p(icps_checksum, "\t{:dropped-bad-checksum/%lu} " "{N:/message%s with bad checksum}\n"); p(icps_badlen, "\t{:dropped-bad-length/%lu} " "{N:/message%s with bad length}\n"); p1a(icps_bmcastecho, "\t{:dropped-multicast-echo/%lu} " "{N:/multicast echo requests ignored}\n"); p1a(icps_bmcasttstamp, "\t{:dropped-multicast-timestamp/%lu} " "{N:/multicast timestamp requests ignored}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_inhist[i] != 0) { if (first) { xo_open_list("input-histogram"); xo_emit("\tInput histogram:\n"); first = 0; } xo_open_instance("input-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_inhist[i]); else xo_emit( "\t\tunknown ICMP #{k:name/%d}: {:count/%lu}\n", i, icmpstat.icps_inhist[i]); xo_close_instance("input-histogram"); } } if (!first) xo_close_list("input-histogram"); p(icps_reflect, "\t{:sent-packets/%lu} " "{N:/message response%s generated}\n"); p2(icps_badaddr, "\t{:discard-invalid-return-address/%lu} " "{N:/invalid return address%s}\n"); p(icps_noroute, "\t{:discard-no-route/%lu} " "{N:/no return route%s}\n"); #undef p #undef p1a #undef p2 if (live) { len = sizeof i; if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) < 0) return; xo_emit("\tICMP address mask responses are " "{q:icmp-address-responses/%sabled}\n", i ? "en" : "dis"); } xo_close_container(name); } /* * Dump IGMP statistics structure. */ void igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct igmpstat igmpstat; if (fetch_stats("net.inet.igmp.stats", 0, &igmpstat, sizeof(igmpstat), kread) != 0) return; if (igmpstat.igps_version != IGPS_VERSION_3) { xo_warnx("%s: version mismatch (%d != %d)", __func__, igmpstat.igps_version, IGPS_VERSION_3); } if (igmpstat.igps_len != IGPS_VERSION3_LEN) { xo_warnx("%s: size mismatch (%d != %d)", __func__, igmpstat.igps_len, IGPS_VERSION3_LEN); } xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, plural(igmpstat.f)) #define py64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f)) p64(igps_rcv_total, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p64(igps_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p64(igps_rcv_badttl, "\t{:dropped-wrong-ttl/%ju} " "{N:/message%s received with wrong TTL}\n"); p64(igps_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); py64(igps_rcv_v1v2_queries, "\t{:received-membership-queries/%ju} " "{N:/V1\\/V2 membership quer%s received}\n"); py64(igps_rcv_v3_queries, "\t{:received-v3-membership-queries/%ju} " "{N:/V3 membership quer%s received}\n"); py64(igps_rcv_badqueries, "\t{:dropped-membership-queries/%ju} " "{N:/membership quer%s received with invalid field(s)}\n"); py64(igps_rcv_gen_queries, "\t{:received-general-queries/%ju} " "{N:/general quer%s received}\n"); py64(igps_rcv_group_queries, "\t{:received-group-queries/%ju} " "{N:/group quer%s received}\n"); py64(igps_rcv_gsr_queries, "\t{:received-group-source-queries/%ju} " "{N:/group-source quer%s received}\n"); py64(igps_drop_gsr_queries, "\t{:dropped-group-source-queries/%ju} " "{N:/group-source quer%s dropped}\n"); p64(igps_rcv_reports, "\t{:received-membership-requests/%ju} " "{N:/membership report%s received}\n"); p64(igps_rcv_badreports, "\t{:dropped-membership-reports/%ju} " "{N:/membership report%s received with invalid field(s)}\n"); p64(igps_rcv_ourreports, "\t" "{:received-membership-reports-matching/%ju} " "{N:/membership report%s received for groups to which we belong}" "\n"); p64(igps_rcv_nora, "\t{:received-v3-reports-no-router-alert/%ju} " "{N:/V3 report%s received without Router Alert}\n"); p64(igps_snd_reports, "\t{:sent-membership-reports/%ju} " "{N:/membership report%s sent}\n"); #undef p64 #undef py64 xo_close_container(name); } /* * Dump PIM statistics structure. */ void pim_stats(u_long off __unused, const char *name, int af1 __unused, int proto __unused) { struct pimstat pimstat; if (fetch_stats("net.inet.pim.stats", off, &pimstat, sizeof(pimstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, plural(pimstat.f)) #define py(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y") p(pims_rcv_total_msgs, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p(pims_rcv_total_bytes, "\t{:received-bytes/%ju} " "{N:/byte%s received}\n"); p(pims_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p(pims_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); p(pims_rcv_badversion, "\t{:dropped-bad-version/%ju} " "{N:/message%s received with bad version}\n"); p(pims_rcv_registers_msgs, "\t{:received-data-register-messages/%ju} " "{N:/data register message%s received}\n"); p(pims_rcv_registers_bytes, "\t{:received-data-register-bytes/%ju} " "{N:/data register byte%s received}\n"); p(pims_rcv_registers_wrongiif, "\t" "{:received-data-register-wrong-interface/%ju} " "{N:/data register message%s received on wrong iif}\n"); p(pims_rcv_badregisters, "\t{:received-bad-registers/%ju} " "{N:/bad register%s received}\n"); p(pims_snd_registers_msgs, "\t{:sent-data-register-messages/%ju} " "{N:/data register message%s sent}\n"); p(pims_snd_registers_bytes, "\t{:sent-data-register-bytes/%ju} " "{N:/data register byte%s sent}\n"); #undef p #undef py xo_close_container(name); } /* * Pretty print an Internet address (net address + port). */ void inetprint(const char *container, struct in_addr *in, int port, const char *proto, int num_port, const int af1) { struct servent *sp = 0; char line[80], *cp; int width; size_t alen, plen; if (container) xo_open_container(container); if (Wflag) snprintf(line, sizeof(line), "%s.", inetname(in)); else snprintf(line, sizeof(line), "%.*s.", (Aflag && !num_port) ? 12 : 16, inetname(in)); alen = strlen(line); cp = line + alen; if (!num_port && port) sp = getservbyport((int)port, proto); if (sp || port == 0) snprintf(cp, sizeof(line) - alen, "%.15s ", sp ? sp->s_name : "*"); else snprintf(cp, sizeof(line) - alen, "%d ", ntohs((u_short)port)); width = (Aflag && !Wflag) ? 18 : ((!Wflag || af1 == AF_INET) ? 22 : 45); if (Wflag) xo_emit("{d:target/%-*s} ", width, line); else xo_emit("{d:target/%-*.*s} ", width, width, line); plen = strlen(cp) - 1; alen--; xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen, plen, cp); if (container) xo_close_container(container); } /* * Construct an Internet address representation. * If numeric_addr has been supplied, give * numeric value, otherwise try for symbolic name. */ char * inetname(struct in_addr *inp) { char *cp; static char line[MAXHOSTNAMELEN]; struct hostent *hp; struct netent *np; cp = 0; if (!numeric_addr && inp->s_addr != INADDR_ANY) { int net = inet_netof(*inp); int lna = inet_lnaof(*inp); if (lna == INADDR_ANY) { np = getnetbyaddr(net, AF_INET); if (np) cp = np->n_name; } if (cp == NULL) { hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET); if (hp) { cp = hp->h_name; trimdomain(cp, strlen(cp)); } } } if (inp->s_addr == INADDR_ANY) strcpy(line, "*"); else if (cp) { strlcpy(line, cp, sizeof(line)); } else { inp->s_addr = ntohl(inp->s_addr); #define C(x) ((u_int)((x) & 0xff)) snprintf(line, sizeof(line), "%u.%u.%u.%u", C(inp->s_addr >> 24), C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr)); } return (line); }