diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index 15f1e0e44545..964a70e1022e 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -1,1473 +1,1475 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1983, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)ifconfig.c 8.2 (Berkeley) 2/16/94"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include /* IP */ #include #include #include #include #include #include #include #include #include #ifdef JAIL #include #endif #include #include #include #include #include "ifconfig.h" /* * Since "struct ifreq" is composed of various union members, callers * should pay special attention to interpret the value. * (.e.g. little/big endian difference in the structure.) */ struct ifreq ifr; char name[IFNAMSIZ]; char *descr = NULL; size_t descrlen = 64; int setaddr; int setmask; int doalias; int clearaddr; int newaddr = 1; int verbose; int noload; int printifname = 0; int supmedia = 0; int printkeys = 0; /* Print keying material for interfaces. */ /* Formatter Strings */ char *f_inet, *f_inet6, *f_ether, *f_addr; static int ifconfig(int argc, char *const *argv, int iscreate, const struct afswtch *afp); static void status(const struct afswtch *afp, const struct sockaddr_dl *sdl, struct ifaddrs *ifa); static void tunnel_status(int s); static void usage(void) _Noreturn; static struct afswtch *af_getbyname(const char *name); static struct afswtch *af_getbyfamily(int af); static void af_other_status(int); void printifnamemaybe(void); static struct option *opts = NULL; struct ifa_order_elt { int if_order; int af_orders[255]; struct ifaddrs *ifa; TAILQ_ENTRY(ifa_order_elt) link; }; TAILQ_HEAD(ifa_queue, ifa_order_elt); void opt_register(struct option *p) { p->next = opts; opts = p; } static void usage(void) { char options[1024]; struct option *p; /* XXX not right but close enough for now */ options[0] = '\0'; for (p = opts; p != NULL; p = p->next) { strlcat(options, p->opt_usage, sizeof(options)); strlcat(options, " ", sizeof(options)); } fprintf(stderr, "usage: ifconfig [-f type:format] %sinterface address_family\n" " [address [dest_address]] [parameters]\n" " ifconfig interface create\n" " ifconfig -a %s[-d] [-m] [-u] [-v] [address_family]\n" " ifconfig -l [-d] [-u] [address_family]\n" " ifconfig %s[-d] [-m] [-u] [-v]\n", options, options, options); exit(1); } #define ORDERS_SIZE(x) sizeof(x) / sizeof(x[0]) static int calcorders(struct ifaddrs *ifa, struct ifa_queue *q) { struct ifaddrs *prev; struct ifa_order_elt *cur; unsigned int ord, af, ifa_ord; prev = NULL; cur = NULL; ord = 0; ifa_ord = 0; while (ifa != NULL) { if (prev == NULL || strcmp(ifa->ifa_name, prev->ifa_name) != 0) { cur = calloc(1, sizeof(*cur)); if (cur == NULL) return (-1); TAILQ_INSERT_TAIL(q, cur, link); cur->if_order = ifa_ord ++; cur->ifa = ifa; ord = 0; } if (ifa->ifa_addr) { af = ifa->ifa_addr->sa_family; if (af < ORDERS_SIZE(cur->af_orders) && cur->af_orders[af] == 0) cur->af_orders[af] = ++ord; } prev = ifa; ifa = ifa->ifa_next; } return (0); } static int cmpifaddrs(struct ifaddrs *a, struct ifaddrs *b, struct ifa_queue *q) { struct ifa_order_elt *cur, *e1, *e2; unsigned int af1, af2; int ret; e1 = e2 = NULL; ret = strcmp(a->ifa_name, b->ifa_name); if (ret != 0) { TAILQ_FOREACH(cur, q, link) { if (e1 && e2) break; if (strcmp(cur->ifa->ifa_name, a->ifa_name) == 0) e1 = cur; else if (strcmp(cur->ifa->ifa_name, b->ifa_name) == 0) e2 = cur; } if (!e1 || !e2) return (0); else return (e1->if_order - e2->if_order); } else if (a->ifa_addr != NULL && b->ifa_addr != NULL) { TAILQ_FOREACH(cur, q, link) { if (strcmp(cur->ifa->ifa_name, a->ifa_name) == 0) { e1 = cur; break; } } if (!e1) return (0); af1 = a->ifa_addr->sa_family; af2 = b->ifa_addr->sa_family; if (af1 < ORDERS_SIZE(e1->af_orders) && af2 < ORDERS_SIZE(e1->af_orders)) return (e1->af_orders[af1] - e1->af_orders[af2]); } return (0); } static void freeformat(void) { if (f_inet != NULL) free(f_inet); if (f_inet6 != NULL) free(f_inet6); if (f_ether != NULL) free(f_ether); if (f_addr != NULL) free(f_addr); } static void setformat(char *input) { char *formatstr, *category, *modifier; formatstr = strdup(input); while ((category = strsep(&formatstr, ",")) != NULL) { modifier = strchr(category, ':'); if (modifier == NULL || modifier[1] == '\0') { warnx("Skipping invalid format specification: %s\n", category); continue; } /* Split the string on the separator, then seek past it */ modifier[0] = '\0'; modifier++; if (strcmp(category, "addr") == 0) f_addr = strdup(modifier); else if (strcmp(category, "ether") == 0) f_ether = strdup(modifier); else if (strcmp(category, "inet") == 0) f_inet = strdup(modifier); else if (strcmp(category, "inet6") == 0) f_inet6 = strdup(modifier); } free(formatstr); } #undef ORDERS_SIZE static struct ifaddrs * sortifaddrs(struct ifaddrs *list, int (*compare)(struct ifaddrs *, struct ifaddrs *, struct ifa_queue *), struct ifa_queue *q) { struct ifaddrs *right, *temp, *last, *result, *next, *tail; right = list; temp = list; last = list; result = NULL; next = NULL; tail = NULL; if (!list || !list->ifa_next) return (list); while (temp && temp->ifa_next) { last = right; right = right->ifa_next; temp = temp->ifa_next->ifa_next; } last->ifa_next = NULL; list = sortifaddrs(list, compare, q); right = sortifaddrs(right, compare, q); while (list || right) { if (!right) { next = list; list = list->ifa_next; } else if (!list) { next = right; right = right->ifa_next; } else if (compare(list, right, q) <= 0) { next = list; list = list->ifa_next; } else { next = right; right = right->ifa_next; } if (!result) result = next; else tail->ifa_next = next; tail = next; } return (result); } void printifnamemaybe() { if (printifname) printf("%s\n", name); } int main(int argc, char *argv[]) { int c, all, namesonly, downonly, uponly; const struct afswtch *afp = NULL; int ifindex; struct ifaddrs *ifap, *sifap, *ifa; struct ifreq paifr; const struct sockaddr_dl *sdl; char options[1024], *cp, *envformat, *namecp = NULL; struct ifa_queue q = TAILQ_HEAD_INITIALIZER(q); struct ifa_order_elt *cur, *tmp; const char *ifname; struct option *p; size_t iflen; all = downonly = uponly = namesonly = noload = verbose = 0; f_inet = f_inet6 = f_ether = f_addr = NULL; envformat = getenv("IFCONFIG_FORMAT"); if (envformat != NULL) setformat(envformat); /* * Ensure we print interface name when expected to, * even if we terminate early due to error. */ atexit(printifnamemaybe); /* Parse leading line options */ strlcpy(options, "f:adklmnuv", sizeof(options)); for (p = opts; p != NULL; p = p->next) strlcat(options, p->opt, sizeof(options)); while ((c = getopt(argc, argv, options)) != -1) { switch (c) { case 'a': /* scan all interfaces */ all++; break; case 'd': /* restrict scan to "down" interfaces */ downonly++; break; case 'f': if (optarg == NULL) usage(); setformat(optarg); break; case 'k': printkeys++; break; case 'l': /* scan interface names only */ namesonly++; break; case 'm': /* show media choices in status */ supmedia = 1; break; case 'n': /* suppress module loading */ noload++; break; case 'u': /* restrict scan to "up" interfaces */ uponly++; break; case 'v': verbose++; break; default: for (p = opts; p != NULL; p = p->next) if (p->opt[0] == c) { p->cb(optarg); break; } if (p == NULL) usage(); break; } } argc -= optind; argv += optind; /* -l cannot be used with -a or -m */ if (namesonly && (all || supmedia)) usage(); /* nonsense.. */ if (uponly && downonly) usage(); /* no arguments is equivalent to '-a' */ if (!namesonly && argc < 1) all = 1; /* -a and -l allow an address family arg to limit the output */ if (all || namesonly) { if (argc > 1) usage(); ifname = NULL; ifindex = 0; if (argc == 1) { afp = af_getbyname(*argv); if (afp == NULL) { warnx("Address family '%s' unknown.", *argv); usage(); } if (afp->af_name != NULL) argc--, argv++; /* leave with afp non-zero */ } } else { /* not listing, need an argument */ if (argc < 1) usage(); ifname = *argv; argc--, argv++; /* check and maybe load support for this interface */ ifmaybeload(ifname); ifindex = if_nametoindex(ifname); if (ifindex == 0) { /* * NOTE: We must special-case the `create' command * right here as we would otherwise fail when trying * to find the interface. */ if (argc > 0 && (strcmp(argv[0], "create") == 0 || strcmp(argv[0], "plumb") == 0)) { iflen = strlcpy(name, ifname, sizeof(name)); if (iflen >= sizeof(name)) errx(1, "%s: cloning name too long", ifname); ifconfig(argc, argv, 1, NULL); exit(0); } #ifdef JAIL /* * NOTE: We have to special-case the `-vnet' command * right here as we would otherwise fail when trying * to find the interface as it lives in another vnet. */ if (argc > 0 && (strcmp(argv[0], "-vnet") == 0)) { iflen = strlcpy(name, ifname, sizeof(name)); if (iflen >= sizeof(name)) errx(1, "%s: interface name too long", ifname); ifconfig(argc, argv, 0, NULL); exit(0); } #endif errx(1, "interface %s does not exist", ifname); } } /* Check for address family */ if (argc > 0) { afp = af_getbyname(*argv); if (afp != NULL) argc--, argv++; } if (getifaddrs(&ifap) != 0) err(EXIT_FAILURE, "getifaddrs"); cp = NULL; if (calcorders(ifap, &q) != 0) err(EXIT_FAILURE, "calcorders"); sifap = sortifaddrs(ifap, cmpifaddrs, &q); TAILQ_FOREACH_SAFE(cur, &q, link, tmp) free(cur); ifindex = 0; for (ifa = sifap; ifa; ifa = ifa->ifa_next) { memset(&paifr, 0, sizeof(paifr)); strlcpy(paifr.ifr_name, ifa->ifa_name, sizeof(paifr.ifr_name)); if (sizeof(paifr.ifr_addr) >= ifa->ifa_addr->sa_len) { memcpy(&paifr.ifr_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len); } if (ifname != NULL && strcmp(ifname, ifa->ifa_name) != 0) continue; if (ifa->ifa_addr->sa_family == AF_LINK) sdl = (const struct sockaddr_dl *) ifa->ifa_addr; else sdl = NULL; if (cp != NULL && strcmp(cp, ifa->ifa_name) == 0 && !namesonly) continue; iflen = strlcpy(name, ifa->ifa_name, sizeof(name)); if (iflen >= sizeof(name)) { warnx("%s: interface name too long, skipping", ifa->ifa_name); continue; } cp = ifa->ifa_name; if ((ifa->ifa_flags & IFF_CANTCONFIG) != 0) continue; if (downonly && (ifa->ifa_flags & IFF_UP) != 0) continue; if (uponly && (ifa->ifa_flags & IFF_UP) == 0) continue; /* * Are we just listing the interfaces? */ if (namesonly) { if (namecp == cp) continue; if (afp != NULL) { /* special case for "ether" address family */ if (!strcmp(afp->af_name, "ether")) { if (sdl == NULL || (sdl->sdl_type != IFT_ETHER && sdl->sdl_type != IFT_L2VLAN && sdl->sdl_type != IFT_BRIDGE) || sdl->sdl_alen != ETHER_ADDR_LEN) continue; } else { if (ifa->ifa_addr->sa_family != afp->af_af) continue; } } namecp = cp; ifindex++; if (ifindex > 1) printf(" "); fputs(name, stdout); continue; } ifindex++; if (argc > 0) ifconfig(argc, argv, 0, afp); else status(afp, sdl, ifa); } if (namesonly) printf("\n"); freeifaddrs(ifap); freeformat(); exit(0); } static struct afswtch *afs = NULL; void af_register(struct afswtch *p) { p->af_next = afs; afs = p; } static struct afswtch * af_getbyname(const char *name) { struct afswtch *afp; for (afp = afs; afp != NULL; afp = afp->af_next) if (strcmp(afp->af_name, name) == 0) return afp; return NULL; } static struct afswtch * af_getbyfamily(int af) { struct afswtch *afp; for (afp = afs; afp != NULL; afp = afp->af_next) if (afp->af_af == af) return afp; return NULL; } static void af_other_status(int s) { struct afswtch *afp; uint8_t afmask[howmany(AF_MAX, NBBY)]; memset(afmask, 0, sizeof(afmask)); for (afp = afs; afp != NULL; afp = afp->af_next) { if (afp->af_other_status == NULL) continue; if (afp->af_af != AF_UNSPEC && isset(afmask, afp->af_af)) continue; afp->af_other_status(s); setbit(afmask, afp->af_af); } } static void af_all_tunnel_status(int s) { struct afswtch *afp; uint8_t afmask[howmany(AF_MAX, NBBY)]; memset(afmask, 0, sizeof(afmask)); for (afp = afs; afp != NULL; afp = afp->af_next) { if (afp->af_status_tunnel == NULL) continue; if (afp->af_af != AF_UNSPEC && isset(afmask, afp->af_af)) continue; afp->af_status_tunnel(s); setbit(afmask, afp->af_af); } } static struct cmd *cmds = NULL; void cmd_register(struct cmd *p) { p->c_next = cmds; cmds = p; } static const struct cmd * cmd_lookup(const char *name, int iscreate) { const struct cmd *p; for (p = cmds; p != NULL; p = p->c_next) if (strcmp(name, p->c_name) == 0) { if (iscreate) { if (p->c_iscloneop) return p; } else { if (!p->c_iscloneop) return p; } } return NULL; } struct callback { callback_func *cb_func; void *cb_arg; struct callback *cb_next; }; static struct callback *callbacks = NULL; void callback_register(callback_func *func, void *arg) { struct callback *cb; cb = malloc(sizeof(struct callback)); if (cb == NULL) errx(1, "unable to allocate memory for callback"); cb->cb_func = func; cb->cb_arg = arg; cb->cb_next = callbacks; callbacks = cb; } /* specially-handled commands */ static void setifaddr(const char *, int, int, const struct afswtch *); static const struct cmd setifaddr_cmd = DEF_CMD("ifaddr", 0, setifaddr); static void setifdstaddr(const char *, int, int, const struct afswtch *); static const struct cmd setifdstaddr_cmd = DEF_CMD("ifdstaddr", 0, setifdstaddr); static int ifconfig(int argc, char *const *argv, int iscreate, const struct afswtch *uafp) { const struct afswtch *afp, *nafp; const struct cmd *p; struct callback *cb; int s; strlcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); afp = NULL; if (uafp != NULL) afp = uafp; /* * This is the historical "accident" allowing users to configure IPv4 * addresses without the "inet" keyword which while a nice feature has * proven to complicate other things. We cannot remove this but only * make sure we will never have a similar implicit default for IPv6 or * any other address familiy. We need a fallback though for * ifconfig IF up/down etc. to work without INET support as people * never used ifconfig IF link up/down, etc. either. */ #ifndef RESCUE #ifdef INET if (afp == NULL && feature_present("inet")) afp = af_getbyname("inet"); #endif #endif if (afp == NULL) afp = af_getbyname("link"); if (afp == NULL) { warnx("Please specify an address_family."); usage(); } top: ifr.ifr_addr.sa_family = afp->af_af == AF_LINK || afp->af_af == AF_UNSPEC ? AF_LOCAL : afp->af_af; if ((s = socket(ifr.ifr_addr.sa_family, SOCK_DGRAM, 0)) < 0 && (uafp != NULL || errno != EAFNOSUPPORT || (s = socket(AF_LOCAL, SOCK_DGRAM, 0)) < 0)) err(1, "socket(family %u,SOCK_DGRAM)", ifr.ifr_addr.sa_family); while (argc > 0) { p = cmd_lookup(*argv, iscreate); if (iscreate && p == NULL) { /* * Push the clone create callback so the new * device is created and can be used for any * remaining arguments. */ cb = callbacks; if (cb == NULL) errx(1, "internal error, no callback"); callbacks = cb->cb_next; cb->cb_func(s, cb->cb_arg); iscreate = 0; /* * Handle any address family spec that * immediately follows and potentially * recreate the socket. */ nafp = af_getbyname(*argv); if (nafp != NULL) { argc--, argv++; if (nafp != afp) { close(s); afp = nafp; goto top; } } /* * Look for a normal parameter. */ continue; } if (p == NULL) { /* * Not a recognized command, choose between setting * the interface address and the dst address. */ p = (setaddr ? &setifdstaddr_cmd : &setifaddr_cmd); } if (p->c_parameter == NEXTARG && p->c_u.c_func) { if (argv[1] == NULL) errx(1, "'%s' requires argument", p->c_name); p->c_u.c_func(argv[1], 0, s, afp); argc--, argv++; } else if (p->c_parameter == OPTARG && p->c_u.c_func) { p->c_u.c_func(argv[1], 0, s, afp); if (argv[1] != NULL) argc--, argv++; } else if (p->c_parameter == NEXTARG2 && p->c_u.c_func2) { if (argc < 3) errx(1, "'%s' requires 2 arguments", p->c_name); p->c_u.c_func2(argv[1], argv[2], s, afp); argc -= 2, argv += 2; } else if (p->c_u.c_func) p->c_u.c_func(*argv, p->c_parameter, s, afp); argc--, argv++; } /* * Do any post argument processing required by the address family. */ if (afp->af_postproc != NULL) afp->af_postproc(s, afp); /* * Do deferred callbacks registered while processing * command-line arguments. */ for (cb = callbacks; cb != NULL; cb = cb->cb_next) cb->cb_func(s, cb->cb_arg); /* * Do deferred operations. */ if (clearaddr) { if (afp->af_ridreq == NULL || afp->af_difaddr == 0) { warnx("interface %s cannot change %s addresses!", name, afp->af_name); clearaddr = 0; } } if (clearaddr) { int ret; strlcpy(((struct ifreq *)afp->af_ridreq)->ifr_name, name, sizeof ifr.ifr_name); ret = ioctl(s, afp->af_difaddr, afp->af_ridreq); if (ret < 0) { if (errno == EADDRNOTAVAIL && (doalias >= 0)) { /* means no previous address for interface */ } else Perror("ioctl (SIOCDIFADDR)"); } } if (newaddr) { if (afp->af_addreq == NULL || afp->af_aifaddr == 0) { warnx("interface %s cannot change %s addresses!", name, afp->af_name); newaddr = 0; } } if (newaddr && (setaddr || setmask)) { strlcpy(((struct ifreq *)afp->af_addreq)->ifr_name, name, sizeof ifr.ifr_name); if (ioctl(s, afp->af_aifaddr, afp->af_addreq) < 0) Perror("ioctl (SIOCAIFADDR)"); } close(s); return(0); } /*ARGSUSED*/ static void setifaddr(const char *addr, int param, int s, const struct afswtch *afp) { if (afp->af_getaddr == NULL) return; /* * Delay the ioctl to set the interface addr until flags are all set. * The address interpretation may depend on the flags, * and the flags may change when the address is set. */ setaddr++; if (doalias == 0 && afp->af_af != AF_LINK) clearaddr = 1; afp->af_getaddr(addr, (doalias >= 0 ? ADDR : RIDADDR)); } static void settunnel(const char *src, const char *dst, int s, const struct afswtch *afp) { struct addrinfo *srcres, *dstres; int ecode; if (afp->af_settunnel == NULL) { warn("address family %s does not support tunnel setup", afp->af_name); return; } if ((ecode = getaddrinfo(src, NULL, NULL, &srcres)) != 0) errx(1, "error in parsing address string: %s", gai_strerror(ecode)); if ((ecode = getaddrinfo(dst, NULL, NULL, &dstres)) != 0) errx(1, "error in parsing address string: %s", gai_strerror(ecode)); if (srcres->ai_addr->sa_family != dstres->ai_addr->sa_family) errx(1, "source and destination address families do not match"); afp->af_settunnel(s, srcres, dstres); freeaddrinfo(srcres); freeaddrinfo(dstres); } /* ARGSUSED */ static void deletetunnel(const char *vname, int param, int s, const struct afswtch *afp) { if (ioctl(s, SIOCDIFPHYADDR, &ifr) < 0) err(1, "SIOCDIFPHYADDR"); } #ifdef JAIL static void setifvnet(const char *jname, int dummy __unused, int s, const struct afswtch *afp) { struct ifreq my_ifr; memcpy(&my_ifr, &ifr, sizeof(my_ifr)); my_ifr.ifr_jid = jail_getid(jname); if (my_ifr.ifr_jid < 0) errx(1, "%s", jail_errmsg); if (ioctl(s, SIOCSIFVNET, &my_ifr) < 0) err(1, "SIOCSIFVNET"); } static void setifrvnet(const char *jname, int dummy __unused, int s, const struct afswtch *afp) { struct ifreq my_ifr; memcpy(&my_ifr, &ifr, sizeof(my_ifr)); my_ifr.ifr_jid = jail_getid(jname); if (my_ifr.ifr_jid < 0) errx(1, "%s", jail_errmsg); if (ioctl(s, SIOCSIFRVNET, &my_ifr) < 0) err(1, "SIOCSIFRVNET(%d, %s)", my_ifr.ifr_jid, my_ifr.ifr_name); } #endif static void setifnetmask(const char *addr, int dummy __unused, int s, const struct afswtch *afp) { if (afp->af_getaddr != NULL) { setmask++; afp->af_getaddr(addr, MASK); } } static void setifbroadaddr(const char *addr, int dummy __unused, int s, const struct afswtch *afp) { if (afp->af_getaddr != NULL) afp->af_getaddr(addr, DSTADDR); } static void notealias(const char *addr, int param, int s, const struct afswtch *afp) { #define rqtosa(x) (&(((struct ifreq *)(afp->x))->ifr_addr)) if (setaddr && doalias == 0 && param < 0) if (afp->af_addreq != NULL && afp->af_ridreq != NULL) bcopy((caddr_t)rqtosa(af_addreq), (caddr_t)rqtosa(af_ridreq), rqtosa(af_addreq)->sa_len); doalias = param; if (param < 0) { clearaddr = 1; newaddr = 0; } else clearaddr = 0; #undef rqtosa } /*ARGSUSED*/ static void setifdstaddr(const char *addr, int param __unused, int s, const struct afswtch *afp) { if (afp->af_getaddr != NULL) afp->af_getaddr(addr, DSTADDR); } /* * Note: doing an SIOCIGIFFLAGS scribbles on the union portion * of the ifreq structure, which may confuse other parts of ifconfig. * Make a private copy so we can avoid that. */ static void setifflags(const char *vname, int value, int s, const struct afswtch *afp) { struct ifreq my_ifr; int flags; memset(&my_ifr, 0, sizeof(my_ifr)); (void) strlcpy(my_ifr.ifr_name, name, sizeof(my_ifr.ifr_name)); if (ioctl(s, SIOCGIFFLAGS, (caddr_t)&my_ifr) < 0) { Perror("ioctl (SIOCGIFFLAGS)"); exit(1); } flags = (my_ifr.ifr_flags & 0xffff) | (my_ifr.ifr_flagshigh << 16); if (value < 0) { value = -value; flags &= ~value; } else flags |= value; my_ifr.ifr_flags = flags & 0xffff; my_ifr.ifr_flagshigh = flags >> 16; if (ioctl(s, SIOCSIFFLAGS, (caddr_t)&my_ifr) < 0) Perror(vname); } void setifcap(const char *vname, int value, int s, const struct afswtch *afp) { int flags; if (ioctl(s, SIOCGIFCAP, (caddr_t)&ifr) < 0) { Perror("ioctl (SIOCGIFCAP)"); exit(1); } flags = ifr.ifr_curcap; if (value < 0) { value = -value; flags &= ~value; } else flags |= value; flags &= ifr.ifr_reqcap; ifr.ifr_reqcap = flags; if (ioctl(s, SIOCSIFCAP, (caddr_t)&ifr) < 0) Perror(vname); } static void setifmetric(const char *val, int dummy __unused, int s, const struct afswtch *afp) { strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name)); ifr.ifr_metric = atoi(val); if (ioctl(s, SIOCSIFMETRIC, (caddr_t)&ifr) < 0) err(1, "ioctl SIOCSIFMETRIC (set metric)"); } static void setifmtu(const char *val, int dummy __unused, int s, const struct afswtch *afp) { strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name)); ifr.ifr_mtu = atoi(val); if (ioctl(s, SIOCSIFMTU, (caddr_t)&ifr) < 0) err(1, "ioctl SIOCSIFMTU (set mtu)"); } static void setifname(const char *val, int dummy __unused, int s, const struct afswtch *afp) { char *newname; strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); newname = strdup(val); if (newname == NULL) err(1, "no memory to set ifname"); ifr.ifr_data = newname; if (ioctl(s, SIOCSIFNAME, (caddr_t)&ifr) < 0) { free(newname); err(1, "ioctl SIOCSIFNAME (set name)"); } printifname = 1; strlcpy(name, newname, sizeof(name)); free(newname); } /* ARGSUSED */ static void setifdescr(const char *val, int dummy __unused, int s, const struct afswtch *afp) { char *newdescr; strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_buffer.length = strlen(val) + 1; if (ifr.ifr_buffer.length == 1) { ifr.ifr_buffer.buffer = newdescr = NULL; ifr.ifr_buffer.length = 0; } else { newdescr = strdup(val); ifr.ifr_buffer.buffer = newdescr; if (newdescr == NULL) { warn("no memory to set ifdescr"); return; } } if (ioctl(s, SIOCSIFDESCR, (caddr_t)&ifr) < 0) err(1, "ioctl SIOCSIFDESCR (set descr)"); free(newdescr); } /* ARGSUSED */ static void unsetifdescr(const char *val, int value, int s, const struct afswtch *afp) { setifdescr("", 0, s, 0); } #define IFFBITS \ "\020\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5POINTOPOINT\7RUNNING" \ "\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX\15LINK0\16LINK1\17LINK2" \ "\20MULTICAST\22PPROMISC\23MONITOR\24STATICARP" #define IFCAPBITS \ "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \ "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ -"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT" +"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP" /* * Print the status of the interface. If an address family was * specified, show only it; otherwise, show them all. */ static void status(const struct afswtch *afp, const struct sockaddr_dl *sdl, struct ifaddrs *ifa) { struct ifaddrs *ift; int allfamilies, s; struct ifstat ifs; if (afp == NULL) { allfamilies = 1; ifr.ifr_addr.sa_family = AF_LOCAL; } else { allfamilies = 0; ifr.ifr_addr.sa_family = afp->af_af == AF_LINK ? AF_LOCAL : afp->af_af; } strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); s = socket(ifr.ifr_addr.sa_family, SOCK_DGRAM, 0); if (s < 0) err(1, "socket(family %u,SOCK_DGRAM)", ifr.ifr_addr.sa_family); printf("%s: ", name); printb("flags", ifa->ifa_flags, IFFBITS); if (ioctl(s, SIOCGIFMETRIC, &ifr) != -1) printf(" metric %d", ifr.ifr_metric); if (ioctl(s, SIOCGIFMTU, &ifr) != -1) printf(" mtu %d", ifr.ifr_mtu); putchar('\n'); for (;;) { if ((descr = reallocf(descr, descrlen)) != NULL) { ifr.ifr_buffer.buffer = descr; ifr.ifr_buffer.length = descrlen; if (ioctl(s, SIOCGIFDESCR, &ifr) == 0) { if (ifr.ifr_buffer.buffer == descr) { if (strlen(descr) > 0) printf("\tdescription: %s\n", descr); } else if (ifr.ifr_buffer.length > descrlen) { descrlen = ifr.ifr_buffer.length; continue; } } } else warn("unable to allocate memory for interface" "description"); break; } if (ioctl(s, SIOCGIFCAP, (caddr_t)&ifr) == 0) { if (ifr.ifr_curcap != 0) { printb("\toptions", ifr.ifr_curcap, IFCAPBITS); putchar('\n'); } if (supmedia && ifr.ifr_reqcap != 0) { printb("\tcapabilities", ifr.ifr_reqcap, IFCAPBITS); putchar('\n'); } } tunnel_status(s); for (ift = ifa; ift != NULL; ift = ift->ifa_next) { if (ift->ifa_addr == NULL) continue; if (strcmp(ifa->ifa_name, ift->ifa_name) != 0) continue; if (allfamilies) { const struct afswtch *p; p = af_getbyfamily(ift->ifa_addr->sa_family); if (p != NULL && p->af_status != NULL) p->af_status(s, ift); } else if (afp->af_af == ift->ifa_addr->sa_family) afp->af_status(s, ift); } #if 0 if (allfamilies || afp->af_af == AF_LINK) { const struct afswtch *lafp; /* * Hack; the link level address is received separately * from the routing information so any address is not * handled above. Cobble together an entry and invoke * the status method specially. */ lafp = af_getbyname("lladdr"); if (lafp != NULL) { info.rti_info[RTAX_IFA] = (struct sockaddr *)sdl; lafp->af_status(s, &info); } } #endif if (allfamilies) af_other_status(s); else if (afp->af_other_status != NULL) afp->af_other_status(s); strlcpy(ifs.ifs_name, name, sizeof ifs.ifs_name); if (ioctl(s, SIOCGIFSTATUS, &ifs) == 0) printf("%s", ifs.ascii); if (verbose > 0) sfp_status(s, &ifr, verbose); close(s); return; } static void tunnel_status(int s) { af_all_tunnel_status(s); } void Perror(const char *cmd) { switch (errno) { case ENXIO: errx(1, "%s: no such interface", cmd); break; case EPERM: errx(1, "%s: permission denied", cmd); break; default: err(1, "%s", cmd); } } /* * Print a value a la the %b format of the kernel's printf */ void printb(const char *s, unsigned v, const char *bits) { int i, any = 0; char c; if (bits && *bits == 8) printf("%s=%o", s, v); else printf("%s=%x", s, v); if (bits) { bits++; putchar('<'); while ((i = *bits++) != '\0') { if (v & (1 << (i-1))) { if (any) putchar(','); any = 1; for (; (c = *bits) > 32; bits++) putchar(c); } else for (; *bits > 32; bits++) ; } putchar('>'); } } void print_vhid(const struct ifaddrs *ifa, const char *s) { struct if_data *ifd; if (ifa->ifa_data == NULL) return; ifd = ifa->ifa_data; if (ifd->ifi_vhid == 0) return; printf("vhid %d ", ifd->ifi_vhid); } void ifmaybeload(const char *name) { #define MOD_PREFIX_LEN 3 /* "if_" */ struct module_stat mstat; int fileid, modid; char ifkind[IFNAMSIZ + MOD_PREFIX_LEN], ifname[IFNAMSIZ], *dp; const char *cp; /* loading suppressed by the user */ if (noload) return; /* trim the interface number off the end */ strlcpy(ifname, name, sizeof(ifname)); for (dp = ifname; *dp != 0; dp++) if (isdigit(*dp)) { *dp = 0; break; } /* turn interface and unit into module name */ strlcpy(ifkind, "if_", sizeof(ifkind)); strlcat(ifkind, ifname, sizeof(ifkind)); /* scan files in kernel */ mstat.version = sizeof(struct module_stat); for (fileid = kldnext(0); fileid > 0; fileid = kldnext(fileid)) { /* scan modules in file */ for (modid = kldfirstmod(fileid); modid > 0; modid = modfnext(modid)) { if (modstat(modid, &mstat) < 0) continue; /* strip bus name if present */ if ((cp = strchr(mstat.name, '/')) != NULL) { cp++; } else { cp = mstat.name; } /* already loaded? */ if (strcmp(ifname, cp) == 0 || strcmp(ifkind, cp) == 0) return; } } /* * Try to load the module. But ignore failures, because ifconfig can't * infer the names of all drivers (eg mlx4en(4)). */ (void) kldload(ifkind); } static struct cmd basic_cmds[] = { DEF_CMD("up", IFF_UP, setifflags), DEF_CMD("down", -IFF_UP, setifflags), DEF_CMD("arp", -IFF_NOARP, setifflags), DEF_CMD("-arp", IFF_NOARP, setifflags), DEF_CMD("debug", IFF_DEBUG, setifflags), DEF_CMD("-debug", -IFF_DEBUG, setifflags), DEF_CMD_ARG("description", setifdescr), DEF_CMD_ARG("descr", setifdescr), DEF_CMD("-description", 0, unsetifdescr), DEF_CMD("-descr", 0, unsetifdescr), DEF_CMD("promisc", IFF_PPROMISC, setifflags), DEF_CMD("-promisc", -IFF_PPROMISC, setifflags), DEF_CMD("add", IFF_UP, notealias), DEF_CMD("alias", IFF_UP, notealias), DEF_CMD("-alias", -IFF_UP, notealias), DEF_CMD("delete", -IFF_UP, notealias), DEF_CMD("remove", -IFF_UP, notealias), #ifdef notdef #define EN_SWABIPS 0x1000 DEF_CMD("swabips", EN_SWABIPS, setifflags), DEF_CMD("-swabips", -EN_SWABIPS, setifflags), #endif DEF_CMD_ARG("netmask", setifnetmask), DEF_CMD_ARG("metric", setifmetric), DEF_CMD_ARG("broadcast", setifbroadaddr), DEF_CMD_ARG2("tunnel", settunnel), DEF_CMD("-tunnel", 0, deletetunnel), DEF_CMD("deletetunnel", 0, deletetunnel), #ifdef JAIL DEF_CMD_ARG("vnet", setifvnet), DEF_CMD_ARG("-vnet", setifrvnet), #endif DEF_CMD("link0", IFF_LINK0, setifflags), DEF_CMD("-link0", -IFF_LINK0, setifflags), DEF_CMD("link1", IFF_LINK1, setifflags), DEF_CMD("-link1", -IFF_LINK1, setifflags), DEF_CMD("link2", IFF_LINK2, setifflags), DEF_CMD("-link2", -IFF_LINK2, setifflags), DEF_CMD("monitor", IFF_MONITOR, setifflags), DEF_CMD("-monitor", -IFF_MONITOR, setifflags), DEF_CMD("staticarp", IFF_STATICARP, setifflags), DEF_CMD("-staticarp", -IFF_STATICARP, setifflags), DEF_CMD("rxcsum6", IFCAP_RXCSUM_IPV6, setifcap), DEF_CMD("-rxcsum6", -IFCAP_RXCSUM_IPV6, setifcap), DEF_CMD("txcsum6", IFCAP_TXCSUM_IPV6, setifcap), DEF_CMD("-txcsum6", -IFCAP_TXCSUM_IPV6, setifcap), DEF_CMD("rxcsum", IFCAP_RXCSUM, setifcap), DEF_CMD("-rxcsum", -IFCAP_RXCSUM, setifcap), DEF_CMD("txcsum", IFCAP_TXCSUM, setifcap), DEF_CMD("-txcsum", -IFCAP_TXCSUM, setifcap), DEF_CMD("netcons", IFCAP_NETCONS, setifcap), DEF_CMD("-netcons", -IFCAP_NETCONS, setifcap), DEF_CMD("polling", IFCAP_POLLING, setifcap), DEF_CMD("-polling", -IFCAP_POLLING, setifcap), DEF_CMD("tso6", IFCAP_TSO6, setifcap), DEF_CMD("-tso6", -IFCAP_TSO6, setifcap), DEF_CMD("tso4", IFCAP_TSO4, setifcap), DEF_CMD("-tso4", -IFCAP_TSO4, setifcap), DEF_CMD("tso", IFCAP_TSO, setifcap), DEF_CMD("-tso", -IFCAP_TSO, setifcap), DEF_CMD("toe", IFCAP_TOE, setifcap), DEF_CMD("-toe", -IFCAP_TOE, setifcap), DEF_CMD("lro", IFCAP_LRO, setifcap), DEF_CMD("-lro", -IFCAP_LRO, setifcap), DEF_CMD("wol", IFCAP_WOL, setifcap), DEF_CMD("-wol", -IFCAP_WOL, setifcap), DEF_CMD("wol_ucast", IFCAP_WOL_UCAST, setifcap), DEF_CMD("-wol_ucast", -IFCAP_WOL_UCAST, setifcap), DEF_CMD("wol_mcast", IFCAP_WOL_MCAST, setifcap), DEF_CMD("-wol_mcast", -IFCAP_WOL_MCAST, setifcap), DEF_CMD("wol_magic", IFCAP_WOL_MAGIC, setifcap), DEF_CMD("-wol_magic", -IFCAP_WOL_MAGIC, setifcap), DEF_CMD("txrtlmt", IFCAP_TXRTLMT, setifcap), DEF_CMD("-txrtlmt", -IFCAP_TXRTLMT, setifcap), + DEF_CMD("hwrxtsmp", IFCAP_HWRXTSTMP, setifcap), + DEF_CMD("-hwrxtsmp", -IFCAP_HWRXTSTMP, setifcap), DEF_CMD("normal", -IFF_LINK0, setifflags), DEF_CMD("compress", IFF_LINK0, setifflags), DEF_CMD("noicmp", IFF_LINK1, setifflags), DEF_CMD_ARG("mtu", setifmtu), DEF_CMD_ARG("name", setifname), }; static __constructor void ifconfig_ctor(void) { size_t i; for (i = 0; i < nitems(basic_cmds); i++) cmd_register(&basic_cmds[i]); } diff --git a/sys/net/if.h b/sys/net/if.h index 91a6786d4fec..60b83fcb9a9e 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -1,587 +1,588 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #if __BSD_VISIBLE /* * does not depend on on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for . (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. */ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. */ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. */ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ /* 0x20 was IFF_SMART */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional feature & capabilites that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differenciated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. * This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ #define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ +#define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. */ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. */ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? */ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index b4f21014bf5e..5e6865fe8e41 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1,1352 +1,1374 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ /* XXX: These includes suck. Sorry! */ #include #ifdef _KERNEL #include #include #ifdef WITNESS #include #endif #endif #ifdef _KERNEL #include #define MBUF_PROBE1(probe, arg0) \ SDT_PROBE1(sdt, , , probe, arg0) #define MBUF_PROBE2(probe, arg0, arg1) \ SDT_PROBE2(sdt, , , probe, arg0, arg1) #define MBUF_PROBE3(probe, arg0, arg1, arg2) \ SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2) #define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \ SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3) #define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \ SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4) SDT_PROBE_DECLARE(sdt, , , m__init); SDT_PROBE_DECLARE(sdt, , , m__gethdr); SDT_PROBE_DECLARE(sdt, , , m__get); SDT_PROBE_DECLARE(sdt, , , m__getcl); SDT_PROBE_DECLARE(sdt, , , m__clget); SDT_PROBE_DECLARE(sdt, , , m__cljget); SDT_PROBE_DECLARE(sdt, , , m__cljset); SDT_PROBE_DECLARE(sdt, , , m__free); SDT_PROBE_DECLARE(sdt, , , m__freem); #endif /* _KERNEL */ /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in * sys/param.h), which has no additional overhead and is used instead of the * internal data area; this is done when at least MINCLSIZE of data must be * stored. Additionally, it is possible to allocate a separate buffer * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. * * NB: These calculation do not take actual compiler-induced alignment and * padding inside the complete struct mbuf into account. Appropriate * attention is required when changing members of struct mbuf. * * MLEN is data length in a normal mbuf. * MHLEN is data length in an mbuf with pktheader. * MINCLSIZE is a smallest amount of data that should be put into cluster. * * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are sensible. */ struct mbuf; #define MHSIZE offsetof(struct mbuf, m_dat) #define MPKTHSIZE offsetof(struct mbuf, m_pktdat) #define MLEN ((int)(MSIZE - MHSIZE)) #define MHLEN ((int)(MSIZE - MPKTHSIZE)) #define MINCLSIZE (MHLEN + 1) #ifdef _KERNEL /*- * Macro for type conversion: convert mbuf pointer to data pointer of correct * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * mtodo(m, o) -- Same as above but with offset 'o' into data. */ #define mtod(m, t) ((t)((m)->m_data)) #define mtodo(m, o) ((void *)(((m)->m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet * allocations. */ struct mb_args { int flags; /* Flags for mbuf being allocated */ short type; /* Type of mbuf being allocated */ }; #endif /* _KERNEL */ /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ void (*m_tag_free)(struct m_tag *); }; /* * Static network interface owned tag. * Allocated through ifp->if_snd_tag_alloc(). */ struct m_snd_tag { struct ifnet *ifp; /* network interface tag belongs to */ }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 * LP64: 56 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct pkthdr { union { struct m_snd_tag *snd_tag; /* send tag, if any */ struct ifnet *rcvif; /* rcv interface */ }; SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ /* Layer crossing persistent information. */ uint32_t flowid; /* packet's 4-tuple system */ - uint64_t csum_flags; /* checksum and offload features */ + uint32_t csum_flags; /* checksum and offload features */ uint16_t fibnum; /* this packet should use this fib */ uint8_t cosqos; /* class/quality of service */ uint8_t rsstype; /* hash type */ - uint8_t l2hlen; /* layer 2 header length */ - uint8_t l3hlen; /* layer 3 header length */ - uint8_t l4hlen; /* layer 4 header length */ - uint8_t l5hlen; /* layer 5 header length */ + union { + uint64_t rcv_tstmp; /* timestamp in ns */ + struct { + uint8_t l2hlen; /* layer 2 hdr len */ + uint8_t l3hlen; /* layer 3 hdr len */ + uint8_t l4hlen; /* layer 4 hdr len */ + uint8_t l5hlen; /* layer 5 hdr len */ + uint32_t spare; + }; + }; union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_per; /* Layer specific non-persistent local storage for reassembly, etc. */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_loc; }; #define ether_vtag PH_per.sixteen[0] #define PH_vt PH_per #define vt_nrecs sixteen[0] #define tso_segsz PH_per.sixteen[1] #define lro_nsegs tso_segsz #define csum_phsum PH_per.sixteen[2] #define csum_data PH_per.thirtytwo[1] /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. * Size ILP32: 28 * LP64: 48 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ typedef void m_ext_free_t(struct mbuf *); struct m_ext { union { /* * If EXT_FLAG_EMBREF is set, then we use refcount in the * mbuf, the 'ext_count' member. Otherwise, we have a * shadow copy and we use pointer 'ext_cnt'. The original * mbuf is responsible to carry the pointer to free routine * and its arguments. They aren't copied into shadows in * mb_dupcl() to avoid dereferencing next cachelines. */ volatile u_int ext_count; volatile u_int *ext_cnt; }; char *ext_buf; /* start of buffer */ uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ /* * Fields below store the free context for the external storage. * They are valid only in the refcount carrying mbuf, the one with * EXT_FLAG_EMBREF flag, with exclusion for EXT_EXTREF type, where * the free context is copied into all mbufs that use same external * storage. */ #define m_ext_copylen offsetof(struct m_ext, ext_free) m_ext_free_t *ext_free; /* free routine if not the usual */ void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ }; /* * The core of the mbuf object along with some shortcut defines for practical * purposes. */ struct mbuf { /* * Header present at the beginning of every mbuf. * Size ILP32: 24 * LP64: 32 * Compile-time assertions in uipc_mbuf.c test these values to ensure * that they are correct. */ union { /* next buffer in chain */ struct mbuf *m_next; SLIST_ENTRY(mbuf) m_slist; STAILQ_ENTRY(mbuf) m_stailq; }; union { /* next chain in queue/record */ struct mbuf *m_nextpkt; SLIST_ENTRY(mbuf) m_slistpkt; STAILQ_ENTRY(mbuf) m_stailqpkt; }; caddr_t m_data; /* location of data */ int32_t m_len; /* amount of data in this mbuf */ uint32_t m_type:8, /* type of data in this mbuf */ m_flags:24; /* flags; see below */ #if !defined(__LP64__) uint32_t m_pad; /* pad for 64bit alignment */ #endif /* * A set of optional headers (packet header, external storage header) * and internal data storage. Historically, these arrays were sized * to MHLEN (space left after a packet header) and MLEN (space left * after only a regular mbuf header); they are now variable size in * order to support future work on variable-size mbufs. */ union { struct { struct pkthdr m_pkthdr; /* M_PKTHDR set */ union { struct m_ext m_ext; /* M_EXT set */ char m_pktdat[0]; }; }; char m_dat[0]; /* !M_PKTHDR, !M_EXT */ }; }; /* * mbuf flags of global significance and layer crossing. * Those of only protocol/layer specific significance are to be mapped * to M_PROTO[1-12] and cleared at layer handoff boundaries. * NB: Limited to the lower 24 bits. */ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data is marked read-only */ #define M_BCAST 0x00000010 /* send/received as link-level broadcast */ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ #define M_PROMISC 0x00000040 /* packet was not for us */ #define M_VLANTAG 0x00000080 /* ether_vtag is valid */ #define M_UNUSED_8 0x00000100 /* --available-- */ #define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ +#define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */ +#define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically + hw-stamped on port (useful for IEEE 1588 + and 802.1AS) */ #define M_PROTO1 0x00001000 /* protocol-specific */ #define M_PROTO2 0x00002000 /* protocol-specific */ #define M_PROTO3 0x00004000 /* protocol-specific */ #define M_PROTO4 0x00008000 /* protocol-specific */ #define M_PROTO5 0x00010000 /* protocol-specific */ #define M_PROTO6 0x00020000 /* protocol-specific */ #define M_PROTO7 0x00040000 /* protocol-specific */ #define M_PROTO8 0x00080000 /* protocol-specific */ #define M_PROTO9 0x00100000 /* protocol-specific */ #define M_PROTO10 0x00200000 /* protocol-specific */ #define M_PROTO11 0x00400000 /* protocol-specific */ #define M_PROTO12 0x00800000 /* protocol-specific */ #define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */ /* * Flags to purge when crossing layers. */ #define M_PROTOFLAGS \ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12) /* * Flags preserved when copying m_pkthdr. */ #define M_COPYFLAGS \ - (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \ - M_PROTOFLAGS) + (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG|M_TSTMP| \ + M_TSTMP_HPREC|M_PROTOFLAGS) /* * Mbuf flag description for use with printf(9) %b identifier. */ #define M_FLAG_BITS \ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ - "\7M_PROMISC\10M_VLANTAG" + "\7M_PROMISC\10M_VLANTAG\13M_TSTMP\14M_TSTMP_HPREC" #define M_FLAG_PROTOBITS \ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \ "\27M_PROTO11\30M_PROTO12" #define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS) /* * Network interface cards are able to hash protocol fields (such as IPv4 * addresses and TCP port numbers) classify packets into flows. These flows * can then be used to maintain ordering while delivering packets to the OS * via parallel input queues, as well as to provide a stateless affinity * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set * m_flag fields to indicate how the hash should be interpreted by the * network stack. * * Most NICs support RSS, which provides ordering and explicit affinity, and * use the hash m_flag bits to indicate what header fields were covered by * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non- * RSS cards or configurations that provide an opaque flow identifier, allowing * for ordering and distribution without explicit affinity. Additionally, * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash * properties. * * The meaning of the IPV6_EX suffix: * "o Home address from the home address option in the IPv6 destination * options header. If the extension header is not present, use the Source * IPv6 Address. * o IPv6 address that is contained in the Routing-Header-Type-2 from the * associated extension header. If the extension header is not present, * use the Destination IPv6 Address." * Quoted from: * https://docs.microsoft.com/en-us/windows-hardware/drivers/network/rss-hashing-types#ndishashipv6ex */ #define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */ #define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t)) /* Microsoft RSS standard hash types */ #define M_HASHTYPE_NONE 0 #define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */ #define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */ #define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple + * ext hdrs */ #define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */ #define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE) /* ordering+hash, not affinity*/ #define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0) #define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype) #define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v)) #define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) #define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP) /* * COS/QOS class and quality of service tags. * It uses DSCP code points as base. */ #define QOS_DSCP_CS0 0x00 #define QOS_DSCP_DEF QOS_DSCP_CS0 #define QOS_DSCP_CS1 0x20 #define QOS_DSCP_AF11 0x28 #define QOS_DSCP_AF12 0x30 #define QOS_DSCP_AF13 0x38 #define QOS_DSCP_CS2 0x40 #define QOS_DSCP_AF21 0x48 #define QOS_DSCP_AF22 0x50 #define QOS_DSCP_AF23 0x58 #define QOS_DSCP_CS3 0x60 #define QOS_DSCP_AF31 0x68 #define QOS_DSCP_AF32 0x70 #define QOS_DSCP_AF33 0x78 #define QOS_DSCP_CS4 0x80 #define QOS_DSCP_AF41 0x88 #define QOS_DSCP_AF42 0x90 #define QOS_DSCP_AF43 0x98 #define QOS_DSCP_CS5 0xa0 #define QOS_DSCP_EF 0xb8 #define QOS_DSCP_CS6 0xc0 #define QOS_DSCP_CS7 0xe0 /* * External mbuf storage buffer types. */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ #define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ #define EXT_VENDOR3 226 /* for vendor-internal use */ #define EXT_VENDOR4 227 /* for vendor-internal use */ #define EXT_EXP1 244 /* for experimental use */ #define EXT_EXP2 245 /* for experimental use */ #define EXT_EXP3 246 /* for experimental use */ #define EXT_EXP4 247 /* for experimental use */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ #define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */ /* * Flags for external mbuf buffer types. * NB: limited to the lower 24 bits. */ #define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */ #define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */ #define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */ #define EXT_FLAG_VENDOR1 0x010000 /* These flags are vendor */ #define EXT_FLAG_VENDOR2 0x020000 /* or submodule specific, */ #define EXT_FLAG_VENDOR3 0x040000 /* not used by mbuf code. */ #define EXT_FLAG_VENDOR4 0x080000 /* Set/read by submodule. */ #define EXT_FLAG_EXP1 0x100000 /* for experimental use */ #define EXT_FLAG_EXP2 0x200000 /* for experimental use */ #define EXT_FLAG_EXP3 0x400000 /* for experimental use */ #define EXT_FLAG_EXP4 0x800000 /* for experimental use */ /* * EXT flag description for use with printf(9) %b identifier. */ #define EXT_FLAG_BITS \ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. It is split into * separate inbound and outbound flags. * * Outbound flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. * For outbound packets this field and its flags can be directly tested * against ifnet if_hwassist. */ #define CSUM_IP 0x00000001 /* IP header checksum offload */ #define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */ #define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */ #define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */ #define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */ #define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */ #define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */ #define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */ #define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */ #define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */ #define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */ /* Inbound checksum support where the checksum was verified by hardware. */ #define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */ #define CSUM_L3_VALID 0x02000000 /* checksum is correct */ #define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */ #define CSUM_L4_VALID 0x08000000 /* checksum is correct */ #define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */ #define CSUM_L5_VALID 0x20000000 /* checksum is correct */ #define CSUM_COALESCED 0x40000000 /* contains merged segments */ /* * CSUM flag description for use with printf(9) %b identifier. */ #define CSUM_BITS \ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \ "\6CSUM_IP_ISCSI" \ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \ "\16CSUM_IP6_ISCSI" \ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED" /* CSUM flags compatibility mappings. */ #define CSUM_IP_CHECKED CSUM_L3_CALC #define CSUM_IP_VALID CSUM_L3_VALID #define CSUM_DATA_VALID CSUM_L4_VALID #define CSUM_PSEUDO_HDR CSUM_L4_CALC #define CSUM_SCTP_VALID CSUM_L4_VALID #define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) #define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ #define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) #define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID #define CSUM_TCP CSUM_IP_TCP #define CSUM_UDP CSUM_IP_UDP #define CSUM_SCTP CSUM_IP_SCTP #define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO) #define CSUM_UDP_IPV6 CSUM_IP6_UDP #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP /* * mbuf types describing the content of the mbuf (including external storage). */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ #define MT_VENDOR1 4 /* for vendor-internal use */ #define MT_VENDOR2 5 /* for vendor-internal use */ #define MT_VENDOR3 6 /* for vendor-internal use */ #define MT_VENDOR4 7 /* for vendor-internal use */ #define MT_SONAME 8 /* socket name */ #define MT_EXP1 9 /* for experimental use */ #define MT_EXP2 10 /* for experimental use */ #define MT_EXP3 11 /* for experimental use */ #define MT_EXP4 12 /* for experimental use */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ /* * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). */ #define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" #define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" #define MBUF_TAG_MEM_NAME "mbuf_tag" #define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" #ifdef _KERNEL #ifdef WITNESS #define MBUF_CHECKSLEEP(how) do { \ if (how == M_WAITOK) \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "Sleeping in \"%s\"", __func__); \ } while (0) #else #define MBUF_CHECKSLEEP(how) #endif /* * Network buffer allocation API * * The rest of it is defined in kern/kern_mbuf.c */ extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); void m_catpkt(struct mbuf *, struct mbuf *); int m_clget(struct mbuf *m, int how); void *m_cljget(struct mbuf *m, int how, int size); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_copyup(struct mbuf *, int, int); struct mbuf *m_defrag(struct mbuf *, int); void m_demote_pkthdr(struct mbuf *); void m_demote(struct mbuf *, int, int); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); struct mbuf *m_dup(const struct mbuf *, int); int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int); void m_extadd(struct mbuf *, char *, u_int, m_ext_free_t, void *, void *, int, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_fragment(struct mbuf *, int, int); void m_freem(struct mbuf *); struct mbuf *m_get2(int, int, short, int); struct mbuf *m_getjcl(int, short, int, int); struct mbuf *m_getm2(struct mbuf *, int, int, short, int); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); int m_mbuftouio(struct uio *, const struct mbuf *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); int m_pkthdr_init(struct mbuf *, int); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *, int); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); int m_sanity(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int); static __inline int m_gettype(int size) { int type; switch (size) { case MSIZE: type = EXT_MBUF; break; case MCLBYTES: type = EXT_CLUSTER; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; #endif case MJUM9BYTES: type = EXT_JUMBO9; break; case MJUM16BYTES: type = EXT_JUMBO16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (type); } /* * Associated an external reference counted buffer with an mbuf. */ static __inline void m_extaddref(struct mbuf *m, char *buf, u_int size, u_int *ref_cnt, m_ext_free_t freef, void *arg1, void *arg2) { KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__)); atomic_add_int(ref_cnt, 1); m->m_flags |= M_EXT; m->m_ext.ext_buf = buf; m->m_ext.ext_cnt = ref_cnt; m->m_data = m->m_ext.ext_buf; m->m_ext.ext_size = size; m->m_ext.ext_free = freef; m->m_ext.ext_arg1 = arg1; m->m_ext.ext_arg2 = arg2; m->m_ext.ext_type = EXT_EXTREF; m->m_ext.ext_flags = 0; } static __inline uma_zone_t m_getzone(int size) { uma_zone_t zone; switch (size) { case MCLBYTES: zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: zone = zone_jumbop; break; #endif case MJUM9BYTES: zone = zone_jumbo9; break; case MJUM16BYTES: zone = zone_jumbo16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (zone); } /* * Initialize an mbuf with linear storage. * * Inline because the consumer text overhead will be roughly the same to * initialize or call a function with this many parameters and M_PKTHDR * should go away with constant propagation for !MGETHDR. */ static __inline int m_init(struct mbuf *m, int how, short type, int flags) { int error; m->m_next = NULL; m->m_nextpkt = NULL; m->m_data = m->m_dat; m->m_len = 0; m->m_flags = flags; m->m_type = type; if (flags & M_PKTHDR) error = m_pkthdr_init(m, how); else error = 0; MBUF_PROBE5(m__init, m, how, type, flags, error); return (error); } static __inline struct mbuf * m_get(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = 0; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__get, how, type, m); return (m); } static __inline struct mbuf * m_gethdr(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = M_PKTHDR; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__gethdr, how, type, m); return (m); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { struct mbuf *m; struct mb_args args; args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_pack, &args, how); MBUF_PROBE4(m__getcl, how, type, flags, m); return (m); } /* * XXX: m_cljset() is a dangerous API. One must attach only a new, * unreferenced cluster to an mbuf(9). It is not possible to assert * that, so care can be taken only by users of the API. */ static __inline void m_cljset(struct mbuf *m, void *cl, int type) { int size; switch (type) { case EXT_CLUSTER: size = MCLBYTES; break; #if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: size = MJUMPAGESIZE; break; #endif case EXT_JUMBO9: size = MJUM9BYTES; break; case EXT_JUMBO16: size = MJUM16BYTES; break; default: panic("%s: unknown cluster type %d", __func__, type); break; } m->m_data = m->m_ext.ext_buf = cl; m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_flags |= M_EXT; MBUF_PROBE3(m__cljset, m, cl, type); } static __inline void m_chtype(struct mbuf *m, short new_type) { m->m_type = new_type; } static __inline void m_clrprotoflags(struct mbuf *m) { while (m) { m->m_flags &= ~M_PROTOFLAGS; m = m->m_next; } } static __inline struct mbuf * m_last(struct mbuf *m) { while (m->m_next) m = m->m_next; return (m); } static inline u_int m_extrefcnt(struct mbuf *m) { KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__)); return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count : *m->m_ext.ext_cnt); } /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). */ #define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \ m_extadd((m), (char *)(buf), (size), (free), (arg1), (arg2), \ (flags), (type)) #define m_getm(m, len, how, type) \ m_getm2((m), (len), (how), (type), M_PKTHDR) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ (!(((m)->m_flags & M_EXT)) || \ (m_extrefcnt(m) == 1))) /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR, \ ("%s: no mbuf packet header!", __func__)) /* * Ensure that the supplied mbuf is a valid, non-free mbuf. * * XXX: Broken at the moment. Need some UMA magic to make it work again. */ #define M_ASSERTVALID(m) \ KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \ ("%s: attempted use of a free mbuf!", __func__)) /* * Return the address of the start of the buffer associated with an mbuf, * handling external storage, packet-header mbufs, and regular data mbufs. */ #define M_START(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ &(m)->m_dat[0]) /* * Return the size of the buffer associated with an mbuf, handling external * storage, packet-header mbufs, and regular data mbufs. */ #define M_SIZE(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \ ((m)->m_flags & M_PKTHDR) ? MHLEN : \ MLEN) /* * Set the m_data pointer of a newly allocated mbuf to place an object of the * specified size at the end of the mbuf, longword aligned. * * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as * separate macros, each asserting that it was called at the proper moment. * This required callers to themselves test the storage type and call the * right one. Rather than require callers to be aware of those layout * decisions, we centralize here. */ static __inline void m_align(struct mbuf *m, int len) { #ifdef INVARIANTS const char *msg = "%s: not a virgin mbuf"; #endif int adjust; KASSERT(m->m_data == M_START(m), (msg, __func__)); adjust = M_SIZE(m) - len; m->m_data += adjust &~ (sizeof(long)-1); } #define M_ALIGN(m, len) m_align(m, len) #define MH_ALIGN(m, len) m_align(m, len) #define MEXT_ALIGN(m, len) m_align(m, len) /* * Compute the amount of space available before the current start of data in * an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_LEADINGSPACE(m) \ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) /* * Compute the amount of space available after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_TRAILINGSPACE(m) \ (M_WRITABLE(m) ? \ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the * original mbuf chain is freed and m is set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ MBUF_CHECKSLEEP(how); \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. This is a relatively expensive operation and * should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Length to m_copy to copy all. */ #define M_COPYALL 1000000000 extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern int nmbclusters; /* Maximum number of clusters */ /*- * Network packets may have annotations attached by affixing a list of * "packet tags" to the pkthdr structure. Packet tags are dynamically * allocated semi-opaque data structures that have a fixed header * (struct m_tag) that specifies the size of the memory block and a * pair that identifies it. The cookie is a 32-bit unique * unsigned value used to identify a module or ABI. By convention this value * is chosen as the date+time that the module is created, expressed as the * number of seconds since the epoch (e.g., using date -u +'%s'). The type * value is an ABI/module-specific value that identifies a particular * annotation and is private to the module. For compatibility with systems * like OpenBSD that define packet tags w/o an ABI/module cookie, the value * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find * compatibility shim functions and several tag types are defined below. * Users that do not require compatibility should use a private cookie value * so that packet tag-related definitions can be maintained privately. * * Note that the packet tag returned by m_tag_alloc has the default memory * alignment implemented by malloc. To reference private data one can use a * construct like: * * struct m_tag *mtag = m_tag_alloc(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members of * struct foo. Otherwise it is necessary to embed struct m_tag within the * private data structure to insure proper alignment; e.g., * * struct foo { * struct m_tag tag; * ... * }; * struct foo *p = (struct foo *) m_tag_alloc(...); * struct m_tag *mtag = &p->tag; */ /* * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise * tags are expected to ``vanish'' when they pass through a network * interface. For most interfaces this happens normally as the tags are * reclaimed when the mbuf is free'd. However in some special cases * reclaiming must be done manually. An example is packets that pass through * the loopback interface. Also, one must be careful to do this when * ``turning around'' packets (e.g., icmp_reflect). * * To mark a tag persistent bit-or this flag in when defining the tag id. * The tag will then be treated as described above. */ #define MTAG_PERSISTENT 0x800 #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tags for use with PACKET_ABI_COMPAT. */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ #define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ /* Specific cookies and tags. */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int); void m_tag_delete_nonpersistent(struct mbuf *); /* * Initialize the list of tags associated with an mbuf. */ static __inline void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* * Set up the contents of a tag. Note that this does not fill in the free * method; the caller is expected to do that. * * XXX probably should be called m_tag_init, but that was already taken. */ static __inline void m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) { t->m_tag_id = type; t->m_tag_len = len; t->m_tag_cookie = cookie; } /* * Reclaim resources associated with a tag. */ static __inline void m_tag_free(struct m_tag *t) { (*t->m_tag_free)(t); } /* * Return the first tag associated with an mbuf. */ static __inline struct m_tag * m_tag_first(struct mbuf *m) { return (SLIST_FIRST(&m->m_pkthdr.tags)); } /* * Return the next tag in the list of tags associated with an mbuf. */ static __inline struct m_tag * m_tag_next(struct mbuf *m __unused, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); } /* * Prepend a tag to the list of tags associated with an mbuf. */ static __inline void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } /* * Unlink a tag from the list of tags associated with an mbuf. */ static __inline void m_tag_unlink(struct mbuf *m, struct m_tag *t) { SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } /* These are for OpenBSD compatibility. */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait)); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return (SLIST_EMPTY(&m->m_pkthdr.tags) ? (struct m_tag *)NULL : m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } static __inline struct mbuf * m_free(struct mbuf *m) { struct mbuf *n = m->m_next; MBUF_PROBE1(m__free, m); if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) m_tag_delete_chain(m, NULL); if (m->m_flags & M_EXT) mb_free_ext(m); else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } static __inline int rt_m_getfib(struct mbuf *m) { KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf.")); return (m->m_pkthdr.fibnum); } #define M_GETFIB(_m) rt_m_getfib(_m) #define M_SETFIB(_m, _fib) do { \ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \ ((_m)->m_pkthdr.fibnum) = (_fib); \ } while (0) /* flags passed as first argument for "m_ether_tcpip_hash()" */ #define MBUF_HASHFLAG_L2 (1 << 2) #define MBUF_HASHFLAG_L3 (1 << 3) #define MBUF_HASHFLAG_L4 (1 << 4) /* mbuf hashing helper routines */ uint32_t m_ether_tcpip_hash_init(void); uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t); #ifdef MBUF_PROFILING void m_profile(struct mbuf *m); #define M_PROFILE(m) m_profile(m) #else #define M_PROFILE(m) #endif struct mbufq { STAILQ_HEAD(, mbuf) mq_head; int mq_len; int mq_maxlen; }; static inline void mbufq_init(struct mbufq *mq, int maxlen) { STAILQ_INIT(&mq->mq_head); mq->mq_maxlen = maxlen; mq->mq_len = 0; } static inline struct mbuf * mbufq_flush(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); STAILQ_INIT(&mq->mq_head); mq->mq_len = 0; return (m); } static inline void mbufq_drain(struct mbufq *mq) { struct mbuf *m, *n; n = mbufq_flush(mq); while ((m = n) != NULL) { n = STAILQ_NEXT(m, m_stailqpkt); m_freem(m); } } static inline struct mbuf * mbufq_first(const struct mbufq *mq) { return (STAILQ_FIRST(&mq->mq_head)); } static inline struct mbuf * mbufq_last(const struct mbufq *mq) { return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt)); } static inline int mbufq_full(const struct mbufq *mq) { return (mq->mq_len >= mq->mq_maxlen); } static inline int mbufq_len(const struct mbufq *mq) { return (mq->mq_len); } static inline int mbufq_enqueue(struct mbufq *mq, struct mbuf *m) { if (mbufq_full(mq)) return (ENOBUFS); STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; return (0); } static inline struct mbuf * mbufq_dequeue(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); if (m) { STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt); m->m_nextpkt = NULL; mq->mq_len--; } return (m); } static inline void mbufq_prepend(struct mbufq *mq, struct mbuf *m) { STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; } /* * Note: this doesn't enforce the maximum list size for dst. */ static inline void mbufq_concat(struct mbufq *mq_dst, struct mbufq *mq_src) { mq_dst->mq_len += mq_src->mq_len; STAILQ_CONCAT(&mq_dst->mq_head, &mq_src->mq_head); mq_src->mq_len = 0; } +#ifdef _SYS_TIMESPEC_H_ +static inline void +mbuf_tstmp2timespec(struct mbuf *m, struct timespec *ts) +{ + + KASSERT((m->m_flags & M_PKTHDR) != 0, ("mbuf %p no M_PKTHDR", m)); + KASSERT((m->m_flags & M_TSTMP) != 0, ("mbuf %p no M_TSTMP", m)); + ts->tv_sec = m->m_pkthdr.rcv_tstmp / 1000000000; + ts->tv_nsec = m->m_pkthdr.rcv_tstmp % 1000000000; +} +#endif + #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */