diff --git a/sbin/dhclient/bpf.c b/sbin/dhclient/bpf.c index 4a753fd27595..a50abca62fd3 100644 --- a/sbin/dhclient/bpf.c +++ b/sbin/dhclient/bpf.c @@ -1,489 +1,497 @@ /* $OpenBSD: bpf.c,v 1.13 2004/05/05 14:28:58 deraadt Exp $ */ /* BPF socket interface code, originally contributed by Archie Cobbs. */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995, 1996, 1998, 1999 * The Internet Software Consortium. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of The Internet Software Consortium nor the names * of its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * This software has been written for the Internet Software Consortium * by Ted Lemon in cooperation with Vixie * Enterprises. To learn more about the Internet Software Consortium, * see ``http://www.vix.com/isc''. To learn more about Vixie * Enterprises, see ``http://www.vix.com''. */ #include __FBSDID("$FreeBSD$"); #include "dhcpd.h" #include "privsep.h" #include #include #include #include #include #include #include #include #include #define BPF_FORMAT "/dev/bpf%d" /* * Called by get_interface_list for each interface that's discovered. * Opens a packet filter for each interface and adds it to the select * mask. */ int if_register_bpf(struct interface_info *info, int flags) { char filename[50]; int sock, b; /* Open a BPF device */ for (b = 0;; b++) { snprintf(filename, sizeof(filename), BPF_FORMAT, b); sock = open(filename, flags); if (sock < 0) { if (errno == EBUSY) continue; else error("Can't find free bpf: %m"); } else break; } /* Set the BPF device to point at this interface. */ if (ioctl(sock, BIOCSETIF, info->ifp) < 0) error("Can't attach interface %s to bpf device %s: %m", info->name, filename); + /* Tag the packets with the proper VLAN PCP setting. */ + if (info->client->config->vlan_pcp != 0) { + if (ioctl(sock, BIOCSETVLANPCP, + &info->client->config->vlan_pcp) < 0) + error( "Can't set the VLAN PCP tag on interface %s: %m", + info->name); + } + return (sock); } /* * Packet write filter program: * 'ip and udp and src port bootps and dst port (bootps or bootpc)' */ static struct bpf_insn dhcp_bpf_wfilter[] = { BPF_STMT(BPF_LD + BPF_B + BPF_IND, 14), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (IPVERSION << 4) + 5, 0, 12), /* Make sure this is an IP packet... */ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 10), /* Make sure it's a UDP packet... */ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, 23), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 8), /* Make sure this isn't a fragment... 
*/ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20), BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 6, 0), /* patched */ /* Get the IP header length... */ BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, 14), /* Make sure it's from the right port... */ BPF_STMT(BPF_LD + BPF_H + BPF_IND, 14), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 68, 0, 3), /* Make sure it is to the right ports ... */ BPF_STMT(BPF_LD + BPF_H + BPF_IND, 16), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 67, 0, 1), /* If we passed all the tests, ask for the whole packet. */ BPF_STMT(BPF_RET+BPF_K, (u_int)-1), /* Otherwise, drop it. */ BPF_STMT(BPF_RET+BPF_K, 0), }; static int dhcp_bpf_wfilter_len = nitems(dhcp_bpf_wfilter); void if_register_send(struct interface_info *info) { cap_rights_t rights; struct bpf_version v; struct bpf_program p; int sock, on = 1; /* Open a BPF device and hang it on this interface... */ info->wfdesc = if_register_bpf(info, O_WRONLY); /* Make sure the BPF version is in range... */ if (ioctl(info->wfdesc, BIOCVERSION, &v) < 0) error("Can't get BPF version: %m"); if (v.bv_major != BPF_MAJOR_VERSION || v.bv_minor < BPF_MINOR_VERSION) error("Kernel BPF version out of range - recompile dhcpd!"); /* Set up the bpf write filter program structure. */ p.bf_len = dhcp_bpf_wfilter_len; p.bf_insns = dhcp_bpf_wfilter; if (dhcp_bpf_wfilter[7].k == 0x1fff) dhcp_bpf_wfilter[7].k = htons(IP_MF|IP_OFFMASK); if (ioctl(info->wfdesc, BIOCSETWF, &p) < 0) error("Can't install write filter program: %m"); if (ioctl(info->wfdesc, BIOCLOCK, NULL) < 0) error("Cannot lock bpf"); cap_rights_init(&rights, CAP_WRITE); if (caph_rights_limit(info->wfdesc, &rights) < 0) error("Can't limit bpf descriptor: %m"); /* * Use raw socket for unicast send. */ if ((sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP)) == -1) error("socket(SOCK_RAW): %m"); if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on)) == -1) error("setsockopt(IP_HDRINCL): %m"); info->ufdesc = sock; } /* * Packet filter program... 
* * XXX: Changes to the filter program may require changes to the * constant offsets used in if_register_send to patch the BPF program! */ static struct bpf_insn dhcp_bpf_filter[] = { /* Make sure this is an IP packet... */ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 8), /* Make sure it's a UDP packet... */ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, 23), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 6), /* Make sure this isn't a fragment... */ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20), BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 4, 0), /* Get the IP header length... */ BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, 14), /* Make sure it's to the right port... */ BPF_STMT(BPF_LD + BPF_H + BPF_IND, 16), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 67, 0, 1), /* patch */ /* If we passed all the tests, ask for the whole packet. */ BPF_STMT(BPF_RET+BPF_K, (u_int)-1), /* Otherwise, drop it. */ BPF_STMT(BPF_RET+BPF_K, 0), }; static int dhcp_bpf_filter_len = nitems(dhcp_bpf_filter); void if_register_receive(struct interface_info *info) { static const unsigned long cmds[2] = { SIOCGIFFLAGS, SIOCGIFMEDIA }; cap_rights_t rights; struct bpf_version v; struct bpf_program p; int flag = 1, sz; /* Open a BPF device and hang it on this interface... */ info->rfdesc = if_register_bpf(info, O_RDONLY); /* Make sure the BPF version is in range... */ if (ioctl(info->rfdesc, BIOCVERSION, &v) < 0) error("Can't get BPF version: %m"); if (v.bv_major != BPF_MAJOR_VERSION || v.bv_minor < BPF_MINOR_VERSION) error("Kernel BPF version out of range - recompile dhcpd!"); /* * Set immediate mode so that reads return as soon as a packet * comes in, rather than waiting for the input buffer to fill * with packets. */ if (ioctl(info->rfdesc, BIOCIMMEDIATE, &flag) < 0) error("Can't set immediate mode on bpf device: %m"); /* Get the required BPF buffer length from the kernel. 
*/ if (ioctl(info->rfdesc, BIOCGBLEN, &sz) < 0) error("Can't get bpf buffer length: %m"); info->rbuf_max = sz; info->rbuf = malloc(info->rbuf_max); if (!info->rbuf) error("Can't allocate %lu bytes for bpf input buffer.", (unsigned long)info->rbuf_max); info->rbuf_offset = 0; info->rbuf_len = 0; /* Set up the bpf filter program structure. */ p.bf_len = dhcp_bpf_filter_len; p.bf_insns = dhcp_bpf_filter; /* Patch the server port into the BPF program... * * XXX: changes to filter program may require changes to the * insn number(s) used below! */ dhcp_bpf_filter[8].k = LOCAL_PORT; if (ioctl(info->rfdesc, BIOCSETF, &p) < 0) error("Can't install packet filter program: %m"); if (ioctl(info->rfdesc, BIOCLOCK, NULL) < 0) error("Cannot lock bpf"); cap_rights_init(&rights, CAP_IOCTL, CAP_EVENT, CAP_READ); if (caph_rights_limit(info->rfdesc, &rights) < 0) error("Can't limit bpf descriptor: %m"); if (caph_ioctls_limit(info->rfdesc, cmds, 2) < 0) error("Can't limit ioctls for bpf descriptor: %m"); } void send_packet_unpriv(int privfd, struct dhcp_packet *raw, size_t len, struct in_addr from, struct in_addr to) { struct imsg_hdr hdr; struct buf *buf; int errs; hdr.code = IMSG_SEND_PACKET; hdr.len = sizeof(hdr) + sizeof(size_t) + len + sizeof(from) + sizeof(to); if ((buf = buf_open(hdr.len)) == NULL) error("buf_open: %m"); errs = 0; errs += buf_add(buf, &hdr, sizeof(hdr)); errs += buf_add(buf, &len, sizeof(len)); errs += buf_add(buf, raw, len); errs += buf_add(buf, &from, sizeof(from)); errs += buf_add(buf, &to, sizeof(to)); if (errs) error("buf_add: %m"); if (buf_close(privfd, buf) == -1) error("buf_close: %m"); } void send_packet_priv(struct interface_info *interface, struct imsg_hdr *hdr, int fd) { unsigned char buf[256]; struct iovec iov[2]; struct msghdr msg; struct dhcp_packet raw; size_t len; struct in_addr from, to; int result, bufp = 0; if (hdr->len < sizeof(*hdr) + sizeof(size_t)) error("corrupted message received"); buf_read(fd, &len, sizeof(len)); if (hdr->len != 
/*
 * Extract the next usable payload from the interface's BPF read buffer.
 *
 * interface	supplies the read descriptor (rfdesc) and the buffer state
 *		(rbuf, rbuf_max, rbuf_offset, rbuf_len); the offset/length
 *		fields are read before being written, so data left over from
 *		an earlier call is consumed first.
 * buf, len	destination for the payload and its capacity in bytes.
 * from		passed through to decode_udp_ip_header().
 * hfrom	passed through to decode_hw_header().
 *
 * Returns the number of payload bytes copied into buf.  If the read()
 * issued here returns <= 0, that value is returned unchanged.  Returns 0
 * when a read() was performed during this call and the record examined
 * after it had to be skipped.
 */
ssize_t
receive_packet(struct interface_info *interface, unsigned char *buf,
    size_t len, struct sockaddr_in *from, struct hardware *hfrom)
{
	int length = 0, offset = 0;
	struct bpf_hdr hdr;

	/*
	 * All this complexity is because BPF doesn't guarantee that
	 * only one packet will be returned at a time.  We're getting
	 * what we deserve, though - this is a terrible abuse of the BPF
	 * interface.  Sigh.
	 */

	/*
	 * Process packets until we get one we can return or until we've
	 * done a read and gotten nothing we can return...
	 */
	do {
		/* If the buffer is empty, fill it. */
		if (interface->rbuf_offset >= interface->rbuf_len) {
			length = read(interface->rfdesc, interface->rbuf,
			    interface->rbuf_max);
			/* EOF or error: hand the read() result to the caller. */
			if (length <= 0)
				return (length);
			interface->rbuf_offset = 0;
			interface->rbuf_len = length;
		}

		/*
		 * If there isn't room for a whole bpf header, something
		 * went wrong, but we'll ignore it and hope it goes
		 * away... XXX
		 *
		 * Setting the offset to the length marks the buffer as
		 * fully consumed.
		 */
		if (interface->rbuf_len - interface->rbuf_offset <
		    sizeof(hdr)) {
			interface->rbuf_offset = interface->rbuf_len;
			continue;
		}

		/* Copy out a bpf header... */
		memcpy(&hdr, &interface->rbuf[interface->rbuf_offset],
		    sizeof(hdr));

		/*
		 * If the bpf header plus data doesn't fit in what's
		 * left of the buffer, stick head in sand yet again...
		 */
		if (interface->rbuf_offset + hdr.bh_hdrlen + hdr.bh_caplen >
		    interface->rbuf_len) {
			interface->rbuf_offset = interface->rbuf_len;
			continue;
		}

		/* Skip over the BPF header... */
		interface->rbuf_offset += hdr.bh_hdrlen;

		/*
		 * If the captured data wasn't the whole packet (captured
		 * length differs from the on-wire length), all we can do
		 * is drop it and step to the next word-aligned record.
		 */
		if (hdr.bh_caplen != hdr.bh_datalen) {
			interface->rbuf_offset =
			    BPF_WORDALIGN(interface->rbuf_offset +
			    hdr.bh_caplen);
			continue;
		}

		/* Decode the physical header... */
		offset = decode_hw_header(interface->rbuf,
		    interface->rbuf_offset, hfrom);

		/*
		 * If a physical layer checksum failed (dunno of any
		 * physical layer that supports this, but WTH), skip
		 * this packet.
		 */
		if (offset < 0) {
			interface->rbuf_offset =
			    BPF_WORDALIGN(interface->rbuf_offset +
			    hdr.bh_caplen);
			continue;
		}
		/*
		 * Step past the decoded header; bh_caplen is reused from
		 * here on as the count of bytes still left in this record.
		 */
		interface->rbuf_offset += offset;
		hdr.bh_caplen -= offset;

		/* Decode the IP and UDP headers... */
		offset = decode_udp_ip_header(interface->rbuf,
		    interface->rbuf_offset, from, NULL, hdr.bh_caplen);

		/* If the IP or UDP checksum was bad, skip the packet... */
		if (offset < 0) {
			interface->rbuf_offset =
			    BPF_WORDALIGN(interface->rbuf_offset +
			    hdr.bh_caplen);
			continue;
		}
		interface->rbuf_offset += offset;
		hdr.bh_caplen -= offset;

		/*
		 * If there's not enough room to stash the packet data,
		 * we have to skip it (this shouldn't happen in real
		 * life, though).
		 */
		if (hdr.bh_caplen > len) {
			interface->rbuf_offset =
			    BPF_WORDALIGN(interface->rbuf_offset +
			    hdr.bh_caplen);
			continue;
		}

		/* Copy out the data in the packet... */
		memcpy(buf, interface->rbuf + interface->rbuf_offset,
		    hdr.bh_caplen);
		interface->rbuf_offset =
		    BPF_WORDALIGN(interface->rbuf_offset + hdr.bh_caplen);
		return (hdr.bh_caplen);
	/*
	 * `continue` lands on this test.  length stays 0 until this call
	 * has issued a read(), so leftover records from an earlier call are
	 * scanned freely, but once a read() has happened the first skipped
	 * record ends the loop: at most one read() is issued per call.
	 */
	} while (!length);
	return (0);
}
IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * This software has been written for the Internet Software Consortium * by Ted Lemon in cooperation with Vixie * Enterprises. To learn more about the Internet Software Consortium, * see ``http://www.vix.com/isc''. To learn more about Vixie * Enterprises, see ``http://www.vix.com''. */ #include __FBSDID("$FreeBSD$"); #include "dhcpd.h" #include "dhctoken.h" struct client_config top_level_config; static struct interface_info *dummy_interfaces; static char client_script_name[] = "/sbin/dhclient-script"; /* * client-conf-file :== client-declarations EOF * client-declarations :== * | client-declaration * | client-declarations client-declaration */ int read_client_conf(void) { FILE *cfile; char *val; int token; struct client_config *config; new_parse(path_dhclient_conf); /* Set up the initial dhcp option universe. */ initialize_universes(); /* Initialize the top level client configuration. */ memset(&top_level_config, 0, sizeof(top_level_config)); /* Set some defaults... 
*/ + top_level_config.vlan_pcp = 0; top_level_config.timeout = 60; top_level_config.select_interval = 0; top_level_config.reboot_timeout = 10; top_level_config.retry_interval = 300; top_level_config.backoff_cutoff = 15; top_level_config.initial_interval = 3; top_level_config.bootp_policy = ACCEPT; top_level_config.script_name = client_script_name; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_SUBNET_MASK; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_BROADCAST_ADDRESS; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_TIME_OFFSET; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_CLASSLESS_ROUTES; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_ROUTERS; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_DOMAIN_NAME; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_DOMAIN_NAME_SERVERS; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_HOST_NAME; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_DOMAIN_SEARCH; top_level_config.requested_options [top_level_config.requested_option_count++] = DHO_INTERFACE_MTU; if ((cfile = fopen(path_dhclient_conf, "r")) != NULL) { do { token = peek_token(&val, cfile); if (token == EOF) break; parse_client_statement(cfile, NULL, &top_level_config); } while (1); token = next_token(&val, cfile); /* Clear the peek buffer */ fclose(cfile); } /* * Set up state and config structures for clients that don't * have per-interface configuration declarations. 
*/ config = NULL; if (!ifi->client) { ifi->client = malloc(sizeof(struct client_state)); if (!ifi->client) error("no memory for client state."); memset(ifi->client, 0, sizeof(*(ifi->client))); } if (!ifi->client->config) { if (!config) { config = malloc(sizeof(struct client_config)); if (!config) error("no memory for client config."); memcpy(config, &top_level_config, sizeof(top_level_config)); } ifi->client->config = config; } return (!warnings_occurred); } /* * lease-file :== client-lease-statements EOF * client-lease-statements :== * | client-lease-statements LEASE client-lease-statement */ void read_client_leases(void) { FILE *cfile; char *val; int token; new_parse(path_dhclient_db); /* Open the lease file. If we can't open it, just return - we can safely trust the server to remember our state. */ if ((cfile = fopen(path_dhclient_db, "r")) == NULL) return; do { token = next_token(&val, cfile); if (token == EOF) break; if (token != LEASE) { warning("Corrupt lease file - possible data loss!"); skip_to_semi(cfile); break; } else parse_client_lease_statement(cfile, 0); } while (1); fclose(cfile); } /* * client-declaration :== * SEND option-decl | * DEFAULT option-decl | * SUPERSEDE option-decl | * PREPEND option-decl | * APPEND option-decl | * hardware-declaration | * REQUEST option-list | * REQUIRE option-list | * TIMEOUT number | * RETRY number | * REBOOT number | * SELECT_TIMEOUT number | * SCRIPT string | * interface-declaration | * LEASE client-lease-statement | * ALIAS client-lease-statement */ void parse_client_statement(FILE *cfile, struct interface_info *ip, struct client_config *config) { int token; char *val; struct option *option; + time_t tmp; switch (next_token(&val, cfile)) { case SEND: parse_option_decl(cfile, &config->send_options[0]); return; case DEFAULT: option = parse_option_decl(cfile, &config->defaults[0]); if (option) config->default_actions[option->code] = ACTION_DEFAULT; return; case SUPERSEDE: option = parse_option_decl(cfile, 
&config->defaults[0]); if (option) config->default_actions[option->code] = ACTION_SUPERSEDE; return; case APPEND: option = parse_option_decl(cfile, &config->defaults[0]); if (option) config->default_actions[option->code] = ACTION_APPEND; return; case PREPEND: option = parse_option_decl(cfile, &config->defaults[0]); if (option) config->default_actions[option->code] = ACTION_PREPEND; return; case MEDIA: parse_string_list(cfile, &config->media, 1); return; case HARDWARE: if (ip) parse_hardware_param(cfile, &ip->hw_address); else { parse_warn("hardware address parameter %s", "not allowed here."); skip_to_semi(cfile); } return; case REQUEST: config->requested_option_count = parse_option_list(cfile, config->requested_options); return; case REQUIRE: memset(config->required_options, 0, sizeof(config->required_options)); parse_option_list(cfile, config->required_options); return; case TIMEOUT: parse_lease_time(cfile, &config->timeout); return; case RETRY: parse_lease_time(cfile, &config->retry_interval); return; case SELECT_TIMEOUT: parse_lease_time(cfile, &config->select_interval); return; case REBOOT: parse_lease_time(cfile, &config->reboot_timeout); return; + case VLAN_PCP: + parse_lease_time(cfile, &tmp); + config->vlan_pcp = (u_int)tmp; + return; case BACKOFF_CUTOFF: parse_lease_time(cfile, &config->backoff_cutoff); return; case INITIAL_INTERVAL: parse_lease_time(cfile, &config->initial_interval); return; case SCRIPT: config->script_name = parse_string(cfile); return; case INTERFACE: if (ip) parse_warn("nested interface declaration."); parse_interface_declaration(cfile, config); return; case LEASE: parse_client_lease_statement(cfile, 1); return; case ALIAS: parse_client_lease_statement(cfile, 2); return; case REJECT: parse_reject_statement(cfile, config); return; default: parse_warn("expecting a statement."); skip_to_semi(cfile); break; } token = next_token(&val, cfile); if (token != SEMI) { parse_warn("semicolon expected."); skip_to_semi(cfile); } } unsigned 
parse_X(FILE *cfile, u_int8_t *buf, unsigned max) { int token; char *val; unsigned len; token = peek_token(&val, cfile); if (token == NUMBER_OR_NAME || token == NUMBER) { len = 0; do { token = next_token(&val, cfile); if (token != NUMBER && token != NUMBER_OR_NAME) { parse_warn("expecting hexadecimal constant."); skip_to_semi(cfile); return (0); } convert_num(&buf[len], val, 16, 8); if (len++ > max) { parse_warn("hexadecimal constant too long."); skip_to_semi(cfile); return (0); } token = peek_token(&val, cfile); if (token == COLON) token = next_token(&val, cfile); } while (token == COLON); val = (char *)buf; } else if (token == STRING) { token = next_token(&val, cfile); len = strlen(val); if (len + 1 > max) { parse_warn("string constant too long."); skip_to_semi(cfile); return (0); } memcpy(buf, val, len + 1); } else { parse_warn("expecting string or hexadecimal data"); skip_to_semi(cfile); return (0); } return (len); } /* * option-list :== option_name | * option_list COMMA option_name */ int parse_option_list(FILE *cfile, u_int8_t *list) { int ix, i; int token; char *val; ix = 0; do { token = next_token(&val, cfile); if (!is_identifier(token)) { parse_warn("expected option name."); skip_to_semi(cfile); return (0); } for (i = 0; i < 256; i++) if (!strcasecmp(dhcp_options[i].name, val)) break; if (i == 256) { parse_warn("%s: unexpected option name.", val); skip_to_semi(cfile); return (0); } list[ix++] = i; if (ix == 256) { parse_warn("%s: too many options.", val); skip_to_semi(cfile); return (0); } token = next_token(&val, cfile); } while (token == COMMA); if (token != SEMI) { parse_warn("expecting semicolon."); skip_to_semi(cfile); return (0); } return (ix); } /* * interface-declaration :== * INTERFACE string LBRACE client-declarations RBRACE */ void parse_interface_declaration(FILE *cfile, struct client_config *outer_config) { int token; char *val; struct interface_info *ip; token = next_token(&val, cfile); if (token != STRING) { parse_warn("expecting interface 
name (in quotes)."); skip_to_semi(cfile); return; } ip = interface_or_dummy(val); if (!ip->client) make_client_state(ip); if (!ip->client->config) make_client_config(ip, outer_config); token = next_token(&val, cfile); if (token != LBRACE) { parse_warn("expecting left brace."); skip_to_semi(cfile); return; } do { token = peek_token(&val, cfile); if (token == EOF) { parse_warn("unterminated interface declaration."); return; } if (token == RBRACE) break; parse_client_statement(cfile, ip, ip->client->config); } while (1); token = next_token(&val, cfile); } struct interface_info * interface_or_dummy(char *name) { struct interface_info *ip; /* Find the interface (if any) that matches the name. */ if (!strcmp(ifi->name, name)) return (ifi); /* If it's not a real interface, see if it's on the dummy list. */ for (ip = dummy_interfaces; ip; ip = ip->next) if (!strcmp(ip->name, name)) return (ip); /* * If we didn't find an interface, make a dummy interface as a * placeholder. */ ip = malloc(sizeof(*ip)); if (!ip) error("Insufficient memory to record interface %s", name); memset(ip, 0, sizeof(*ip)); strlcpy(ip->name, name, IFNAMSIZ); ip->next = dummy_interfaces; dummy_interfaces = ip; return (ip); } void make_client_state(struct interface_info *ip) { ip->client = malloc(sizeof(*(ip->client))); if (!ip->client) error("no memory for state on %s", ip->name); memset(ip->client, 0, sizeof(*(ip->client))); } void make_client_config(struct interface_info *ip, struct client_config *config) { ip->client->config = malloc(sizeof(struct client_config)); if (!ip->client->config) error("no memory for config for %s", ip->name); memset(ip->client->config, 0, sizeof(*(ip->client->config))); memcpy(ip->client->config, config, sizeof(*config)); } /* * client-lease-statement :== * RBRACE client-lease-declarations LBRACE * * client-lease-declarations :== * | * client-lease-declaration | * client-lease-declarations client-lease-declaration */ void parse_client_lease_statement(FILE *cfile, int 
is_static) { struct client_lease *lease, *lp, *pl; struct interface_info *ip; int token; char *val; token = next_token(&val, cfile); if (token != LBRACE) { parse_warn("expecting left brace."); skip_to_semi(cfile); return; } lease = malloc(sizeof(struct client_lease)); if (!lease) error("no memory for lease."); memset(lease, 0, sizeof(*lease)); lease->is_static = is_static; ip = NULL; do { token = peek_token(&val, cfile); if (token == EOF) { parse_warn("unterminated lease declaration."); free_client_lease(lease); return; } if (token == RBRACE) break; parse_client_lease_declaration(cfile, lease, &ip); } while (1); token = next_token(&val, cfile); /* If the lease declaration didn't include an interface * declaration that we recognized, it's of no use to us. */ if (!ip) { free_client_lease(lease); return; } /* Make sure there's a client state structure... */ if (!ip->client) make_client_state(ip); /* If this is an alias lease, it doesn't need to be sorted in. */ if (is_static == 2) { ip->client->alias = lease; return; } /* * The new lease may supersede a lease that's not the active * lease but is still on the lease list, so scan the lease list * looking for a lease with the same address, and if we find it, * toss it. */ pl = NULL; for (lp = ip->client->leases; lp; lp = lp->next) { if (lp->address.len == lease->address.len && !memcmp(lp->address.iabuf, lease->address.iabuf, lease->address.len)) { if (pl) pl->next = lp->next; else ip->client->leases = lp->next; free_client_lease(lp); break; } } /* * If this is a preloaded lease, just put it on the list of * recorded leases - don't make it the active lease. */ if (is_static) { lease->next = ip->client->leases; ip->client->leases = lease; return; } /* * The last lease in the lease file on a particular interface is * the active lease for that interface. 
Of course, we don't * know what the last lease in the file is until we've parsed * the whole file, so at this point, we assume that the lease we * just parsed is the active lease for its interface. If * there's already an active lease for the interface, and this * lease is for the same ip address, then we just toss the old * active lease and replace it with this one. If this lease is * for a different address, then if the old active lease has * expired, we dump it; if not, we put it on the list of leases * for this interface which are still valid but no longer * active. */ if (ip->client->active) { if (ip->client->active->expiry < cur_time) free_client_lease(ip->client->active); else if (ip->client->active->address.len == lease->address.len && !memcmp(ip->client->active->address.iabuf, lease->address.iabuf, lease->address.len)) free_client_lease(ip->client->active); else { ip->client->active->next = ip->client->leases; ip->client->leases = ip->client->active; } } ip->client->active = lease; /* Phew. 
*/
}

/*
 * client-lease-declaration :==
 *	BOOTP |
 *	INTERFACE string |
 *	FIXED_ADDR ip_address |
 *	MEDIUM string |
 *	FILENAME string |
 *	NEXT_SERVER ip_address |
 *	SERVER_NAME string |
 *	OPTION option-decl |
 *	RENEW time-decl |
 *	REBIND time-decl |
 *	EXPIRE time-decl
 *
 * Parse one declaration and record it in *lease.  The keyword is read
 * here; its argument is read either inline or by a helper.
 *
 * cfile - configuration/lease stream being scanned.
 * lease - lease record that receives the parsed value.
 * ipp   - for INTERFACE only: receives the result of interface_or_dummy().
 *
 * BOOTP, INTERFACE, FIXED_ADDR and NEXT_SERVER leave the switch with
 * `break' and get their terminating semicolon checked at the bottom.
 * Every other keyword returns straight from the switch, so its helper
 * has to consume the terminator itself.  parse_string_list() and
 * parse_option_decl() visibly do; parse_string() and parse_date() are
 * presumed to -- TODO confirm.
 */
void
parse_client_lease_declaration(FILE *cfile, struct client_lease *lease,
    struct interface_info **ipp)
{
	int token;
	char *val;
	struct interface_info *ip;

	switch (next_token(&val, cfile)) {
	case BOOTP:
		lease->is_bootp = 1;
		break;
	case INTERFACE:
		token = next_token(&val, cfile);
		if (token != STRING) {
			parse_warn("expecting interface name (in quotes).");
			skip_to_semi(cfile);
			/*
			 * NOTE(review): this `break' still reaches the
			 * semicolon check below.  If skip_to_semi() has
			 * already eaten the ';' a second warning follows
			 * -- confirm against skip_to_semi().
			 */
			break;
		}
		ip = interface_or_dummy(val);
		*ipp = ip;
		break;
	case FIXED_ADDR:
		/* On failure return at once; no semicolon check is made. */
		if (!parse_ip_addr(cfile, &lease->address))
			return;
		break;
	case MEDIUM:
		/* multiple == 0: exactly one string is accepted. */
		parse_string_list(cfile, &lease->medium, 0);
		return;
	case FILENAME:
		lease->filename = parse_string(cfile);
		return;
	case NEXT_SERVER:
		if (!parse_ip_addr(cfile, &lease->nextserver))
			return;
		break;
	case SERVER_NAME:
		lease->server_name = parse_string(cfile);
		return;
	case RENEW:
		lease->renewal = parse_date(cfile);
		return;
	case REBIND:
		lease->rebind = parse_date(cfile);
		return;
	case EXPIRE:
		lease->expiry = parse_date(cfile);
		return;
	case OPTION:
		/* The returned struct option pointer is not needed here. */
		parse_option_decl(cfile, lease->options);
		return;
	default:
		parse_warn("expecting lease declaration.");
		skip_to_semi(cfile);
		break;
	}
	/* Shared terminator check for the cases that used `break'. */
	token = next_token(&val, cfile);
	if (token != SEMI) {
		parse_warn("expecting semicolon.");
		skip_to_semi(cfile);
	}
}

/*
 * Parse `[vendor.]name value... ;' and store the encoded value in
 * options[option->code].
 *
 * cfile   - stream positioned just after the keyword that introduced the
 *           option.
 * options - table indexed by option code; the matching entry's data and
 *           len fields are overwritten.
 *
 * Returns the struct option that was looked up, or NULL after a parse
 * warning.  Allocation failures are reported through error().
 *
 * The value is assembled in hunkbuf according to option->format, one
 * format character per field:
 *	X    - data read by parse_X()
 *	t    - quoted text string
 *	I    - IP address
 *	L, l - 32-bit number
 *	S, s - 16-bit number
 *	B, b - 8-bit number
 *	f    - boolean (true/on, false/off)
 *	A    - stops the field loop; the fields before it are parsed again
 *	       for as long as the next token is a comma
 *
 * The numeric, address and flag cases share the `alloc' tail inside
 * case 'I' and the `need_number' / `bad_flag' error tails by way of
 * goto, so statement order in the switch matters.
 */
struct option *
parse_option_decl(FILE *cfile, struct option_data *options)
{
	char *val;
	int token;
	u_int8_t buf[4];		/* scratch for one converted number */
	u_int8_t hunkbuf[1024];		/* encoded option value so far */
	unsigned hunkix = 0;		/* bytes of hunkbuf in use */
	char *vendor;
	const char *fmt;
	struct universe *universe;
	struct option *option;
	struct iaddr ip_addr;
	u_int8_t *dp;
	unsigned len;
	int nul_term = 0;		/* 1 once a text field has been stored */

	token = next_token(&val, cfile);
	if (!is_identifier(token)) {
		parse_warn("expecting identifier after option keyword.");
		if (token != SEMI)
			skip_to_semi(cfile);
		return (NULL);
	}
	/* Copy the identifier: val is replaced by the next token read. */
	if ((vendor = strdup(val)) == NULL)
		error("no memory for vendor information.");

	token = peek_token(&val, cfile);
	if (token == DOT) {
		/* Go ahead and take the DOT token... */
		token = next_token(&val, cfile);

		/* The next token should be an identifier... */
		token = next_token(&val, cfile);
		if (!is_identifier(token)) {
			parse_warn("expecting identifier after '.'");
			if (token != SEMI)
				skip_to_semi(cfile);
			free(vendor);
			return (NULL);
		}

		/* Look up the option name hash table for the specified vendor. */
		universe = ((struct universe *)hash_lookup(&universe_hash,
		    (unsigned char *)vendor, 0));
		/* If it's not there, we can't parse the rest of the declaration. */
		if (!universe) {
			parse_warn("no vendor named %s.", vendor);
			skip_to_semi(cfile);
			free(vendor);
			return (NULL);
		}
	} else {
		/* Use the default hash table, which contains all the standard dhcp option names. */
		val = vendor;
		universe = &dhcp_universe;
	}

	/* Look up the actual option info... */
	option = (struct option *)hash_lookup(universe->hash,
	    (unsigned char *)val, 0);

	/* If we didn't get an option structure, it's an undefined option. */
	if (!option) {
		/* val == vendor means no "vendor." prefix was given. */
		if (val == vendor)
			parse_warn("no option named %s", val);
		else
			parse_warn("no option named %s for vendor %s",
			    val, vendor);
		skip_to_semi(cfile);
		free(vendor);
		return (NULL);
	}

	/* Free the initial identifier token. */
	free(vendor);

	/* Parse the option data... */
	do {
		for (fmt = option->format; *fmt; fmt++) {
			if (*fmt == 'A')
				break;
			switch (*fmt) {
			case 'X':
				/* parse_X() is given the space left in hunkbuf. */
				len = parse_X(cfile, &hunkbuf[hunkix],
				    sizeof(hunkbuf) - hunkix);
				hunkix += len;
				break;
			case 't':	/* Text string... */
				token = next_token(&val, cfile);
				if (token != STRING) {
					parse_warn("expecting string.");
					skip_to_semi(cfile);
					return (NULL);
				}
				len = strlen(val);
				if (hunkix + len + 1 > sizeof(hunkbuf)) {
					parse_warn("option data buffer %s",
					    "overflow");
					skip_to_semi(cfile);
					return (NULL);
				}
				/*
				 * The terminator is copied but not counted
				 * in hunkix; nul_term makes the final
				 * allocation one byte longer instead.
				 */
				memcpy(&hunkbuf[hunkix], val, len + 1);
				nul_term = 1;
				hunkix += len;
				break;
			case 'I':	/* IP address. */
				if (!parse_ip_addr(cfile, &ip_addr))
					return (NULL);
				len = ip_addr.len;
				dp = ip_addr.iabuf;
				/* Shared tail: append len bytes from dp. */
alloc:
				if (hunkix + len > sizeof(hunkbuf)) {
					parse_warn("option data buffer "
					    "overflow");
					skip_to_semi(cfile);
					return (NULL);
				}
				memcpy(&hunkbuf[hunkix], dp, len);
				hunkix += len;
				break;
			case 'L':	/* Unsigned 32-bit integer... */
			case 'l':	/* Signed 32-bit integer... */
				token = next_token(&val, cfile);
				if (token != NUMBER) {
					/* Shared error tail for all numeric cases. */
need_number:
					parse_warn("expecting number.");
					if (token != SEMI)
						skip_to_semi(cfile);
					return (NULL);
				}
				convert_num(buf, val, 0, 32);
				len = 4;
				dp = buf;
				goto alloc;
			case 's':	/* Signed 16-bit integer. */
			case 'S':	/* Unsigned 16-bit integer. */
				token = next_token(&val, cfile);
				if (token != NUMBER)
					goto need_number;
				convert_num(buf, val, 0, 16);
				len = 2;
				dp = buf;
				goto alloc;
			case 'b':	/* Signed 8-bit integer. */
			case 'B':	/* Unsigned 8-bit integer. */
				token = next_token(&val, cfile);
				if (token != NUMBER)
					goto need_number;
				convert_num(buf, val, 0, 8);
				len = 1;
				dp = buf;
				goto alloc;
			case 'f':	/* Boolean flag. */
				token = next_token(&val, cfile);
				if (!is_identifier(token)) {
					parse_warn("expecting identifier.");
					/* Shared error tail for the flag case. */
bad_flag:
					if (token != SEMI)
						skip_to_semi(cfile);
					return (NULL);
				}
				if (!strcasecmp(val, "true") ||
				    !strcasecmp(val, "on"))
					buf[0] = 1;
				else if (!strcasecmp(val, "false") ||
				    !strcasecmp(val, "off"))
					buf[0] = 0;
				else {
					parse_warn("expecting boolean.");
					goto bad_flag;
				}
				len = 1;
				dp = buf;
				goto alloc;
			default:
				warning("Bad format %c in parse_option_param.",
				    *fmt);
				skip_to_semi(cfile);
				return (NULL);
			}
		}
		token = next_token(&val, cfile);
	} while (*fmt == 'A' && token == COMMA);

	if (token != SEMI) {
		parse_warn("semicolon expected.");
		skip_to_semi(cfile);
		return (NULL);
	}

	/*
	 * NOTE(review): any pointer already stored in .data is overwritten
	 * without being freed -- confirm the same option is never parsed
	 * twice into one table.
	 */
	options[option->code].data = malloc(hunkix + nul_term);
	if (!options[option->code].data)
		error("out of memory allocating option data.");
	memcpy(options[option->code].data, hunkbuf, hunkix + nul_term);
	/* len does not count the extra nul_term byte. */
	options[option->code].len = hunkix;
	return (option);
}

/*
 * Parse one quoted string, or a comma-separated list of them when
 * `multiple' is nonzero, followed by a semicolon, and append each string
 * to the list at *lp.
 *
 * cfile    - stream to read from.
 * lp       - head of the list; set to the first new entry when the list
 *            was empty.
 * multiple - nonzero to keep reading after each comma.
 *
 * Entries appended before a parse error stay on the list.
 */
void
parse_string_list(FILE *cfile, struct string_list **lp, int multiple)
{
	int token;
	char *val;
	size_t valsize;
	struct string_list *cur, *tmp;

	/* Find the last medium in the media list. */
	if (*lp)
		for (cur = *lp; cur->next; cur = cur->next)
			;	/* nothing */
	else
		cur = NULL;

	do {
		token = next_token(&val, cfile);
		if (token != STRING) {
			parse_warn("Expecting media options.");
			skip_to_semi(cfile);
			return;
		}

		/* valsize includes the terminating NUL. */
		valsize = strlen(val) + 1;
		tmp = new_string_list(valsize);
		if (tmp == NULL)
			error("no memory for string list entry.");
		memcpy(tmp->string, val, valsize);
		tmp->next = NULL;

		/* Store this medium at the end of the media list. */
		if (cur)
			cur->next = tmp;
		else
			*lp = tmp;
		cur = tmp;

		token = next_token(&val, cfile);
	} while (multiple && token == COMMA);

	if (token != SEMI) {
		parse_warn("expecting semicolon.");
		skip_to_semi(cfile);
	}
}

/*
 * Parse a comma-separated list of IP addresses terminated by a semicolon
 * and push each one onto the front of config->reject_list, so the list
 * ends up in reverse order of appearance.
 *
 * cfile  - stream to read from.
 * config - client configuration whose reject_list is extended.
 *
 * Addresses pushed before a parse error stay on the list.
 */
void
parse_reject_statement(FILE *cfile, struct client_config *config)
{
	int token;
	char *val;
	struct iaddr addr;
	struct iaddrlist *list;

	do {
		if (!parse_ip_addr(cfile, &addr)) {
			parse_warn("expecting IP address.");
			skip_to_semi(cfile);
			return;
		}

		list = malloc(sizeof(struct iaddrlist));
		if (!list)
			error("no memory for reject list!");

		list->addr = addr;
		list->next = config->reject_list;
		config->reject_list = list;

		token = next_token(&val, cfile);
	} while (token == COMMA);

	if (token != SEMI) {
		parse_warn("expecting semicolon.");
		skip_to_semi(cfile);
	}
}
diff --git a/sbin/dhclient/conflex.c b/sbin/dhclient/conflex.c index 66fc9e397125..c11c9189527e 100644 --- a/sbin/dhclient/conflex.c +++ b/sbin/dhclient/conflex.c @@ -1,534 +1,536 @@ /* $OpenBSD: conflex.c,v 1.7 2004/09/15 19:02:38 deraadt Exp $ */ /* Lexical scanner for dhcpd config file... */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995, 1996, 1997 The Internet Software Consortium. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of The Internet Software Consortium nor the names * of its contributors may be used to endorse or promote products derived * from this software without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * This software has been written for the Internet Software Consortium * by Ted Lemon in cooperation with Vixie * Enterprises. To learn more about the Internet Software Consortium, * see ``http://www.vix.com/isc''. To learn more about Vixie * Enterprises, see ``http://www.vix.com''. 
*/ #include __FBSDID("$FreeBSD$"); #include #include "dhcpd.h" #include "dhctoken.h" int lexline; int lexchar; char *token_line; static char *prev_line; static char *cur_line; const char *tlname; int eol_token; static char line1[81]; static char line2[81]; static unsigned lpos; static unsigned line; static int tlpos; static int tline; static int token; static int ugflag; static char *tval; static char tokbuf[1500]; static int get_char(FILE *); static int get_token(FILE *); static void skip_to_eol(FILE *); static int read_string(FILE *); static int read_number(int, FILE *); static int read_num_or_name(int, FILE *); static int intern(char *, int); void new_parse(const char *name) { tlname = name; lpos = line = 1; cur_line = line1; prev_line = line2; token_line = cur_line; cur_line[0] = prev_line[0] = 0; warnings_occurred = 0; } static int get_char(FILE *cfile) { int c = getc(cfile); if (!ugflag) { if (c == '\n') { if (cur_line == line1) { cur_line = line2; prev_line = line1; } else { cur_line = line1; prev_line = line2; } line++; lpos = 1; cur_line[0] = 0; } else if (c != EOF) { if (lpos < sizeof(line1)) { cur_line[lpos - 1] = c; cur_line[lpos] = 0; } lpos++; } } else ugflag = 0; return (c); } static int get_token(FILE *cfile) { int c, ttok; static char tb[2]; int l, p; do { l = line; p = lpos; c = get_char(cfile); if (!(c == '\n' && eol_token) && isascii(c) && isspace(c)) continue; if (c == '#') { skip_to_eol(cfile); continue; } if (c == '"') { lexline = l; lexchar = p; ttok = read_string(cfile); break; } if ((isascii(c) && isdigit(c)) || c == '-') { lexline = l; lexchar = p; ttok = read_number(c, cfile); break; } else if (isascii(c) && isalpha(c)) { lexline = l; lexchar = p; ttok = read_num_or_name(c, cfile); break; } else { lexline = l; lexchar = p; tb[0] = c; tb[1] = 0; tval = tb; ttok = c; break; } } while (1); return (ttok); } int next_token(char **rval, FILE *cfile) { int rv; if (token) { if (lexline != tline) token_line = cur_line; lexchar = tlpos; lexline = 
tline; rv = token; token = 0; } else { rv = get_token(cfile); token_line = cur_line; } if (rval) *rval = tval; return (rv); } int peek_token(char **rval, FILE *cfile) { int x; if (!token) { tlpos = lexchar; tline = lexline; token = get_token(cfile); if (lexline != tline) token_line = prev_line; x = lexchar; lexchar = tlpos; tlpos = x; x = lexline; lexline = tline; tline = x; } if (rval) *rval = tval; return (token); } static void skip_to_eol(FILE *cfile) { int c; do { c = get_char(cfile); if (c == EOF) return; if (c == '\n') return; } while (1); } static int read_string(FILE *cfile) { int c, bs = 0; unsigned i; for (i = 0; i < sizeof(tokbuf); i++) { c = get_char(cfile); if (c == EOF) { parse_warn("eof in string constant"); break; } if (bs) { bs = 0; i--; tokbuf[i] = c; } else if (c == '\\') bs = 1; else if (c == '"') break; else tokbuf[i] = c; } /* * Normally, I'd feel guilty about this, but we're talking about * strings that'll fit in a DHCP packet here... */ if (i == sizeof(tokbuf)) { parse_warn("string constant larger than internal buffer"); i--; } tokbuf[i] = 0; tval = tokbuf; return (STRING); } static int read_number(int c, FILE *cfile) { int seenx = 0, _token = NUMBER; unsigned i = 0; tokbuf[i++] = c; for (; i < sizeof(tokbuf); i++) { c = get_char(cfile); if (!seenx && c == 'x') seenx = 1; else if (!isascii(c) || !isxdigit(c)) { ungetc(c, cfile); ugflag = 1; break; } tokbuf[i] = c; } if (i == sizeof(tokbuf)) { parse_warn("numeric token larger than internal buffer"); i--; } tokbuf[i] = 0; tval = tokbuf; return (_token); } static int read_num_or_name(int c, FILE *cfile) { unsigned i = 0; int rv = NUMBER_OR_NAME; tokbuf[i++] = c; for (; i < sizeof(tokbuf); i++) { c = get_char(cfile); if (!isascii(c) || (c != '-' && c != '_' && !isalnum(c))) { ungetc(c, cfile); ugflag = 1; break; } if (!isxdigit(c)) rv = NAME; tokbuf[i] = c; } if (i == sizeof(tokbuf)) { parse_warn("token larger than internal buffer"); i--; } tokbuf[i] = 0; tval = tokbuf; return (intern(tval, 
rv)); } static int intern(char *atom, int dfv) { if (!isascii(atom[0])) return (dfv); switch (tolower(atom[0])) { case 'a': if (!strcasecmp(atom + 1, "lways-reply-rfc1048")) return (ALWAYS_REPLY_RFC1048); if (!strcasecmp(atom + 1, "ppend")) return (APPEND); if (!strcasecmp(atom + 1, "llow")) return (ALLOW); if (!strcasecmp(atom + 1, "lias")) return (ALIAS); if (!strcasecmp(atom + 1, "bandoned")) return (ABANDONED); if (!strcasecmp(atom + 1, "uthoritative")) return (AUTHORITATIVE); break; case 'b': if (!strcasecmp(atom + 1, "ackoff-cutoff")) return (BACKOFF_CUTOFF); if (!strcasecmp(atom + 1, "ootp")) return (BOOTP); if (!strcasecmp(atom + 1, "ooting")) return (BOOTING); if (!strcasecmp(atom + 1, "oot-unknown-clients")) return (BOOT_UNKNOWN_CLIENTS); break; case 'c': if (!strcasecmp(atom + 1, "lass")) return (CLASS); if (!strcasecmp(atom + 1, "iaddr")) return (CIADDR); if (!strcasecmp(atom + 1, "lient-identifier")) return (CLIENT_IDENTIFIER); if (!strcasecmp(atom + 1, "lient-hostname")) return (CLIENT_HOSTNAME); break; case 'd': if (!strcasecmp(atom + 1, "omain")) return (DOMAIN); if (!strcasecmp(atom + 1, "eny")) return (DENY); if (!strncasecmp(atom + 1, "efault", 6)) { if (!atom[7]) return (DEFAULT); if (!strcasecmp(atom + 7, "-lease-time")) return (DEFAULT_LEASE_TIME); break; } if (!strncasecmp(atom + 1, "ynamic-bootp", 12)) { if (!atom[13]) return (DYNAMIC_BOOTP); if (!strcasecmp(atom + 13, "-lease-cutoff")) return (DYNAMIC_BOOTP_LEASE_CUTOFF); if (!strcasecmp(atom + 13, "-lease-length")) return (DYNAMIC_BOOTP_LEASE_LENGTH); break; } break; case 'e': if (!strcasecmp(atom + 1, "thernet")) return (ETHERNET); if (!strcasecmp(atom + 1, "nds")) return (ENDS); if (!strcasecmp(atom + 1, "xpire")) return (EXPIRE); break; case 'f': if (!strcasecmp(atom + 1, "ilename")) return (FILENAME); if (!strcasecmp(atom + 1, "ixed-address")) return (FIXED_ADDR); if (!strcasecmp(atom + 1, "ddi")) return (FDDI); break; case 'g': if (!strcasecmp(atom + 1, "iaddr")) return (GIADDR); if 
(!strcasecmp(atom + 1, "roup")) return (GROUP); if (!strcasecmp(atom + 1, "et-lease-hostnames")) return (GET_LEASE_HOSTNAMES); break; case 'h': if (!strcasecmp(atom + 1, "ost")) return (HOST); if (!strcasecmp(atom + 1, "ardware")) return (HARDWARE); if (!strcasecmp(atom + 1, "ostname")) return (HOSTNAME); break; case 'i': if (!strcasecmp(atom + 1, "nitial-interval")) return (INITIAL_INTERVAL); if (!strcasecmp(atom + 1, "nterface")) return (INTERFACE); break; case 'l': if (!strcasecmp(atom + 1, "ease")) return (LEASE); break; case 'm': if (!strcasecmp(atom + 1, "ax-lease-time")) return (MAX_LEASE_TIME); if (!strncasecmp(atom + 1, "edi", 3)) { if (!strcasecmp(atom + 4, "a")) return (MEDIA); if (!strcasecmp(atom + 4, "um")) return (MEDIUM); break; } break; case 'n': if (!strcasecmp(atom + 1, "ameserver")) return (NAMESERVER); if (!strcasecmp(atom + 1, "etmask")) return (NETMASK); if (!strcasecmp(atom + 1, "ext-server")) return (NEXT_SERVER); if (!strcasecmp(atom + 1, "ot")) return (TOKEN_NOT); break; case 'o': if (!strcasecmp(atom + 1, "ption")) return (OPTION); if (!strcasecmp(atom + 1, "ne-lease-per-client")) return (ONE_LEASE_PER_CLIENT); break; case 'p': if (!strcasecmp(atom + 1, "repend")) return (PREPEND); if (!strcasecmp(atom + 1, "acket")) return (PACKET); break; case 'r': if (!strcasecmp(atom + 1, "ange")) return (RANGE); if (!strcasecmp(atom + 1, "equest")) return (REQUEST); if (!strcasecmp(atom + 1, "equire")) return (REQUIRE); if (!strcasecmp(atom + 1, "etry")) return (RETRY); if (!strcasecmp(atom + 1, "enew")) return (RENEW); if (!strcasecmp(atom + 1, "ebind")) return (REBIND); if (!strcasecmp(atom + 1, "eboot")) return (REBOOT); if (!strcasecmp(atom + 1, "eject")) return (REJECT); break; case 's': if (!strcasecmp(atom + 1, "earch")) return (SEARCH); if (!strcasecmp(atom + 1, "tarts")) return (STARTS); if (!strcasecmp(atom + 1, "iaddr")) return (SIADDR); if (!strcasecmp(atom + 1, "ubnet")) return (SUBNET); if (!strcasecmp(atom + 1, "hared-network")) 
return (SHARED_NETWORK); if (!strcasecmp(atom + 1, "erver-name")) return (SERVER_NAME); if (!strcasecmp(atom + 1, "erver-identifier")) return (SERVER_IDENTIFIER); if (!strcasecmp(atom + 1, "elect-timeout")) return (SELECT_TIMEOUT); if (!strcasecmp(atom + 1, "end")) return (SEND); if (!strcasecmp(atom + 1, "cript")) return (SCRIPT); if (!strcasecmp(atom + 1, "upersede")) return (SUPERSEDE); break; case 't': if (!strcasecmp(atom + 1, "imestamp")) return (TIMESTAMP); if (!strcasecmp(atom + 1, "imeout")) return (TIMEOUT); if (!strcasecmp(atom + 1, "oken-ring")) return (TOKEN_RING); break; case 'u': if (!strncasecmp(atom + 1, "se", 2)) { if (!strcasecmp(atom + 3, "r-class")) return (USER_CLASS); if (!strcasecmp(atom + 3, "-host-decl-names")) return (USE_HOST_DECL_NAMES); if (!strcasecmp(atom + 3, "-lease-addr-for-default-route")) return (USE_LEASE_ADDR_FOR_DEFAULT_ROUTE); break; } if (!strcasecmp(atom + 1, "id")) return (UID); if (!strcasecmp(atom + 1, "nknown-clients")) return (UNKNOWN_CLIENTS); break; case 'v': if (!strcasecmp(atom + 1, "endor-class")) return (VENDOR_CLASS); + if (!strcasecmp(atom + 1, "lan-pcp")) + return (VLAN_PCP); break; case 'y': if (!strcasecmp(atom + 1, "iaddr")) return (YIADDR); break; } return (dfv); } diff --git a/sbin/dhclient/dhclient.conf.5 b/sbin/dhclient/dhclient.conf.5 index 2f28c5722574..14a0de4111dd 100644 --- a/sbin/dhclient/dhclient.conf.5 +++ b/sbin/dhclient/dhclient.conf.5 @@ -1,552 +1,559 @@ .\" $OpenBSD: dhclient.conf.5,v 1.5 2004/11/01 23:10:18 henning Exp $ .\" .\" Copyright (c) 1997 The Internet Software Consortium. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of The Internet Software Consortium nor the names .\" of its contributors may be used to endorse or promote products derived .\" from this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND .\" CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, .\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF .\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE .\" DISCLAIMED. IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR .\" CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, .\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT .\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF .\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND .\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" This software has been written for the Internet Software Consortium .\" by Ted Lemon in cooperation with Vixie .\" Enterprises. To learn more about the Internet Software Consortium, .\" see ``http://www.isc.org/isc''. To learn more about Vixie .\" Enterprises, see ``http://www.vix.com''. .\" .\" $FreeBSD$ .\" -.Dd May 31, 2018 +.Dd July 21, 2021 .Dt DHCLIENT.CONF 5 .Os .Sh NAME .Nm dhclient.conf .Nd DHCP client configuration file .Sh DESCRIPTION The .Nm file contains configuration information for .Xr dhclient 8 , the Internet Software Consortium DHCP Client. .Pp The .Nm file is a free-form ASCII text file. It is parsed by the recursive-descent parser built into .Xr dhclient 8 . 
The file may contain extra tabs and newlines for formatting purposes. Keywords in the file are case-insensitive. Comments may be placed anywhere within the file (except within quotes). Comments begin with the .Ql # character and end at the end of the line. .Pp The .Nm file can be used to configure the behaviour of the client in a wide variety of ways: protocol timing, information requested from the server, information required of the server, defaults to use if the server does not provide certain information, values with which to override information provided by the server, or values to prepend or append to information provided by the server. The configuration file can also be preinitialized with addresses to use on networks that do not have DHCP servers. .Sh PROTOCOL TIMING The timing behaviour of the client need not be configured by the user. If no timing configuration is provided by the user, a fairly reasonable timing behaviour will be used by default - one which results in fairly timely updates without placing an inordinate load on the server. .Pp The following statements can be used to adjust the timing behaviour of the DHCP client if required, however: .Bl -tag -width indent .It Ic timeout Ar time ; The .Ic timeout statement determines the amount of time that must pass between the time that the client begins to try to determine its address and the time that it decides that it is not going to be able to contact a server. By default, this timeout is sixty seconds. After the timeout has passed, if there are any static leases defined in the configuration file, or any leases remaining in the lease database that have not yet expired, the client will loop through these leases attempting to validate them, and if it finds one that appears to be valid, it will use that lease's address. If there are no valid static leases or unexpired leases in the lease database, the client will restart the protocol after the defined retry interval. 
.It Ic retry Ar time ; The .Ic retry statement determines the time that must pass after the client has determined that there is no DHCP server present before it tries again to contact a DHCP server. By default, this is five minutes. .It Ic select-timeout Ar time ; It is possible (some might say desirable) for there to be more than one DHCP server serving any given network. In this case, it is possible that a client may be sent more than one offer in response to its initial lease discovery message. It may be that one of these offers is preferable to the other (e.g., one offer may have the address the client previously used, and the other may not). .Pp The .Ic select-timeout is the time after the client sends its first lease discovery request at which it stops waiting for offers from servers, assuming that it has received at least one such offer. If no offers have been received by the time the .Ic select-timeout has expired, the client will accept the first offer that arrives. .Pp By default, the .Ic select-timeout is zero seconds - that is, the client will take the first offer it sees. .It Ic reboot Ar time ; When the client is restarted, it first tries to reacquire the last address it had. This is called the INIT-REBOOT state. If it is still attached to the same network it was attached to when it last ran, this is the quickest way to get started. The .Ic reboot statement sets the time that must elapse after the client first tries to reacquire its old address before it gives up and tries to discover a new address. By default, the reboot timeout is ten seconds. .It Ic backoff-cutoff Ar time ; The client uses an exponential backoff algorithm with some randomness, so that if many clients try to configure themselves at the same time, they will not make their requests in lockstep. The .Ic backoff-cutoff statement determines the maximum amount of time that the client is allowed to back off. It defaults to two minutes. 
.It Ic initial-interval Ar time ; The .Ic initial-interval statement sets the amount of time between the first attempt to reach a server and the second attempt to reach a server. Each time a message is sent, the interval between messages is incremented by twice the current interval multiplied by a random number between zero and one. If it is greater than the .Ic backoff-cutoff amount, it is set to that amount. It defaults to ten seconds. .El .Sh LEASE REQUIREMENTS AND REQUESTS The DHCP protocol allows the client to request that the server send it specific information, and not send it other information that it is not prepared to accept. The protocol also allows the client to reject offers from servers if they do not contain information the client needs, or if the information provided is not satisfactory. .Pp There is a variety of data contained in offers that DHCP servers send to DHCP clients. The data that can be specifically requested are called .Em DHCP Options . DHCP Options are defined in .Xr dhcp-options 5 . .Bl -tag -width indent .It Ic request Oo Ar option Oc Oo , Ar ... option Oc ; The .Ic request statement causes the client to request that any server responding to the client send the client its values for the specified options. Only the option names should be specified in the request statement - not option parameters. .It Ic require Oo Ar option Oc Oo , Ar ... option Oc ; The .Ic require statement lists options that must be sent in order for an offer to be accepted. Offers that do not contain all the listed options will be ignored. .It Ic send No { Oo Ar option declaration Oc Oo , Ar ... option declaration Oc } The .Ic send statement causes the client to send the specified options to the server with the specified values. These are full option declarations as described in .Xr dhcp-options 5 .
Options that are always sent in the DHCP protocol should not be specified here, except that the client can specify a .Ar dhcp-lease-time option other than the default requested lease time, which is two hours. The other obvious use for this statement is to send information to the server that will allow it to differentiate between this client and other clients or kinds of clients. .El .Sh OPTION MODIFIERS In some cases, a client may receive option data from the server which is not really appropriate for that client, or may not receive information that it needs, and for which a useful default value exists. It may also receive information which is useful, but which needs to be supplemented with local information. To handle these needs, several option modifiers are available. .Bl -tag -width indent .It Xo .Ic default No { Op Ar option declaration .Oo , Ar ... option declaration Oc } .Xc If for some set of options the client should use the value supplied by the server, but needs to use some default value if no value was supplied by the server, these values can be defined in the .Ic default statement. .It Xo .Ic supersede No { Op Ar option declaration .Oo , Ar ... option declaration Oc } .Xc If for some set of options the client should always use its own value rather than any value supplied by the server, these values can be defined in the .Ic supersede statement. .Pp Some option values have special meaning: .Bl -tag -width indent .It Ar interface-mtu Any server-supplied interface MTU is ignored by the client if a .Ic supersede zero value is configured. .El .It Xo .Ic prepend No { Op Ar option declaration .Oo , Ar ... option declaration Oc } .Xc If for some set of options the client should use a value you supply, and then use the values supplied by the server, if any, these values can be defined in the .Ic prepend statement. The .Ic prepend statement can only be used for options which allow more than one value to be given.
This restriction is not enforced - if violated, the results are unpredictable. .It Xo .Ic append No { Op Ar option declaration .Oo , Ar ... option declaration Oc } .Xc If for some set of options the client should first use the values supplied by the server, if any, and then use values you supply, these values can be defined in the .Ic append statement. The .Ic append statement can only be used for options which allow more than one value to be given. This restriction is not enforced - if you ignore it, the behaviour will be unpredictable. .El .Sh LEASE DECLARATIONS The lease declaration: .Pp .D1 Ic lease No { Ar lease-declaration Oo Ar ... lease-declaration Oc } .Pp The DHCP client may decide after some period of time (see .Sx PROTOCOL TIMING ) that it is not going to succeed in contacting a server. At that time, it consults its own database of old leases and tests each one that has not yet timed out by pinging the listed router for that lease to see if that lease could work. It is possible to define one or more .Em fixed leases in the client configuration file for networks where there is no DHCP or BOOTP service, so that the client can still automatically configure its address. This is done with the .Ic lease statement. .Pp NOTE: the lease statement is also used in the .Pa dhclient.leases file in order to record leases that have been received from DHCP servers. Some of the syntax for leases as described below is only needed in the .Pa dhclient.leases file. Such syntax is documented here for completeness. .Pp A lease statement consists of the .Ic lease keyword, followed by a left curly brace, followed by one or more lease declaration statements, followed by a right curly brace. The following lease declarations are possible: .Bl -tag -width indent .It Ic bootp ; The .Ic bootp statement is used to indicate that the lease was acquired using the BOOTP protocol rather than the DHCP protocol. It is never necessary to specify this in the client configuration file. 
The client uses this syntax in its lease database file. .It Ic interface Qq Ar string ; The .Ic interface lease statement is used to indicate the interface on which the lease is valid. If set, this lease will only be tried on a particular interface. When the client receives a lease from a server, it always records the interface number on which it received that lease. If predefined leases are specified in the .Nm file, the interface should also be specified, although this is not required. .It Ic fixed-address Ar ip-address ; The .Ic fixed-address statement is used to set the IP address of a particular lease. This is required for all lease statements. The IP address must be specified as a dotted quad (e.g., .Li 12.34.56.78 ) . .It Ic filename Qq Ar string ; The .Ic filename statement specifies the name of the boot filename to use. This is not used by the standard client configuration script, but is included for completeness. .It Ic server-name Qq Ar string ; The .Ic server-name statement specifies the name of the boot server name to use. This is also not used by the standard client configuration script. .It Ic option Ar option-declaration ; The .Ic option statement is used to specify the value of an option supplied by the server, or, in the case of predefined leases declared in .Nm , the value that the user wishes the client configuration script to use if the predefined lease is used. .It Ic script Qq Ar script-name ; The .Ic script statement is used to specify the pathname of the DHCP client configuration script. This script is used by the DHCP client to set each interface's initial configuration prior to requesting an address, to test the address once it has been offered, and to set the interface's final configuration once a lease has been acquired. If no lease is acquired, the script is used to test predefined leases, if any, and also called once if no valid lease can be identified. For more information, see .Xr dhclient.leases 5 . 
.It Ic medium Qq Ar "media setup" ; The .Ic medium statement can be used on systems where network interfaces cannot automatically determine the type of network to which they are connected. The .Ar "media setup" string is a system-dependent parameter which is passed to the DHCP client configuration script when initializing the interface. On .Ux and .Ux Ns -like systems, the argument is passed on the .Xr ifconfig 8 command line when configuring the interface. .Pp The DHCP client automatically declares this parameter if it used a media type (see the .Ic media statement) when configuring the interface in order to obtain a lease. This statement should be used in predefined leases only if the network interface requires media type configuration. .It Ic renew Ar date ; .It Ic rebind Ar date ; .It Ic expire Ar date ; The .Ic renew statement defines the time at which the DHCP client should begin trying to contact its server to renew a lease that it is using. The .Ic rebind statement defines the time at which the DHCP client should begin to try to contact .Em any DHCP server in order to renew its lease. The .Ic expire statement defines the time at which the DHCP client must stop using a lease if it has not been able to contact a server in order to renew it. .El .Pp These declarations are automatically set in leases acquired by the DHCP client, but must also be configured in predefined leases - a predefined lease whose expiry time has passed will not be used by the DHCP client. .Pp Dates are specified as follows: .Bd -ragged -offset indent .Ar <weekday> .Sm off .Ar <year> No / Ar <month> No / Ar <day> .Ar <hour> : <minute> : <second> .Sm on .Ed .Pp The weekday is present to make it easy for a human to tell when a lease expires - it is specified as a number from zero to six, with zero being Sunday. When declaring a predefined lease, it can always be specified as zero. The year is specified with the century, so it should generally be four digits except for really long leases.
The month is specified as a number starting with 1 for January. The day of the month is likewise specified starting with 1. The hour is a number between 0 and 23, the minute a number between 0 and 59, and the second also a number between 0 and 59. .Sh ALIAS DECLARATIONS .Ic alias No { Ar declarations ... No } .Pp Some DHCP clients running TCP/IP roaming protocols may require that in addition to the lease they may acquire via DHCP, their interface also be configured with a predefined IP alias so that they can have a permanent IP address even while roaming. The Internet Software Consortium DHCP client does not support roaming with fixed addresses directly, but in order to facilitate such experimentation, the DHCP client can be set up to configure an IP alias using the .Ic alias declaration. .Pp The .Ic alias declaration resembles a lease declaration, except that options other than the subnet-mask option are ignored by the standard client configuration script, and expiry times are ignored. A typical alias declaration includes an interface declaration, a fixed-address declaration for the IP alias address, and a subnet-mask option declaration. A medium statement should never be included in an alias declaration. .Sh OTHER DECLARATIONS .Bl -tag -width indent .It Ic reject Ar ip-address ; The .Ic reject statement causes the DHCP client to reject offers from servers who use the specified address as a server identifier. This can be used to avoid being configured by rogue or misconfigured DHCP servers, although it should be a last resort - better to track down the bad DHCP server and fix it. .It Ic interface Qo Ar name Qc { Ar declarations ... No } A client with more than one network interface may require different behaviour depending on which interface is being configured. 
All timing parameters and declarations other than lease and alias declarations can be enclosed in an interface declaration, and those parameters will then be used only for the interface that matches the specified name. Interfaces for which there is no interface declaration will use the parameters declared outside of any interface declaration, or the default settings. .It Ic media Qo Ar "media setup" Qc Oo , Qo Ar "media setup" Qc , Ar ... Oc ; The .Ic media statement defines one or more media configuration parameters which may be tried while attempting to acquire an IP address. The DHCP client will cycle through each media setup string on the list, configuring the interface using that setup and attempting to boot, and then trying the next one. This can be used for network interfaces which are not capable of sensing the media type unaided - whichever media type succeeds in getting a request to the server and hearing the reply is probably right (no guarantees). .Pp The media setup is only used for the initial phase of address acquisition (the DHCPDISCOVER and DHCPOFFER packets). Once an address has been acquired, the DHCP client will record it in its lease database and will record the media type used to acquire the address. Whenever the client tries to renew the lease, it will use that same media type. The lease must expire before the client will go back to cycling through media types. +.It Ic vlan-pcp Ar code ; +The +.Ic vlan-pcp +statement sets the PCP (Priority Code Point) value for the VLAN header. +This requires the +.Va net.link.vlan.mtag_pcp +sysctl to be set to 1. .El .Sh EXAMPLES The following configuration file is used on a laptop which has an IP alias of .Li 192.5.5.213 , and has one interface, .Li ep0 (a 3Com 3C589C). Booting intervals have been shortened somewhat from the default, because the client is known to spend most of its time on networks with little DHCP activity. The laptop does roam to multiple networks. 
.Bd -literal -offset indent timeout 60; retry 60; reboot 10; select-timeout 5; initial-interval 2; reject 192.33.137.209; interface "ep0" { send host-name "andare.fugue.com"; send dhcp-client-identifier 1:0:a0:24:ab:fb:9c; send dhcp-lease-time 3600; supersede domain-name "fugue.com rc.vix.com home.vix.com"; prepend domain-name-servers 127.0.0.1; request subnet-mask, broadcast-address, time-offset, routers, domain-name, domain-name-servers, host-name; require subnet-mask, domain-name-servers; script "/etc/dhclient-script"; media "media 10baseT/UTP", "media 10base2/BNC"; } alias { interface "ep0"; fixed-address 192.5.5.213; option subnet-mask 255.255.255.255; } .Ed .Pp This is a very complicated .Nm file - in general, yours should be much simpler. In many cases, it is sufficient to just create an empty .Nm file - the defaults are usually fine. .Sh SEE ALSO .Xr dhclient.leases 5 , .Xr dhcp-options 5 , .Xr dhcpd.conf 5 , .Xr dhclient 8 , .Xr dhcpd 8 .Rs .%R "RFC 2132, RFC 2131" .Re .Sh AUTHORS .An -nosplit The .Xr dhclient 8 utility was written by .An Ted Lemon Aq Mt mellon@vix.com under a contract with Vixie Labs. .Pp The current implementation was reworked by .An Henning Brauer Aq Mt henning@openbsd.org . diff --git a/sbin/dhclient/dhcpd.h b/sbin/dhclient/dhcpd.h index 240a3ae23bda..b151daa90a1c 100644 --- a/sbin/dhclient/dhcpd.h +++ b/sbin/dhclient/dhcpd.h @@ -1,451 +1,452 @@ /* $OpenBSD: dhcpd.h,v 1.33 2004/05/06 22:29:15 deraadt Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2004 Henning Brauer * Copyright (c) 1995, 1996, 1997, 1998, 1999 * The Internet Software Consortium. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of The Internet Software Consortium nor the names * of its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * This software has been written for the Internet Software Consortium * by Ted Lemon in cooperation with Vixie * Enterprises. To learn more about the Internet Software Consortium, * see ``http://www.vix.com/isc''. To learn more about Vixie * Enterprises, see ``http://www.vix.com''. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dhcp.h" #include "tree.h" #define LOCAL_PORT 68 #define REMOTE_PORT 67 struct option_data { size_t len; u_int8_t *data; }; struct string_list { struct string_list *next; char *string; }; struct iaddr { size_t len; unsigned char iabuf[16]; }; struct iaddrlist { struct iaddrlist *next; struct iaddr addr; }; struct packet { struct dhcp_packet *raw; int packet_length; int packet_type; int options_valid; int client_port; struct iaddr client_addr; struct interface_info *interface; struct hardware *haddr; struct option_data options[256]; }; struct hardware { u_int8_t htype; u_int8_t hlen; u_int8_t haddr[16]; }; struct client_lease { struct client_lease *next; time_t expiry, renewal, rebind; struct iaddr address; struct iaddr nextserver; char *server_name; char *filename; struct string_list *medium; unsigned int is_static : 1; unsigned int is_bootp : 1; struct option_data options[256]; }; /* Possible states in which the client can be. 
*/ enum dhcp_state { S_REBOOTING, S_INIT, S_SELECTING, S_REQUESTING, S_BOUND, S_RENEWING, S_REBINDING }; struct client_config { struct option_data defaults[256]; enum { ACTION_DEFAULT, ACTION_SUPERSEDE, ACTION_PREPEND, ACTION_APPEND } default_actions[256]; struct option_data send_options[256]; u_int8_t required_options[256]; u_int8_t requested_options[256]; int requested_option_count; + u_int vlan_pcp; time_t timeout; time_t initial_interval; time_t retry_interval; time_t select_interval; time_t reboot_timeout; time_t backoff_cutoff; struct string_list *media; char *script_name; enum { IGNORE, ACCEPT, PREFER } bootp_policy; struct string_list *medium; struct iaddrlist *reject_list; }; struct client_state { struct client_lease *active; struct client_lease *new; struct client_lease *offered_leases; struct client_lease *leases; struct client_lease *alias; enum dhcp_state state; struct iaddr destination; u_int32_t xid; u_int16_t secs; time_t first_sending; time_t interval; struct string_list *medium; struct dhcp_packet packet; int packet_length; struct iaddr requested_address; struct client_config *config; char **scriptEnv; int scriptEnvsize; struct string_list *env; int envc; }; struct interface_info { struct interface_info *next; struct hardware hw_address; struct in_addr primary_address; char name[IFNAMSIZ]; int rfdesc; int wfdesc; int ufdesc; unsigned char *rbuf; size_t rbuf_max; size_t rbuf_offset; size_t rbuf_len; struct ifreq *ifp; struct client_state *client; int noifmedia; int errors; int dead; u_int16_t index; int linkstat; }; struct timeout { struct timeout *next; time_t when; void (*func)(void *); void *what; }; struct protocol { struct protocol *next; int fd; void (*handler)(struct protocol *); void *local; }; #define DEFAULT_HASH_SIZE 97 struct hash_bucket { struct hash_bucket *next; const unsigned char *name; int len; unsigned char *value; }; struct hash_table { int hash_count; struct hash_bucket *buckets[DEFAULT_HASH_SIZE]; }; /* Default path to dhcpd 
config file. */ #define _PATH_DHCLIENT_CONF "/etc/dhclient.conf" #define _PATH_DHCLIENT_DB "/var/db/dhclient.leases" #define DHCPD_LOG_FACILITY LOG_DAEMON #define MAX_TIME 0x7fffffff #define MIN_TIME 0 /* External definitions... */ /* options.c */ int cons_options(struct packet *, struct dhcp_packet *, int, struct tree_cache **, int, int, int, u_int8_t *, int); const char *pretty_print_option(unsigned int, unsigned char *, int, int, int); void do_packet(struct interface_info *, struct dhcp_packet *, int, unsigned int, struct iaddr, struct hardware *); /* errwarn.c */ extern int warnings_occurred; void error(const char *, ...) __attribute__ ((__format__ (__printf__, 1, 2))) __dead2; int warning(const char *, ...) __attribute__ ((__format__ (__printf__, 1, 2))); int note(const char *, ...) __attribute__ ((__format__ (__printf__, 1, 2))); int debug(const char *, ...) __attribute__ ((__format__ (__printf__, 1, 2))); int parse_warn(const char *, ...) __attribute__ ((__format__ (__printf__, 1, 2))); /* conflex.c */ extern int lexline, lexchar; extern char *token_line; extern const char *tlname; extern char comments[4096]; extern int comment_index; extern int eol_token; void new_parse(const char *); int next_token(char **, FILE *); int peek_token(char **, FILE *); /* parse.c */ void skip_to_semi(FILE *); int parse_semi(FILE *); char *parse_string(FILE *); int parse_ip_addr(FILE *, struct iaddr *); void parse_hardware_param(FILE *, struct hardware *); void parse_lease_time(FILE *, time_t *); unsigned char *parse_numeric_aggregate(FILE *, unsigned char *, size_t *, int, unsigned, int); void convert_num(unsigned char *, char *, unsigned, int); time_t parse_date(FILE *); /* tree.c */ pair cons(caddr_t, pair); /* alloc.c */ struct string_list *new_string_list(size_t size); struct hash_table *new_hash_table(int); struct hash_bucket *new_hash_bucket(void); /* bpf.c */ int if_register_bpf(struct interface_info *, int); void if_register_send(struct interface_info *); void 
if_register_receive(struct interface_info *); void send_packet_unpriv(int, struct dhcp_packet *, size_t, struct in_addr, struct in_addr); struct imsg_hdr; void send_packet_priv(struct interface_info *, struct imsg_hdr *, int); ssize_t receive_packet(struct interface_info *, unsigned char *, size_t, struct sockaddr_in *, struct hardware *); /* dispatch.c */ extern void (*bootp_packet_handler)(struct interface_info *, struct dhcp_packet *, int, unsigned int, struct iaddr, struct hardware *); void discover_interfaces(struct interface_info *); void reinitialize_interfaces(void); void dispatch(void); void got_one(struct protocol *); void add_timeout(time_t, void (*)(void *), void *); void cancel_timeout(void (*)(void *), void *); void add_protocol(const char *, int, void (*)(struct protocol *), void *); void remove_protocol(struct protocol *); int interface_link_status(char *); void interface_set_mtu_unpriv(int, u_int16_t); void interface_set_mtu_priv(char *, u_int16_t); /* hash.c */ struct hash_table *new_hash(void); void add_hash(struct hash_table *, const unsigned char *, int, unsigned char *); void *hash_lookup(struct hash_table *, unsigned char *, int); /* tables.c */ extern struct option dhcp_options[256]; extern unsigned char dhcp_option_default_priority_list[]; extern int sizeof_dhcp_option_default_priority_list; extern struct hash_table universe_hash; extern struct universe dhcp_universe; void initialize_universes(void); /* convert.c */ u_int32_t getULong(unsigned char *); int32_t getLong(unsigned char *); u_int16_t getUShort(unsigned char *); int16_t getShort(unsigned char *); void putULong(unsigned char *, u_int32_t); void putLong(unsigned char *, int32_t); void putUShort(unsigned char *, unsigned int); void putShort(unsigned char *, int); /* inet.c */ struct iaddr subnet_number(struct iaddr, struct iaddr); struct iaddr broadcast_addr(struct iaddr, struct iaddr); int addr_eq(struct iaddr, struct iaddr); char *piaddr(struct iaddr); /* dhclient.c */ extern 
cap_channel_t *capsyslog; extern const char *path_dhclient_conf; extern char *path_dhclient_db; extern time_t cur_time; extern int log_priority; extern int log_perror; extern struct client_config top_level_config; extern struct pidfh *pidfile; extern struct interface_info *ifi; void dhcpoffer(struct packet *); void dhcpack(struct packet *); void dhcpnak(struct packet *); void send_discover(void *); void send_request(void *); void send_decline(void *); void state_reboot(void *); void state_init(void *); void state_selecting(void *); void state_requesting(void *); void state_bound(void *); void state_panic(void *); void bind_lease(struct interface_info *); void make_discover(struct interface_info *, struct client_lease *); void make_request(struct interface_info *, struct client_lease *); void make_decline(struct interface_info *, struct client_lease *); void free_client_lease(struct client_lease *); void rewrite_client_leases(void); void write_client_lease(struct interface_info *, struct client_lease *, int); void priv_script_init(const char *, char *); void priv_script_write_params(const char *, struct client_lease *); int priv_script_go(void); void script_init(const char *, struct string_list *); void script_write_params(const char *, struct client_lease *); int script_go(void); void client_envadd(struct client_state *, const char *, const char *, const char *, ...); void script_set_env(struct client_state *, const char *, const char *, const char *); void script_flush_env(struct client_state *); int dhcp_option_ev_name(char *, size_t, struct option *); struct client_lease *packet_to_lease(struct packet *); void go_daemon(void); void client_location_changed(void); void bootp(struct packet *); void dhcp(struct packet *); /* packet.c */ void assemble_hw_header(struct interface_info *, unsigned char *, int *); void assemble_udp_ip_header(unsigned char *, int *, u_int32_t, u_int32_t, unsigned int, unsigned char *, int); ssize_t decode_hw_header(unsigned char *, int, 
struct hardware *); ssize_t decode_udp_ip_header(unsigned char *, int, struct sockaddr_in *, unsigned char *, int); /* clparse.c */ int read_client_conf(void); void read_client_leases(void); void parse_client_statement(FILE *, struct interface_info *, struct client_config *); unsigned parse_X(FILE *, u_int8_t *, unsigned); int parse_option_list(FILE *, u_int8_t *); void parse_interface_declaration(FILE *, struct client_config *); struct interface_info *interface_or_dummy(char *); void make_client_state(struct interface_info *); void make_client_config(struct interface_info *, struct client_config *); void parse_client_lease_statement(FILE *, int); void parse_client_lease_declaration(FILE *, struct client_lease *, struct interface_info **); struct option *parse_option_decl(FILE *, struct option_data *); void parse_string_list(FILE *, struct string_list **, int); void parse_reject_statement(FILE *, struct client_config *); /* privsep.c */ struct buf *buf_open(size_t); int buf_add(struct buf *, const void *, size_t); int buf_close(int, struct buf *); ssize_t buf_read(int, void *, size_t); void dispatch_imsg(struct interface_info *, int); diff --git a/sbin/dhclient/dhctoken.h b/sbin/dhclient/dhctoken.h index 26c81aa734dc..c929307c7e06 100644 --- a/sbin/dhclient/dhctoken.h +++ b/sbin/dhclient/dhctoken.h @@ -1,140 +1,141 @@ /* $OpenBSD: dhctoken.h,v 1.2 2004/02/04 12:16:56 henning Exp $ */ /* Tokens for config file lexer and parser. */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995, 1996, 1997, 1998, 1999 * The Internet Software Consortium. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of The Internet Software Consortium nor the names * of its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE INTERNET SOFTWARE CONSORTIUM AND * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE INTERNET SOFTWARE CONSORTIUM OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * This software has been written for the Internet Software Consortium * by Ted Lemon in cooperation with Vixie * Enterprises. To learn more about the Internet Software Consortium, * see ``http://www.vix.com/isc''. To learn more about Vixie * Enterprises, see ``http://www.vix.com''. * * $FreeBSD$ */ #define SEMI ';' #define DOT '.' 
#define COLON ':' #define COMMA ',' #define SLASH '/' #define LBRACE '{' #define RBRACE '}' #define FIRST_TOKEN HOST #define HOST 256 #define HARDWARE 257 #define FILENAME 258 #define FIXED_ADDR 259 #define OPTION 260 #define ETHERNET 261 #define STRING 262 #define NUMBER 263 #define NUMBER_OR_NAME 264 #define NAME 265 #define TIMESTAMP 266 #define STARTS 267 #define ENDS 268 #define UID 269 #define CLASS 270 #define LEASE 271 #define RANGE 272 #define PACKET 273 #define CIADDR 274 #define YIADDR 275 #define SIADDR 276 #define GIADDR 277 #define SUBNET 278 #define NETMASK 279 #define DEFAULT_LEASE_TIME 280 #define MAX_LEASE_TIME 281 #define VENDOR_CLASS 282 #define USER_CLASS 283 #define SHARED_NETWORK 284 #define SERVER_NAME 285 #define DYNAMIC_BOOTP 286 #define SERVER_IDENTIFIER 287 #define DYNAMIC_BOOTP_LEASE_CUTOFF 288 #define DYNAMIC_BOOTP_LEASE_LENGTH 289 #define BOOT_UNKNOWN_CLIENTS 290 #define NEXT_SERVER 291 #define TOKEN_RING 292 #define GROUP 293 #define ONE_LEASE_PER_CLIENT 294 #define GET_LEASE_HOSTNAMES 295 #define USE_HOST_DECL_NAMES 296 #define SEND 297 #define CLIENT_IDENTIFIER 298 #define REQUEST 299 #define REQUIRE 300 #define TIMEOUT 301 #define RETRY 302 #define SELECT_TIMEOUT 303 #define SCRIPT 304 #define INTERFACE 305 #define RENEW 306 #define REBIND 307 #define EXPIRE 308 #define UNKNOWN_CLIENTS 309 #define ALLOW 310 #define BOOTP 311 #define DENY 312 #define BOOTING 313 #define DEFAULT 314 #define MEDIA 315 #define MEDIUM 316 #define ALIAS 317 #define REBOOT 318 #define ABANDONED 319 #define BACKOFF_CUTOFF 320 #define INITIAL_INTERVAL 321 #define NAMESERVER 322 #define DOMAIN 323 #define SEARCH 324 #define SUPERSEDE 325 #define APPEND 326 #define PREPEND 327 #define HOSTNAME 328 #define CLIENT_HOSTNAME 329 #define REJECT 330 #define FDDI 331 #define USE_LEASE_ADDR_FOR_DEFAULT_ROUTE 332 #define AUTHORITATIVE 333 #define TOKEN_NOT 334 #define ALWAYS_REPLY_RFC1048 335 +#define VLAN_PCP 336 #define is_identifier(x) ((x) >= FIRST_TOKEN && \ (x) 
!= STRING && \ (x) != NUMBER && \ (x) != EOF) diff --git a/share/man/man4/bpf.4 b/share/man/man4/bpf.4 index 200328891041..66f4ae72918b 100644 --- a/share/man/man4/bpf.4 +++ b/share/man/man4/bpf.4 @@ -1,1192 +1,1194 @@ .\" Copyright (c) 2007 Seccuris Inc. .\" All rights reserved. .\" .\" This software was developed by Robert N. M. Watson under contract to .\" Seccuris Inc. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" Copyright (c) 1990 The Regents of the University of California. .\" All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that: (1) source code distributions .\" retain the above copyright notice and this paragraph in its entirety, (2) .\" distributions including binary code include the above copyright notice and .\" this paragraph in its entirety in the documentation or other materials .\" provided with the distribution, and (3) all advertising materials mentioning .\" features or use of this software display the following acknowledgement: .\" ``This product includes software developed by the University of California, .\" Lawrence Berkeley Laboratory and its contributors.'' Neither the name of .\" the University nor the names of its contributors may be used to endorse .\" or promote products derived from this software without specific prior .\" written permission. .\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED .\" WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF .\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. .\" .\" This document is derived in part from the enet man page (enet.4) .\" distributed with 4.3BSD Unix. .\" .\" $FreeBSD$ .\" -.Dd October 9, 2020 +.Dd July 22, 2021 .Dt BPF 4 .Os .Sh NAME .Nm bpf .Nd Berkeley Packet Filter .Sh SYNOPSIS .Cd device bpf .Sh DESCRIPTION The Berkeley Packet Filter provides a raw interface to data link layers in a protocol independent fashion. All packets on the network, even those destined for other hosts, are accessible through this mechanism. .Pp The packet filter appears as a character special device, .Pa /dev/bpf . After opening the device, the file descriptor must be bound to a specific network interface with the .Dv BIOCSETIF ioctl. A given interface can be shared by multiple listeners, and the filter underlying each descriptor will see an identical packet stream. .Pp Associated with each open instance of a .Nm file is a user-settable packet filter. 
Whenever a packet is received by an interface, all file descriptors listening on that interface apply their filter. Each descriptor that accepts the packet receives its own copy. .Pp A packet can be sent out on the network by writing to a .Nm file descriptor. The writes are unbuffered, meaning only one packet can be processed per write. Currently, only writes to Ethernets and .Tn SLIP links are supported. .Sh BUFFER MODES .Nm devices deliver packet data to the application via memory buffers provided by the application. The buffer mode is set using the .Dv BIOCSETBUFMODE ioctl, and read using the .Dv BIOCGETBUFMODE ioctl. .Ss Buffered read mode By default, .Nm devices operate in the .Dv BPF_BUFMODE_BUFFER mode, in which packet data is copied explicitly from kernel to user memory using the .Xr read 2 system call. The user process will declare a fixed buffer size that will be used both for sizing internal buffers and for all .Xr read 2 operations on the file. This size is queried using the .Dv BIOCGBLEN ioctl, and is set using the .Dv BIOCSBLEN ioctl. Note that an individual packet larger than the buffer size is necessarily truncated. .Ss Zero-copy buffer mode .Nm devices may also operate in the .Dv BPF_BUFMODE_ZEROCOPY mode, in which packet data is written directly into two user memory buffers by the kernel, avoiding both system call and copying overhead. Buffers are of fixed (and equal) size, page-aligned, and an even multiple of the page size. The maximum zero-copy buffer size is returned by the .Dv BIOCGETZMAX ioctl. Note that an individual packet larger than the buffer size is necessarily truncated. 
.Pp The user process registers two memory buffers using the .Dv BIOCSETZBUF ioctl, which accepts a .Vt struct bpf_zbuf pointer as an argument: .Bd -literal struct bpf_zbuf { void *bz_bufa; void *bz_bufb; size_t bz_buflen; }; .Ed .Pp .Vt bz_bufa is a pointer to the userspace address of the first buffer that will be filled, and .Vt bz_bufb is a pointer to the second buffer. .Nm will then cycle between the two buffers as they fill and are acknowledged. .Pp Each buffer begins with a fixed-length header to hold synchronization and data length information for the buffer: .Bd -literal struct bpf_zbuf_header { volatile u_int bzh_kernel_gen; /* Kernel generation number. */ volatile u_int bzh_kernel_len; /* Length of data in the buffer. */ volatile u_int bzh_user_gen; /* User generation number. */ /* ...padding for future use... */ }; .Ed .Pp The header structure of each buffer, including all padding, should be zeroed before it is configured using .Dv BIOCSETZBUF . Remaining space in the buffer will be used by the kernel to store packet data, laid out in the same format as with buffered read mode. .Pp The kernel and the user process follow a simple acknowledgement protocol via the buffer header to synchronize access to the buffer: when the header generation numbers, .Vt bzh_kernel_gen and .Vt bzh_user_gen , hold the same value, the kernel owns the buffer, and when they differ, userspace owns the buffer. .Pp While the kernel owns the buffer, the contents are unstable and may change asynchronously; while the user process owns the buffer, its contents are stable and will not be changed until the buffer has been acknowledged. .Pp Initializing the buffer headers to all 0's before registering the buffer has the effect of assigning initial ownership of both buffers to the kernel. 
The kernel signals that a buffer has been assigned to userspace by modifying .Vt bzh_kernel_gen , and userspace acknowledges the buffer and returns it to the kernel by setting the value of .Vt bzh_user_gen to the value of .Vt bzh_kernel_gen . .Pp In order to avoid caching and memory re-ordering effects, the user process must use atomic operations and memory barriers when checking for and acknowledging buffers: .Bd -literal #include <machine/atomic.h> /* * Return ownership of a buffer to the kernel for reuse. */ static void buffer_acknowledge(struct bpf_zbuf_header *bzh) { atomic_store_rel_int(&bzh->bzh_user_gen, bzh->bzh_kernel_gen); } /* * Check whether a buffer has been assigned to userspace by the kernel. * Return true if userspace owns the buffer, and false otherwise. */ static int buffer_check(struct bpf_zbuf_header *bzh) { return (bzh->bzh_user_gen != atomic_load_acq_int(&bzh->bzh_kernel_gen)); } .Ed .Pp The user process may force the assignment of the next buffer, if any data is pending, to userspace using the .Dv BIOCROTZBUF ioctl. This allows the user process to retrieve data in a partially filled buffer before the buffer is full, such as following a timeout; the process must recheck for buffer ownership using the header generation numbers, as the buffer will not be assigned to userspace if no data was present. .Pp As in the buffered read mode, .Xr kqueue 2 , .Xr poll 2 , and .Xr select 2 may be used to sleep awaiting the availability of a completed buffer. They will return a readable file descriptor when ownership of the next buffer is assigned to user space. .Pp In the current implementation, the kernel may assign zero, one, or both buffers to the user process; however, an earlier implementation maintained the invariant that at most one buffer could be assigned to the user process at a time.
In order to both ensure progress and high performance, user processes should acknowledge a completely processed buffer as quickly as possible, returning it for reuse, and not block waiting on a second buffer while holding another buffer. .Sh IOCTLS The .Xr ioctl 2 command codes below are defined in .In net/bpf.h . All commands require these includes: .Bd -literal #include <sys/types.h> #include <sys/time.h> #include <sys/ioctl.h> #include <net/bpf.h> .Ed .Pp Additionally, .Dv BIOCGETIF and .Dv BIOCSETIF require .In sys/socket.h and .In net/if.h . .Pp In addition to .Dv FIONREAD the following commands may be applied to any open .Nm file. The (third) argument to .Xr ioctl 2 should be a pointer to the type indicated. .Bl -tag -width BIOCGETBUFMODE .It Dv BIOCGBLEN .Pq Li u_int Returns the required buffer length for reads on .Nm files. .It Dv BIOCSBLEN .Pq Li u_int Sets the buffer length for reads on .Nm files. The buffer must be set before the file is attached to an interface with .Dv BIOCSETIF . If the requested buffer size cannot be accommodated, the closest allowable size will be set and returned in the argument. A read call will result in .Er EINVAL if it is passed a buffer that is not this size. .It Dv BIOCGDLT .Pq Li u_int Returns the type of the data link layer underlying the attached interface. .Er EINVAL is returned if no interface has been specified. The device types, prefixed with .Dq Li DLT_ , are defined in .In net/bpf.h . .It Dv BIOCGDLTLIST .Pq Li "struct bpf_dltlist" Returns an array of the available types of the data link layer underlying the attached interface: .Bd -literal -offset indent struct bpf_dltlist { u_int bfl_len; u_int *bfl_list; }; .Ed .Pp The available types are returned in the array pointed to by the .Va bfl_list field while their length in u_int is supplied to the .Va bfl_len field. .Er ENOMEM is returned if there is not enough buffer space and .Er EFAULT is returned if a bad address is encountered.
The .Va bfl_len field is modified on return to indicate the actual length in u_int of the array returned. If .Va bfl_list is .Dv NULL , the .Va bfl_len field is set to indicate the required length of an array in u_int. .It Dv BIOCSDLT .Pq Li u_int Changes the type of the data link layer underlying the attached interface. .Er EINVAL is returned if no interface has been specified or the specified type is not available for the interface. .It Dv BIOCPROMISC Forces the interface into promiscuous mode. All packets, not just those destined for the local host, are processed. Since more than one file can be listening on a given interface, a listener that opened its interface non-promiscuously may receive packets promiscuously. This problem can be remedied with an appropriate filter. .Pp The interface remains in promiscuous mode until all files listening promiscuously are closed. .It Dv BIOCFLUSH Flushes the buffer of incoming packets, and resets the statistics that are returned by BIOCGSTATS. .It Dv BIOCGETIF .Pq Li "struct ifreq" Returns the name of the hardware interface that the file is listening on. The name is returned in the ifr_name field of the .Li ifreq structure. All other fields are undefined. .It Dv BIOCSETIF .Pq Li "struct ifreq" Sets the hardware interface associated with the file. This command must be performed before any packets can be read. The device is indicated by name using the .Li ifr_name field of the .Li ifreq structure. Additionally, performs the actions of .Dv BIOCFLUSH . .It Dv BIOCSRTIMEOUT .It Dv BIOCGRTIMEOUT .Pq Li "struct timeval" Sets or gets the read timeout parameter. The argument specifies the length of time to wait before timing out on a read request. This parameter is initialized to zero by .Xr open 2 , indicating no timeout. 
.It Dv BIOCGSTATS .Pq Li "struct bpf_stat" Returns the following structure of packet statistics: .Bd -literal struct bpf_stat { u_int bs_recv; /* number of packets received */ u_int bs_drop; /* number of packets dropped */ }; .Ed .Pp The fields are: .Bl -hang -offset indent .It Li bs_recv the number of packets received by the descriptor since opened or reset (including any buffered since the last read call); and .It Li bs_drop the number of packets which were accepted by the filter but dropped by the kernel because of buffer overflows (i.e., the application's reads are not keeping up with the packet traffic). .El .It Dv BIOCIMMEDIATE .Pq Li u_int Enables or disables .Dq immediate mode , based on the truth value of the argument. When immediate mode is enabled, reads return immediately upon packet reception. Otherwise, a read will block until either the kernel buffer becomes full or a timeout occurs. This is useful for programs like .Xr rarpd 8 which must respond to messages in real time. The default for a new file is off. .It Dv BIOCSETF .It Dv BIOCSETFNR .Pq Li "struct bpf_program" Sets the read filter program used by the kernel to discard uninteresting packets. An array of instructions and its length is passed in using the following structure: .Bd -literal struct bpf_program { u_int bf_len; struct bpf_insn *bf_insns; }; .Ed .Pp The filter program is pointed to by the .Li bf_insns field while its length in units of .Sq Li struct bpf_insn is given by the .Li bf_len field. See section .Sx "FILTER MACHINE" for an explanation of the filter language. The only difference between .Dv BIOCSETF and .Dv BIOCSETFNR is .Dv BIOCSETF performs the actions of .Dv BIOCFLUSH while .Dv BIOCSETFNR does not. .It Dv BIOCSETWF .Pq Li "struct bpf_program" Sets the write filter program used by the kernel to control what type of packets can be written to the interface. See the .Dv BIOCSETF command for more information on the .Nm filter program. 
.It Dv BIOCVERSION .Pq Li "struct bpf_version" Returns the major and minor version numbers of the filter language currently recognized by the kernel. Before installing a filter, applications must check that the current version is compatible with the running kernel. Version numbers are compatible if the major numbers match and the application minor is less than or equal to the kernel minor. The kernel version number is returned in the following structure: .Bd -literal struct bpf_version { u_short bv_major; u_short bv_minor; }; .Ed .Pp The current version numbers are given by .Dv BPF_MAJOR_VERSION and .Dv BPF_MINOR_VERSION from .In net/bpf.h . An incompatible filter may result in undefined behavior (most likely, an error returned by .Fn ioctl or haphazard packet matching). .It Dv BIOCGRSIG .It Dv BIOCSRSIG .Pq Li u_int Sets or gets the receive signal. This signal will be sent to the process or process group specified by .Dv FIOSETOWN . It defaults to .Dv SIGIO . .It Dv BIOCSHDRCMPLT .It Dv BIOCGHDRCMPLT .Pq Li u_int Sets or gets the status of the .Dq header complete flag. Set to zero if the link level source address should be filled in automatically by the interface output routine. Set to one if the link level source address will be written, as provided, to the wire. This flag is initialized to zero by default. .It Dv BIOCSSEESENT .It Dv BIOCGSEESENT .Pq Li u_int These commands are obsolete but left for compatibility. Use .Dv BIOCSDIRECTION and .Dv BIOCGDIRECTION instead. Sets or gets the flag determining whether locally generated packets on the interface should be returned by BPF. Set to zero to see only incoming packets on the interface. Set to one to see packets originating locally and remotely on the interface. This flag is initialized to one by default. .It Dv BIOCSDIRECTION .It Dv BIOCGDIRECTION .Pq Li u_int Sets or gets the setting determining whether incoming, outgoing, or all packets on the interface should be returned by BPF. 
Set to .Dv BPF_D_IN to see only incoming packets on the interface. Set to .Dv BPF_D_INOUT to see packets originating locally and remotely on the interface. Set to .Dv BPF_D_OUT to see only outgoing packets on the interface. This setting is initialized to .Dv BPF_D_INOUT by default. .It Dv BIOCSTSTAMP .It Dv BIOCGTSTAMP .Pq Li u_int Set or get format and resolution of the time stamps returned by BPF. Set to .Dv BPF_T_MICROTIME , .Dv BPF_T_MICROTIME_FAST , .Dv BPF_T_MICROTIME_MONOTONIC , or .Dv BPF_T_MICROTIME_MONOTONIC_FAST to get time stamps in 64-bit .Vt struct timeval format. Set to .Dv BPF_T_NANOTIME , .Dv BPF_T_NANOTIME_FAST , .Dv BPF_T_NANOTIME_MONOTONIC , or .Dv BPF_T_NANOTIME_MONOTONIC_FAST to get time stamps in 64-bit .Vt struct timespec format. Set to .Dv BPF_T_BINTIME , .Dv BPF_T_BINTIME_FAST , .Dv BPF_T_BINTIME_MONOTONIC , or .Dv BPF_T_BINTIME_MONOTONIC_FAST to get time stamps in 64-bit .Vt struct bintime format. Set to .Dv BPF_T_NONE to ignore time stamp. All 64-bit time stamp formats are wrapped in .Vt struct bpf_ts . The .Dv BPF_T_MICROTIME_FAST , .Dv BPF_T_NANOTIME_FAST , .Dv BPF_T_BINTIME_FAST , .Dv BPF_T_MICROTIME_MONOTONIC_FAST , .Dv BPF_T_NANOTIME_MONOTONIC_FAST , and .Dv BPF_T_BINTIME_MONOTONIC_FAST are analogs of the corresponding formats without the _FAST suffix but do not perform a full time counter query, so their accuracy is one timer tick. The .Dv BPF_T_MICROTIME_MONOTONIC , .Dv BPF_T_NANOTIME_MONOTONIC , .Dv BPF_T_BINTIME_MONOTONIC , .Dv BPF_T_MICROTIME_MONOTONIC_FAST , .Dv BPF_T_NANOTIME_MONOTONIC_FAST , and .Dv BPF_T_BINTIME_MONOTONIC_FAST store the time elapsed since kernel boot. This setting is initialized to .Dv BPF_T_MICROTIME by default. .It Dv BIOCFEEDBACK .Pq Li u_int Set packet feedback mode. This allows injected packets to be fed back as input to the interface when output via the interface is successful. When .Dv BPF_D_INOUT direction is set, an injected outgoing packet is not returned by BPF to avoid duplication.
This flag is initialized to zero by default. .It Dv BIOCLOCK Set the locked flag on the .Nm descriptor. This prevents the execution of ioctl commands which could change the underlying operating parameters of the device. .It Dv BIOCGETBUFMODE .It Dv BIOCSETBUFMODE .Pq Li u_int Get or set the current .Nm buffering mode; possible values are .Dv BPF_BUFMODE_BUFFER , buffered read mode, and .Dv BPF_BUFMODE_ZBUF , zero-copy buffer mode. .It Dv BIOCSETZBUF .Pq Li struct bpf_zbuf Set the current zero-copy buffer locations; buffer locations may be set only once zero-copy buffer mode has been selected, and prior to attaching to an interface. Buffers must be of identical size, page-aligned, and an integer multiple of pages in size. The three fields .Vt bz_bufa , .Vt bz_bufb , and .Vt bz_buflen must be filled out. If buffers have already been set for this device, the ioctl will fail. .It Dv BIOCGETZMAX .Pq Li size_t Get the largest individual zero-copy buffer size allowed. As two buffers are used in zero-copy buffer mode, the limit (in practice) is twice the returned size. As zero-copy buffers consume kernel address space, conservative selection of buffer size is suggested, especially when there are multiple .Nm descriptors in use on 32-bit systems. .It Dv BIOCROTZBUF Force ownership of the next buffer to be assigned to userspace, if any data is present in the buffer. If no data is present, the buffer will remain owned by the kernel. This allows consumers of zero-copy buffering to implement timeouts and retrieve partially filled buffers. In order to handle the case where no data is present in the buffer and therefore ownership is not assigned, the user process must check .Vt bzh_kernel_gen against .Vt bzh_user_gen . +.It Dv BIOCSETVLANPCP +Set the VLAN PCP bits to the supplied value. .El .Sh STANDARD IOCTLS .Nm now supports several standard .Xr ioctl 2 Ns 's which allow the user to do async and/or non-blocking I/O to an open .Nm file descriptor.
.Bl -tag -width SIOCGIFADDR .It Dv FIONREAD .Pq Li int Returns the number of bytes that are immediately available for reading. .It Dv SIOCGIFADDR .Pq Li "struct ifreq" Returns the address associated with the interface. .It Dv FIONBIO .Pq Li int Sets or clears non-blocking I/O. If arg is non-zero, then doing a .Xr read 2 when no data is available will return -1 and .Va errno will be set to .Er EAGAIN . If arg is zero, non-blocking I/O is disabled. Note: setting this overrides the timeout set by .Dv BIOCSRTIMEOUT . .It Dv FIOASYNC .Pq Li int Enables or disables async I/O. When enabled (arg is non-zero), the process or process group specified by .Dv FIOSETOWN will start receiving .Dv SIGIO 's when packets arrive. Note that you must do an .Dv FIOSETOWN in order for this to take effect, as the system will not default this for you. The signal may be changed via .Dv BIOCSRSIG . .It Dv FIOSETOWN .It Dv FIOGETOWN .Pq Li int Sets or gets the process or process group (if negative) that should receive .Dv SIGIO when packets are available. The signal may be changed using .Dv BIOCSRSIG (see above). .El .Sh BPF HEADER One of the following structures is prepended to each packet returned by .Xr read 2 or via a zero-copy buffer: .Bd -literal struct bpf_xhdr { struct bpf_ts bh_tstamp; /* time stamp */ uint32_t bh_caplen; /* length of captured portion */ uint32_t bh_datalen; /* original length of packet */ u_short bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; struct bpf_hdr { struct timeval bh_tstamp; /* time stamp */ uint32_t bh_caplen; /* length of captured portion */ uint32_t bh_datalen; /* original length of packet */ u_short bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; .Ed .Pp The fields, whose values are stored in host order, are: .Pp .Bl -tag -compact -width bh_datalen .It Li bh_tstamp The time at which the packet was processed by the packet filter.
.It Li bh_caplen The length of the captured portion of the packet. This is the minimum of the truncation amount specified by the filter and the length of the packet. .It Li bh_datalen The length of the packet off the wire. This value is independent of the truncation amount specified by the filter. .It Li bh_hdrlen The length of the .Nm header, which may not be equal to .\" XXX - not really a function call .Fn sizeof "struct bpf_xhdr" or .Fn sizeof "struct bpf_hdr" . .El .Pp The .Li bh_hdrlen field exists to account for padding between the header and the link level protocol. The purpose here is to guarantee proper alignment of the packet data structures, which is required on alignment sensitive architectures and improves performance on many other architectures. The packet filter ensures that the .Vt bpf_xhdr , .Vt bpf_hdr and the network layer header will be word aligned. Currently, .Vt bpf_hdr is used when the time stamp is set to .Dv BPF_T_MICROTIME , .Dv BPF_T_MICROTIME_FAST , .Dv BPF_T_MICROTIME_MONOTONIC , .Dv BPF_T_MICROTIME_MONOTONIC_FAST , or .Dv BPF_T_NONE for backward compatibility reasons. Otherwise, .Vt bpf_xhdr is used. However, .Vt bpf_hdr may be deprecated in the near future. Suitable precautions must be taken when accessing the link layer protocol fields on alignment restricted machines. (This is not a problem on an Ethernet, since the type field is a short falling on an even offset, and the addresses are probably accessed in a bytewise fashion). .Pp Additionally, individual packets are padded so that each starts on a word boundary. This requires that an application has some knowledge of how to get from packet to packet. The macro .Dv BPF_WORDALIGN is defined in .In net/bpf.h to facilitate this process. It rounds up its argument to the nearest word aligned value (where a word is .Dv BPF_ALIGNMENT bytes wide). 
.Pp For example, if .Sq Li p points to the start of a packet, this expression will advance it to the next packet: .Dl p = (char *)p + BPF_WORDALIGN(p->bh_hdrlen + p->bh_caplen) .Pp For the alignment mechanisms to work properly, the buffer passed to .Xr read 2 must itself be word aligned. The .Xr malloc 3 function will always return an aligned buffer. .Sh FILTER MACHINE A filter program is an array of instructions, with all branches forwardly directed, terminated by a .Em return instruction. Each instruction performs some action on the pseudo-machine state, which consists of an accumulator, index register, scratch memory store, and implicit program counter. .Pp The following structure defines the instruction format: .Bd -literal struct bpf_insn { u_short code; u_char jt; u_char jf; bpf_u_int32 k; }; .Ed .Pp The .Li k field is used in different ways by different instructions, and the .Li jt and .Li jf fields are used as offsets by the branch instructions. The opcodes are encoded in a semi-hierarchical fashion. There are eight classes of instructions: .Dv BPF_LD , .Dv BPF_LDX , .Dv BPF_ST , .Dv BPF_STX , .Dv BPF_ALU , .Dv BPF_JMP , .Dv BPF_RET , and .Dv BPF_MISC . Various other mode and operator bits are or'd into the class to give the actual instructions. The classes and modes are defined in .In net/bpf.h . .Pp Below are the semantics for each defined .Nm instruction. We use the convention that A is the accumulator, X is the index register, P[] packet data, and M[] scratch memory store. P[i:n] gives the data at byte offset .Dq i in the packet, interpreted as a word (n=4), unsigned halfword (n=2), or unsigned byte (n=1). M[i] gives the i'th word in the scratch memory store, which is only addressed in word units. The memory store is indexed from 0 to .Dv BPF_MEMWORDS - 1. .Li k , .Li jt , and .Li jf are the corresponding fields in the instruction definition. .Dq len refers to the length of the packet. 
.Bl -tag -width BPF_STXx .It Dv BPF_LD These instructions copy a value into the accumulator. The type of the source operand is specified by an .Dq addressing mode and can be a constant .Pq Dv BPF_IMM , packet data at a fixed offset .Pq Dv BPF_ABS , packet data at a variable offset .Pq Dv BPF_IND , the packet length .Pq Dv BPF_LEN , or a word in the scratch memory store .Pq Dv BPF_MEM . For .Dv BPF_IND and .Dv BPF_ABS , the data size must be specified as a word .Pq Dv BPF_W , halfword .Pq Dv BPF_H , or byte .Pq Dv BPF_B . The semantics of all the recognized .Dv BPF_LD instructions follow. .Bd -literal BPF_LD+BPF_W+BPF_ABS A <- P[k:4] BPF_LD+BPF_H+BPF_ABS A <- P[k:2] BPF_LD+BPF_B+BPF_ABS A <- P[k:1] BPF_LD+BPF_W+BPF_IND A <- P[X+k:4] BPF_LD+BPF_H+BPF_IND A <- P[X+k:2] BPF_LD+BPF_B+BPF_IND A <- P[X+k:1] BPF_LD+BPF_W+BPF_LEN A <- len BPF_LD+BPF_IMM A <- k BPF_LD+BPF_MEM A <- M[k] .Ed .It Dv BPF_LDX These instructions load a value into the index register. Note that the addressing modes are more restrictive than those of the accumulator loads, but they include .Dv BPF_MSH , a hack for efficiently loading the IP header length. .Bd -literal BPF_LDX+BPF_W+BPF_IMM X <- k BPF_LDX+BPF_W+BPF_MEM X <- M[k] BPF_LDX+BPF_W+BPF_LEN X <- len BPF_LDX+BPF_B+BPF_MSH X <- 4*(P[k:1]&0xf) .Ed .It Dv BPF_ST This instruction stores the accumulator into the scratch memory. We do not need an addressing mode since there is only one possibility for the destination. .Bd -literal BPF_ST M[k] <- A .Ed .It Dv BPF_STX This instruction stores the index register in the scratch memory store. .Bd -literal BPF_STX M[k] <- X .Ed .It Dv BPF_ALU The alu instructions perform operations between the accumulator and index register or constant, and store the result back in the accumulator. For binary operations, a source mode is required .Dv ( BPF_K or .Dv BPF_X ) . 
.Bd -literal BPF_ALU+BPF_ADD+BPF_K A <- A + k BPF_ALU+BPF_SUB+BPF_K A <- A - k BPF_ALU+BPF_MUL+BPF_K A <- A * k BPF_ALU+BPF_DIV+BPF_K A <- A / k BPF_ALU+BPF_MOD+BPF_K A <- A % k BPF_ALU+BPF_AND+BPF_K A <- A & k BPF_ALU+BPF_OR+BPF_K A <- A | k BPF_ALU+BPF_XOR+BPF_K A <- A ^ k BPF_ALU+BPF_LSH+BPF_K A <- A << k BPF_ALU+BPF_RSH+BPF_K A <- A >> k BPF_ALU+BPF_ADD+BPF_X A <- A + X BPF_ALU+BPF_SUB+BPF_X A <- A - X BPF_ALU+BPF_MUL+BPF_X A <- A * X BPF_ALU+BPF_DIV+BPF_X A <- A / X BPF_ALU+BPF_MOD+BPF_X A <- A % X BPF_ALU+BPF_AND+BPF_X A <- A & X BPF_ALU+BPF_OR+BPF_X A <- A | X BPF_ALU+BPF_XOR+BPF_X A <- A ^ X BPF_ALU+BPF_LSH+BPF_X A <- A << X BPF_ALU+BPF_RSH+BPF_X A <- A >> X BPF_ALU+BPF_NEG A <- -A .Ed .It Dv BPF_JMP The jump instructions alter flow of control. Conditional jumps compare the accumulator against a constant .Pq Dv BPF_K or the index register .Pq Dv BPF_X . If the result is true (or non-zero), the true branch is taken, otherwise the false branch is taken. Jump offsets are encoded in 8 bits so the longest jump is 256 instructions. However, the jump always .Pq Dv BPF_JA opcode uses the 32 bit .Li k field as the offset, allowing arbitrarily distant destinations. All conditionals use unsigned comparison conventions. .Bd -literal BPF_JMP+BPF_JA pc += k BPF_JMP+BPF_JGT+BPF_K pc += (A > k) ? jt : jf BPF_JMP+BPF_JGE+BPF_K pc += (A >= k) ? jt : jf BPF_JMP+BPF_JEQ+BPF_K pc += (A == k) ? jt : jf BPF_JMP+BPF_JSET+BPF_K pc += (A & k) ? jt : jf BPF_JMP+BPF_JGT+BPF_X pc += (A > X) ? jt : jf BPF_JMP+BPF_JGE+BPF_X pc += (A >= X) ? jt : jf BPF_JMP+BPF_JEQ+BPF_X pc += (A == X) ? jt : jf BPF_JMP+BPF_JSET+BPF_X pc += (A & X) ? jt : jf .Ed .It Dv BPF_RET The return instructions terminate the filter program and specify the amount of packet to accept (i.e., they return the truncation amount). A return value of zero indicates that the packet should be ignored. The return value is either a constant .Pq Dv BPF_K or the accumulator .Pq Dv BPF_A . 
.Bd -literal BPF_RET+BPF_A accept A bytes BPF_RET+BPF_K accept k bytes .Ed .It Dv BPF_MISC The miscellaneous category was created for anything that does not fit into the above classes, and for any new instructions that might need to be added. Currently, these are the register transfer instructions that copy the index register to the accumulator or vice versa. .Bd -literal BPF_MISC+BPF_TAX X <- A BPF_MISC+BPF_TXA A <- X .Ed .El .Pp The .Nm interface provides the following macros to facilitate array initializers: .Fn BPF_STMT opcode operand and .Fn BPF_JUMP opcode operand true_offset false_offset . .Sh SYSCTL VARIABLES A set of .Xr sysctl 8 variables controls the behaviour of the .Nm subsystem: .Bl -tag -width indent .It Va net.bpf.optimize_writers : No 0 Various programs use BPF to send (but not receive) raw packets (cdpd, lldpd, dhcpd, dhcp relays, etc. are good examples of such programs). They do not need incoming packets to be sent to them. Turning this option on attaches new BPF users to a write-only interface list until the program explicitly specifies a read filter via .Fn pcap_set_filter . This removes any performance degradation for high-speed interfaces. .It Va net.bpf.stats : Binary interface for retrieving general statistics. .It Va net.bpf.zerocopy_enable : No 0 Permits zero-copy to be used with net BPF readers. Use with caution. .It Va net.bpf.maxinsns : No 512 Maximum number of instructions that a BPF program can contain. Use .Xr tcpdump 1 .Fl d option to determine the approximate number of instructions for any filter. .It Va net.bpf.maxbufsize : No 524288 Maximum buffer size to allocate for the packet buffer. .It Va net.bpf.bufsize : No 4096 Default buffer size to allocate for the packet buffer. .El .Sh EXAMPLES The following filter is taken from the Reverse ARP Daemon. It accepts only Reverse ARP requests.
.Bd -literal struct bpf_insn insns[] = { BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_REVARP, 0, 3), BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 20), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, REVARP_REQUEST, 0, 1), BPF_STMT(BPF_RET+BPF_K, sizeof(struct ether_arp) + sizeof(struct ether_header)), BPF_STMT(BPF_RET+BPF_K, 0), }; .Ed .Pp This filter accepts only IP packets between host 128.3.112.15 and 128.3.112.35. .Bd -literal struct bpf_insn insns[] = { BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 8), BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 26), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x8003700f, 0, 2), BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 30), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x80037023, 3, 4), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x80037023, 0, 3), BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 30), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x8003700f, 0, 1), BPF_STMT(BPF_RET+BPF_K, (u_int)-1), BPF_STMT(BPF_RET+BPF_K, 0), }; .Ed .Pp Finally, this filter returns only TCP finger packets. We must parse the IP header to reach the TCP header. The .Dv BPF_JSET instruction checks that the IP fragment offset is 0 so we are sure that we have a TCP header. .Bd -literal struct bpf_insn insns[] = { BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 10), BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 23), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, 8), BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 20), BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x1fff, 6, 0), BPF_STMT(BPF_LDX+BPF_B+BPF_MSH, 14), BPF_STMT(BPF_LD+BPF_H+BPF_IND, 14), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 79, 2, 0), BPF_STMT(BPF_LD+BPF_H+BPF_IND, 16), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 79, 0, 1), BPF_STMT(BPF_RET+BPF_K, (u_int)-1), BPF_STMT(BPF_RET+BPF_K, 0), }; .Ed .Sh SEE ALSO .Xr tcpdump 1 , .Xr ioctl 2 , .Xr kqueue 2 , .Xr poll 2 , .Xr select 2 , .Xr ng_bpf 4 , .Xr bpf 9 .Rs .%A McCanne, S. .%A Jacobson V. 
.%T "An efficient, extensible, and portable network monitor" .Re .Sh HISTORY The Enet packet filter was created in 1980 by Mike Accetta and Rick Rashid at Carnegie-Mellon University. Jeffrey Mogul, at Stanford, ported the code to .Bx and continued its development from 1983 on. Since then, it has evolved into the Ultrix Packet Filter at .Tn DEC , a .Tn STREAMS .Tn NIT module under .Tn SunOS 4.1 , and .Tn BPF . .Sh AUTHORS .An -nosplit .An Steven McCanne , of Lawrence Berkeley Laboratory, implemented BPF in Summer 1990. Much of the design is due to .An Van Jacobson . .Pp Support for zero-copy buffers was added by .An Robert N. M. Watson under contract to Seccuris Inc. .Sh BUGS The read buffer must be of a fixed size (returned by the .Dv BIOCGBLEN ioctl). .Pp A file that does not request promiscuous mode may receive promiscuously received packets as a side effect of another file requesting this mode on the same hardware interface. This could be fixed in the kernel with additional processing overhead. However, we favor the model where all files must assume that the interface is promiscuous, and if so desired, must utilize a filter to reject foreign packets. .Pp The .Dv SEESENT , .Dv DIRECTION , and .Dv FEEDBACK settings have been observed to work incorrectly on some interface types, including those with hardware loopback rather than software loopback, and point-to-point interfaces. They appear to function correctly on a broad range of Ethernet-style interfaces. diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 605e7aa39fdb..0343c8f851be 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -1,3089 +1,3132 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2019 Andrey V. 
Elsukov * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)bpf.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_ddb.h" #include "opt_netgraph.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include +#include #include #include #include #ifdef BPF_JITTER #include #endif #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); static struct bpf_if_ext dead_bpf_if = { .bif_dlist = CK_LIST_HEAD_INITIALIZER() }; struct bpf_if { #define bif_next bif_ext.bif_next #define bif_dlist bif_ext.bif_dlist struct bpf_if_ext bif_ext; /* public members */ u_int bif_dlt; /* link layer type */ u_int bif_hdrlen; /* length of link header */ struct bpfd_list bif_wlist; /* writer-only list */ struct ifnet *bif_ifp; /* corresponding interface */ struct bpf_if **bif_bpf; /* Pointer to pointer to us */ volatile u_int bif_refcnt; struct epoch_context epoch_ctx; }; CTASSERT(offsetof(struct bpf_if, bif_ext) == 0); struct bpf_program_buffer { struct epoch_context epoch_ctx; #ifdef BPF_JITTER bpf_jit_filter *func; #endif void *buffer[0]; }; #if defined(DEV_BPF) || defined(NETGRAPH_BPF) #define PRINET 26 /* interruptible */ +#define BPF_PRIO_MAX 7 #define SIZEOF_BPF_HDR(type) \ (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen)) #ifdef COMPAT_FREEBSD32 #include #include #define BPF_ALIGNMENT32 sizeof(int32_t) #define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32) #ifndef BURN_BRIDGES /* * 32-bit version of structure prepended to each packet. We use this header * instead of the standard one for 32-bit streams. We mark the a stream as * 32-bit the first time we see a 32-bit compat ioctl request. 
*/ struct bpf_hdr32 { struct timeval32 bh_tstamp; /* time stamp */ uint32_t bh_caplen; /* length of captured portion */ uint32_t bh_datalen; /* original length of packet */ uint16_t bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; #endif struct bpf_program32 { u_int bf_len; uint32_t bf_insns; }; struct bpf_dltlist32 { u_int bfl_len; u_int bfl_list; }; #define BIOCSETF32 _IOW('B', 103, struct bpf_program32) #define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32) #define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32) #define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32) #define BIOCSETWF32 _IOW('B', 123, struct bpf_program32) #define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32) #endif #define BPF_LOCK() sx_xlock(&bpf_sx) #define BPF_UNLOCK() sx_xunlock(&bpf_sx) #define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED) /* * bpf_iflist is a list of BPF interface structures, each corresponding to a * specific DLT. The same network interface might have several BPF interface * structures registered by different layers in the stack (i.e., 802.11 * frames, ethernet frames, etc). 
*/ CK_LIST_HEAD(bpf_iflist, bpf_if); static struct bpf_iflist bpf_iflist; static struct sx bpf_sx; /* bpf global lock */ static int bpf_bpfd_cnt; static void bpfif_ref(struct bpf_if *); static void bpfif_rele(struct bpf_if *); static void bpfd_ref(struct bpf_d *); static void bpfd_rele(struct bpf_d *); static void bpf_attachd(struct bpf_d *, struct bpf_if *); static void bpf_detachd(struct bpf_d *); static void bpf_detachd_locked(struct bpf_d *, bool); static void bpfd_free(epoch_context_t); static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, struct sockaddr *, int *, struct bpf_d *); static int bpf_setif(struct bpf_d *, struct ifreq *); static void bpf_timed_out(void *); static __inline void bpf_wakeup(struct bpf_d *); static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), struct bintime *); static void reset_d(struct bpf_d *); static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); static int bpf_setdlt(struct bpf_d *, u_int); static void filt_bpfdetach(struct knote *); static int filt_bpfread(struct knote *, long); static void bpf_drvinit(void *); static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "bpf sysctl"); int bpf_maxinsns = BPF_MAXINSNS; SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, &bpf_maxinsns, 0, "Maximum bpf program instructions"); static int bpf_zerocopy_enable = 0; SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, bpf_stats_sysctl, "bpf statistics portal"); VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0; #define V_bpf_optimize_writers VNET(bpf_optimize_writers) SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN, 
&VNET_NAME(bpf_optimize_writers), 0, "Do not send packets until BPF program is set"); static d_open_t bpfopen; static d_read_t bpfread; static d_write_t bpfwrite; static d_ioctl_t bpfioctl; static d_poll_t bpfpoll; static d_kqfilter_t bpfkqfilter; static struct cdevsw bpf_cdevsw = { .d_version = D_VERSION, .d_open = bpfopen, .d_read = bpfread, .d_write = bpfwrite, .d_ioctl = bpfioctl, .d_poll = bpfpoll, .d_name = "bpf", .d_kqfilter = bpfkqfilter, }; static struct filterops bpfread_filtops = { .f_isfd = 1, .f_detach = filt_bpfdetach, .f_event = filt_bpfread, }; /* * LOCKING MODEL USED BY BPF * * Locks: * 1) global lock (BPF_LOCK). Sx, used to protect some global counters, * every bpf_iflist changes, serializes ioctl access to bpf descriptors. * 2) Descriptor lock. Mutex, used to protect BPF buffers and various * structure fields used by bpf_*tap* code. * * Lock order: global lock, then descriptor lock. * * There are several possible consumers: * * 1. The kernel registers interface pointer with bpfattach(). * Each call allocates new bpf_if structure, references ifnet pointer * and links bpf_if into bpf_iflist chain. This is protected with global * lock. * * 2. An userland application uses ioctl() call to bpf_d descriptor. * All such call are serialized with global lock. BPF filters can be * changed, but pointer to old filter will be freed using NET_EPOCH_CALL(). * Thus it should be safe for bpf_tap/bpf_mtap* code to do access to * filter pointers, even if change will happen during bpf_tap execution. * Destroying of bpf_d descriptor also is doing using NET_EPOCH_CALL(). * * 3. An userland application can write packets into bpf_d descriptor. * There we need to be sure, that ifnet won't disappear during bpfwrite(). * * 4. The kernel invokes bpf_tap/bpf_mtap* functions. The access to * bif_dlist is protected with net_epoch_preempt section. So, it should * be safe to make access to bpf_d descriptor inside the section. * * 5. 
The kernel invokes bpfdetach() on interface destroying. All lists * are modified with global lock held and actual free() is done using * NET_EPOCH_CALL(). */ static void bpfif_free(epoch_context_t ctx) { struct bpf_if *bp; bp = __containerof(ctx, struct bpf_if, epoch_ctx); if_rele(bp->bif_ifp); free(bp, M_BPF); } static void bpfif_ref(struct bpf_if *bp) { refcount_acquire(&bp->bif_refcnt); } static void bpfif_rele(struct bpf_if *bp) { if (!refcount_release(&bp->bif_refcnt)) return; NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx); } static void bpfd_ref(struct bpf_d *d) { refcount_acquire(&d->bd_refcnt); } static void bpfd_rele(struct bpf_d *d) { if (!refcount_release(&d->bd_refcnt)) return; NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx); } static struct bpf_program_buffer* bpf_program_buffer_alloc(size_t size, int flags) { return (malloc(sizeof(struct bpf_program_buffer) + size, M_BPF, flags)); } static void bpf_program_buffer_free(epoch_context_t ctx) { struct bpf_program_buffer *ptr; ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx); #ifdef BPF_JITTER if (ptr->func != NULL) bpf_destroy_jit_filter(ptr->func); #endif free(ptr, M_BPF); } /* * Wrapper functions for various buffering methods. If the set of buffer * modes expands, we will probably want to introduce a switch data structure * similar to protosw, et. 
*/ static void bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return (bpf_buffer_append_bytes(d, buf, offset, src, len)); case BPF_BUFMODE_ZBUF: counter_u64_add(d->bd_zcopy, 1); return (bpf_zerocopy_append_bytes(d, buf, offset, src, len)); default: panic("bpf_buf_append_bytes"); } } static void bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return (bpf_buffer_append_mbuf(d, buf, offset, src, len)); case BPF_BUFMODE_ZBUF: counter_u64_add(d->bd_zcopy, 1); return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len)); default: panic("bpf_buf_append_mbuf"); } } /* * This function gets called when the free buffer is re-assigned. */ static void bpf_buf_reclaimed(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return; case BPF_BUFMODE_ZBUF: bpf_zerocopy_buf_reclaimed(d); return; default: panic("bpf_buf_reclaimed"); } } /* * If the buffer mechanism has a way to decide that a held buffer can be made * free, then it is exposed via the bpf_canfreebuf() interface. (1) is * returned if the buffer can be discarded, (0) is returned if it cannot. */ static int bpf_canfreebuf(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: return (bpf_zerocopy_canfreebuf(d)); } return (0); } /* * Allow the buffer model to indicate that the current store buffer is * immutable, regardless of the appearance of space. Return (1) if the * buffer is writable, and (0) if not. */ static int bpf_canwritebuf(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: return (bpf_zerocopy_canwritebuf(d)); } return (1); } /* * Notify buffer model that an attempt to write to the store buffer has * resulted in a dropped packet, in which case the buffer may be considered * full. 
*/ static void bpf_buffull(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: bpf_zerocopy_buffull(d); break; } } /* * Notify the buffer model that a buffer has moved into the hold position. */ void bpf_bufheld(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: bpf_zerocopy_bufheld(d); break; } } static void bpf_free(struct bpf_d *d) { switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return (bpf_buffer_free(d)); case BPF_BUFMODE_ZBUF: return (bpf_zerocopy_free(d)); default: panic("bpf_buf_free"); } } static int bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) { if (d->bd_bufmode != BPF_BUFMODE_BUFFER) return (EOPNOTSUPP); return (bpf_buffer_uiomove(d, buf, len, uio)); } static int bpf_ioctl_sblen(struct bpf_d *d, u_int *i) { if (d->bd_bufmode != BPF_BUFMODE_BUFFER) return (EOPNOTSUPP); return (bpf_buffer_ioctl_sblen(d, i)); } static int bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) { if (d->bd_bufmode != BPF_BUFMODE_ZBUF) return (EOPNOTSUPP); return (bpf_zerocopy_ioctl_getzmax(td, d, i)); } static int bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) { if (d->bd_bufmode != BPF_BUFMODE_ZBUF) return (EOPNOTSUPP); return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); } static int bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) { if (d->bd_bufmode != BPF_BUFMODE_ZBUF) return (EOPNOTSUPP); return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); } /* * General BPF functions. */ static int bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) { const struct ieee80211_bpf_params *p; struct ether_header *eh; struct mbuf *m; int error; int len; int hlen; int slen; /* * Build a sockaddr based on the data link layer type. * We do this at this level because the ethernet header * is copied directly into the data field of the sockaddr. 
* In the case of SLIP, there is no header and the packet * is forwarded as is. * Also, we are careful to leave room at the front of the mbuf * for the link level header. */ switch (linktype) { case DLT_SLIP: sockp->sa_family = AF_INET; hlen = 0; break; case DLT_EN10MB: sockp->sa_family = AF_UNSPEC; /* XXX Would MAXLINKHDR be better? */ hlen = ETHER_HDR_LEN; break; case DLT_FDDI: sockp->sa_family = AF_IMPLINK; hlen = 0; break; case DLT_RAW: sockp->sa_family = AF_UNSPEC; hlen = 0; break; case DLT_NULL: /* * null interface types require a 4 byte pseudo header which * corresponds to the address family of the packet. */ sockp->sa_family = AF_UNSPEC; hlen = 4; break; case DLT_ATM_RFC1483: /* * en atm driver requires 4-byte atm pseudo header. * though it isn't standard, vpi:vci needs to be * specified anyway. */ sockp->sa_family = AF_UNSPEC; hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ break; case DLT_PPP: sockp->sa_family = AF_UNSPEC; hlen = 4; /* This should match PPP_HDRLEN */ break; case DLT_IEEE802_11: /* IEEE 802.11 wireless */ sockp->sa_family = AF_IEEE80211; hlen = 0; break; case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ sockp->sa_family = AF_IEEE80211; sockp->sa_len = 12; /* XXX != 0 */ hlen = sizeof(struct ieee80211_bpf_params); break; default: return (EIO); } len = uio->uio_resid; if (len < hlen || len - hlen > ifp->if_mtu) return (EMSGSIZE); m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR); if (m == NULL) return (EIO); m->m_pkthdr.len = m->m_len = len; *mp = m; error = uiomove(mtod(m, u_char *), len, uio); if (error) goto bad; slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); if (slen == 0) { error = EPERM; goto bad; } /* Check for multicast destination */ switch (linktype) { case DLT_EN10MB: eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; } if (d->bd_hdrcmplt == 0) { 
memcpy(eh->ether_shost, IF_LLADDR(ifp), sizeof(eh->ether_shost)); } break; } /* * Make room for link header, and copy it to sockaddr */ if (hlen != 0) { if (sockp->sa_family == AF_IEEE80211) { /* * Collect true length from the parameter header * NB: sockp is known to be zero'd so if we do a * short copy unspecified parameters will be * zero. * NB: packet may not be aligned after stripping * bpf params * XXX check ibp_vers */ p = mtod(m, const struct ieee80211_bpf_params *); hlen = p->ibp_len; if (hlen > sizeof(sockp->sa_data)) { error = EINVAL; goto bad; } } bcopy(mtod(m, const void *), sockp->sa_data, hlen); } *hdrlen = hlen; return (0); bad: m_freem(m); return (error); } /* * Attach descriptor to the bpf interface, i.e. make d listen on bp, * then reset its buffers and counters with reset_d(). */ static void bpf_attachd(struct bpf_d *d, struct bpf_if *bp) { int op_w; BPF_LOCK_ASSERT(); /* * Save sysctl value to protect from sysctl change * between reads */ op_w = V_bpf_optimize_writers || d->bd_writer; if (d->bd_bif != NULL) bpf_detachd_locked(d, false); /* * Point d at bp, and add d to the interface's list. * Since there are many applications using BPF for * sending raw packets only (dhcpd, cdpd are good examples) * we can delay adding d to the list of active listeners until * some filter is configured. */ BPFD_LOCK(d); /* * Hold reference to bpif while descriptor uses this interface. */ bpfif_ref(bp); d->bd_bif = bp; if (op_w != 0) { /* Add to writers-only list */ CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next); /* * We decrement bd_writer on every filter set operation. * First BIOCSETF is done by pcap_open_live() to set up * snap length. After that appliation usually sets its own * filter. */ d->bd_writer = 2; } else CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); reset_d(d); BPFD_UNLOCK(d); bpf_bpfd_cnt++; CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list", __func__, d->bd_pid, d->bd_writer ? 
"writer" : "active"); if (op_w == 0) EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1); } /* * Check if we need to upgrade our descriptor @d from write-only mode. */ static int bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen) { int is_snap, need_upgrade; /* * Check if we've already upgraded or new filter is empty. */ if (d->bd_writer == 0 || fcode == NULL) return (0); need_upgrade = 0; /* * Check if cmd looks like snaplen setting from * pcap_bpf.c:pcap_open_live(). * Note we're not checking .k value here: * while pcap_open_live() definitely sets to non-zero value, * we'd prefer to treat k=0 (deny ALL) case the same way: e.g. * do not consider upgrading immediately */ if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K)) is_snap = 1; else is_snap = 0; if (is_snap == 0) { /* * We're setting first filter and it doesn't look like * setting snaplen. We're probably using bpf directly. * Upgrade immediately. */ need_upgrade = 1; } else { /* * Do not require upgrade by first BIOCSETF * (used to set snaplen) by pcap_open_live(). */ if (--d->bd_writer == 0) { /* * First snaplen filter has already * been set. This is probably catch-all * filter */ need_upgrade = 1; } } CTR5(KTR_NET, "%s: filter function set by pid %d, " "bd_writer counter %d, snap %d upgrade %d", __func__, d->bd_pid, d->bd_writer, is_snap, need_upgrade); return (need_upgrade); } /* * Detach a file from its interface. */ static void bpf_detachd(struct bpf_d *d) { BPF_LOCK(); bpf_detachd_locked(d, false); BPF_UNLOCK(); } static void bpf_detachd_locked(struct bpf_d *d, bool detached_ifp) { struct bpf_if *bp; struct ifnet *ifp; int error; BPF_LOCK_ASSERT(); CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid); /* Check if descriptor is attached */ if ((bp = d->bd_bif) == NULL) return; BPFD_LOCK(d); /* Remove d from the interface's descriptor list. 
*/ CK_LIST_REMOVE(d, bd_next); /* Save bd_writer value */ error = d->bd_writer; ifp = bp->bif_ifp; d->bd_bif = NULL; if (detached_ifp) { /* * Notify descriptor as it's detached, so that any * sleepers wake up and get ENXIO. */ bpf_wakeup(d); } BPFD_UNLOCK(d); bpf_bpfd_cnt--; /* Call event handler iff d is attached */ if (error == 0) EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0); /* * Check if this descriptor had requested promiscuous mode. * If so and ifnet is not detached, turn it off. */ if (d->bd_promisc && !detached_ifp) { d->bd_promisc = 0; CURVNET_SET(ifp->if_vnet); error = ifpromisc(ifp, 0); CURVNET_RESTORE(); if (error != 0 && error != ENXIO) { /* * ENXIO can happen if a pccard is unplugged * Something is really wrong if we were able to put * the driver into promiscuous mode, but can't * take it out. */ if_printf(bp->bif_ifp, "bpf_detach: ifpromisc failed (%d)\n", error); } } bpfif_rele(bp); } /* * Close the descriptor by detaching it from its interface, * deallocating its buffers, and marking it free. */ static void bpf_dtor(void *data) { struct bpf_d *d = data; BPFD_LOCK(d); if (d->bd_state == BPF_WAITING) callout_stop(&d->bd_callout); d->bd_state = BPF_IDLE; BPFD_UNLOCK(d); funsetown(&d->bd_sigio); bpf_detachd(d); #ifdef MAC mac_bpfdesc_destroy(d); #endif /* MAC */ seldrain(&d->bd_sel); knlist_destroy(&d->bd_sel.si_note); callout_drain(&d->bd_callout); bpfd_rele(d); } /* * Open ethernet device. Returns ENXIO for illegal minor device number, * EBUSY if file is open by another process. 
*/ /* ARGSUSED */ static int bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) { struct bpf_d *d; int error; d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO); error = devfs_set_cdevpriv(d, bpf_dtor); if (error != 0) { free(d, M_BPF); return (error); } /* Setup counters */ d->bd_rcount = counter_u64_alloc(M_WAITOK); d->bd_dcount = counter_u64_alloc(M_WAITOK); d->bd_fcount = counter_u64_alloc(M_WAITOK); d->bd_wcount = counter_u64_alloc(M_WAITOK); d->bd_wfcount = counter_u64_alloc(M_WAITOK); d->bd_wdcount = counter_u64_alloc(M_WAITOK); d->bd_zcopy = counter_u64_alloc(M_WAITOK); /* * For historical reasons, perform a one-time initialization call to * the buffer routines, even though we're not yet committed to a * particular buffer method. */ bpf_buffer_init(d); if ((flags & FREAD) == 0) d->bd_writer = 2; d->bd_hbuf_in_use = 0; d->bd_bufmode = BPF_BUFMODE_BUFFER; d->bd_sig = SIGIO; d->bd_direction = BPF_D_INOUT; d->bd_refcnt = 1; BPF_PID_REFRESH(d, td); #ifdef MAC mac_bpfdesc_init(d); mac_bpfdesc_create(td->td_ucred, d); #endif mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF); callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); + /* Disable VLAN pcp tagging. */ + d->bd_pcp = 0; + return (0); } /* * bpfread - read next chunk of packets from buffers */ static int bpfread(struct cdev *dev, struct uio *uio, int ioflag) { struct bpf_d *d; int error; int non_block; int timed_out; error = devfs_get_cdevpriv((void **)&d); if (error != 0) return (error); /* * Restrict application to use a buffer the same size as * as kernel buffers. 
*/ if (uio->uio_resid != d->bd_bufsize) return (EINVAL); non_block = ((ioflag & O_NONBLOCK) != 0); BPFD_LOCK(d); BPF_PID_REFRESH_CUR(d); if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { BPFD_UNLOCK(d); return (EOPNOTSUPP); } if (d->bd_state == BPF_WAITING) callout_stop(&d->bd_callout); timed_out = (d->bd_state == BPF_TIMED_OUT); d->bd_state = BPF_IDLE; while (d->bd_hbuf_in_use) { error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET|PCATCH, "bd_hbuf", 0); if (error != 0) { BPFD_UNLOCK(d); return (error); } } /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets * have arrived to fill the store buffer. */ while (d->bd_hbuf == NULL) { if (d->bd_slen != 0) { /* * A packet(s) either arrived since the previous * read or arrived while we were asleep. */ if (d->bd_immediate || non_block || timed_out) { /* * Rotate the buffers and return what's here * if we are in immediate mode, non-blocking * flag is set, or this descriptor timed out. */ ROTATE_BUFFERS(d); break; } } /* * No data is available, check to see if the bpf device * is still pointed at a real interface. If not, return * ENXIO so that the userland process knows to rebind * it before using it again. */ if (d->bd_bif == NULL) { BPFD_UNLOCK(d); return (ENXIO); } if (non_block) { BPFD_UNLOCK(d); return (EWOULDBLOCK); } error = msleep(d, &d->bd_lock, PRINET|PCATCH, "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { BPFD_UNLOCK(d); return (error); } if (error == EWOULDBLOCK) { /* * On a timeout, return what's in the buffer, * which may be nothing. If there is something * in the store buffer, we can rotate the buffers. */ if (d->bd_hbuf) /* * We filled up the buffer in between * getting the timeout and arriving * here, so we don't need to rotate. */ break; if (d->bd_slen == 0) { BPFD_UNLOCK(d); return (0); } ROTATE_BUFFERS(d); break; } } /* * At this point, we know we have something in the hold slot. 
*/ d->bd_hbuf_in_use = 1; BPFD_UNLOCK(d); /* * Move data from hold buffer into user space. * We know the entire buffer is transferred since * we checked above that the read buffer is bpf_bufsize bytes. * * We do not have to worry about simultaneous reads because * we waited for sole access to the hold buffer above. */ error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); BPFD_LOCK(d); KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf")); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; bpf_buf_reclaimed(d); d->bd_hbuf_in_use = 0; wakeup(&d->bd_hbuf_in_use); BPFD_UNLOCK(d); return (error); } /* * If there are processes sleeping on this descriptor, wake them up. */ static __inline void bpf_wakeup(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); if (d->bd_state == BPF_WAITING) { callout_stop(&d->bd_callout); d->bd_state = BPF_IDLE; } wakeup(d); if (d->bd_async && d->bd_sig && d->bd_sigio) pgsigio(&d->bd_sigio, d->bd_sig, 0); selwakeuppri(&d->bd_sel, PRINET); KNOTE_LOCKED(&d->bd_sel.si_note, 0); } static void bpf_timed_out(void *arg) { struct bpf_d *d = (struct bpf_d *)arg; BPFD_LOCK_ASSERT(d); if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout)) return; if (d->bd_state == BPF_WAITING) { d->bd_state = BPF_TIMED_OUT; if (d->bd_slen != 0) bpf_wakeup(d); } } static int bpf_ready(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); if (!bpf_canfreebuf(d) && d->bd_hlen != 0) return (1); if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0) return (1); return (0); } +static int +bpf_setpcp(struct mbuf *m, u_int8_t prio) +{ + struct m_tag *mtag; + + KASSERT(prio <= BPF_PRIO_MAX, + ("%s with invalid pcp", __func__)); + + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL); + if (mtag == NULL) { + mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_OUT, + sizeof(uint8_t), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + m_tag_prepend(m, mtag); + } + + *(uint8_t *)(mtag + 1) = prio; + return (0); +} + static int bpfwrite(struct cdev *dev, 
struct uio *uio, int ioflag) { struct route ro; struct sockaddr dst; struct epoch_tracker et; struct bpf_if *bp; struct bpf_d *d; struct ifnet *ifp; struct mbuf *m, *mc; int error, hlen; error = devfs_get_cdevpriv((void **)&d); if (error != 0) return (error); NET_EPOCH_ENTER(et); BPFD_LOCK(d); BPF_PID_REFRESH_CUR(d); counter_u64_add(d->bd_wcount, 1); if ((bp = d->bd_bif) == NULL) { error = ENXIO; goto out_locked; } ifp = bp->bif_ifp; if ((ifp->if_flags & IFF_UP) == 0) { error = ENETDOWN; goto out_locked; } if (uio->uio_resid == 0) goto out_locked; bzero(&dst, sizeof(dst)); m = NULL; hlen = 0; /* * Take extra reference, unlock d and exit from epoch section, * since bpf_movein() can sleep. */ bpfd_ref(d); NET_EPOCH_EXIT(et); BPFD_UNLOCK(d); error = bpf_movein(uio, (int)bp->bif_dlt, ifp, &m, &dst, &hlen, d); if (error != 0) { counter_u64_add(d->bd_wdcount, 1); bpfd_rele(d); return (error); } BPFD_LOCK(d); /* * Check that descriptor is still attached to the interface. * This can happen on bpfdetach(). To avoid access to detached * ifnet, free mbuf and return ENXIO. */ if (d->bd_bif == NULL) { counter_u64_add(d->bd_wdcount, 1); BPFD_UNLOCK(d); bpfd_rele(d); m_freem(m); return (ENXIO); } counter_u64_add(d->bd_wfcount, 1); if (d->bd_hdrcmplt) dst.sa_family = pseudo_AF_HDRCMPLT; if (d->bd_feedback) { mc = m_dup(m, M_NOWAIT); if (mc != NULL) mc->m_pkthdr.rcvif = ifp; /* Set M_PROMISC for outgoing packets to be discarded. */ if (d->bd_direction == BPF_D_INOUT) m->m_flags |= M_PROMISC; } else mc = NULL; m->m_pkthdr.len -= hlen; m->m_len -= hlen; m->m_data += hlen; /* XXX */ CURVNET_SET(ifp->if_vnet); #ifdef MAC mac_bpfdesc_create_mbuf(d, m); if (mc != NULL) mac_bpfdesc_create_mbuf(d, mc); #endif bzero(&ro, sizeof(ro)); if (hlen != 0) { ro.ro_prepend = (u_char *)&dst.sa_data; ro.ro_plen = hlen; ro.ro_flags = RT_HAS_HEADER; } + if (d->bd_pcp != 0) + bpf_setpcp(m, d->bd_pcp); + /* Avoid possible recursion on BPFD_LOCK(). 
*/ NET_EPOCH_ENTER(et); BPFD_UNLOCK(d); error = (*ifp->if_output)(ifp, m, &dst, &ro); if (error) counter_u64_add(d->bd_wdcount, 1); if (mc != NULL) { if (error == 0) (*ifp->if_input)(ifp, mc); else m_freem(mc); } NET_EPOCH_EXIT(et); CURVNET_RESTORE(); bpfd_rele(d); return (error); out_locked: counter_u64_add(d->bd_wdcount, 1); NET_EPOCH_EXIT(et); BPFD_UNLOCK(d); return (error); } /* * Reset a descriptor by flushing its packet buffer and clearing the receive * and drop counts. This is doable for kernel-only buffers, but with * zero-copy buffers, we can't write to (or rotate) buffers that are * currently owned by userspace. It would be nice if we could encapsulate * this logic in the buffer code rather than here. */ static void reset_d(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); while (d->bd_hbuf_in_use) mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, "bd_hbuf", 0); if ((d->bd_hbuf != NULL) && (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) { /* Free the hold buffer. */ d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; bpf_buf_reclaimed(d); } if (bpf_canwritebuf(d)) d->bd_slen = 0; counter_u64_zero(d->bd_rcount); counter_u64_zero(d->bd_dcount); counter_u64_zero(d->bd_fcount); counter_u64_zero(d->bd_wcount); counter_u64_zero(d->bd_wfcount); counter_u64_zero(d->bd_wdcount); counter_u64_zero(d->bd_zcopy); } /* * FIONREAD Check for read packet available. * BIOCGBLEN Get buffer len [for read()]. * BIOCSETF Set read filter. * BIOCSETFNR Set read filter without resetting descriptor. * BIOCSETWF Set write filter. * BIOCFLUSH Flush read packet buffer. * BIOCPROMISC Put interface into promiscuous mode. * BIOCGDLT Get link layer type. * BIOCGETIF Get interface name. * BIOCSETIF Set interface. * BIOCSRTIMEOUT Set read timeout. * BIOCGRTIMEOUT Get read timeout. * BIOCGSTATS Get packet stats. * BIOCIMMEDIATE Set immediate mode. * BIOCVERSION Get filter language version. 
* BIOCGHDRCMPLT Get "header already complete" flag * BIOCSHDRCMPLT Set "header already complete" flag * BIOCGDIRECTION Get packet direction flag * BIOCSDIRECTION Set packet direction flag * BIOCGTSTAMP Get time stamp format and resolution. * BIOCSTSTAMP Set time stamp format and resolution. * BIOCLOCK Set "locked" flag * BIOCFEEDBACK Set packet feedback mode. * BIOCSETZBUF Set current zero-copy buffer locations. * BIOCGETZMAX Get maximum zero-copy buffer size. * BIOCROTZBUF Force rotation of zero-copy buffer * BIOCSETBUFMODE Set buffer mode. * BIOCGETBUFMODE Get current buffer mode. + * BIOCSETVLANPCP Set VLAN PCP tag. */ /* ARGSUSED */ static int bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct bpf_d *d; int error; error = devfs_get_cdevpriv((void **)&d); if (error != 0) return (error); /* * Refresh PID associated with this descriptor. */ BPFD_LOCK(d); BPF_PID_REFRESH(d, td); if (d->bd_state == BPF_WAITING) callout_stop(&d->bd_callout); d->bd_state = BPF_IDLE; BPFD_UNLOCK(d); if (d->bd_locked == 1) { switch (cmd) { case BIOCGBLEN: case BIOCFLUSH: case BIOCGDLT: case BIOCGDLTLIST: #ifdef COMPAT_FREEBSD32 case BIOCGDLTLIST32: #endif case BIOCGETIF: case BIOCGRTIMEOUT: #if defined(COMPAT_FREEBSD32) && defined(__amd64__) case BIOCGRTIMEOUT32: #endif case BIOCGSTATS: case BIOCVERSION: case BIOCGRSIG: case BIOCGHDRCMPLT: case BIOCSTSTAMP: case BIOCFEEDBACK: case FIONREAD: case BIOCLOCK: case BIOCSRTIMEOUT: #if defined(COMPAT_FREEBSD32) && defined(__amd64__) case BIOCSRTIMEOUT32: #endif case BIOCIMMEDIATE: case TIOCGPGRP: case BIOCROTZBUF: break; default: return (EPERM); } } #ifdef COMPAT_FREEBSD32 /* * If we see a 32-bit compat ioctl, mark the stream as 32-bit so * that it will get 32-bit packet headers. 
*/ switch (cmd) { case BIOCSETF32: case BIOCSETFNR32: case BIOCSETWF32: case BIOCGDLTLIST32: case BIOCGRTIMEOUT32: case BIOCSRTIMEOUT32: if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { BPFD_LOCK(d); d->bd_compat32 = 1; BPFD_UNLOCK(d); } } #endif CURVNET_SET(TD_TO_VNET(td)); switch (cmd) { default: error = EINVAL; break; /* * Check for read packet available. */ case FIONREAD: { int n; BPFD_LOCK(d); n = d->bd_slen; while (d->bd_hbuf_in_use) mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, "bd_hbuf", 0); if (d->bd_hbuf) n += d->bd_hlen; BPFD_UNLOCK(d); *(int *)addr = n; break; } /* * Get buffer len [for read()]. */ case BIOCGBLEN: BPFD_LOCK(d); *(u_int *)addr = d->bd_bufsize; BPFD_UNLOCK(d); break; /* * Set buffer length. */ case BIOCSBLEN: error = bpf_ioctl_sblen(d, (u_int *)addr); break; /* * Set link layer read filter. */ case BIOCSETF: case BIOCSETFNR: case BIOCSETWF: #ifdef COMPAT_FREEBSD32 case BIOCSETF32: case BIOCSETFNR32: case BIOCSETWF32: #endif error = bpf_setf(d, (struct bpf_program *)addr, cmd); break; /* * Flush read packet buffer. */ case BIOCFLUSH: BPFD_LOCK(d); reset_d(d); BPFD_UNLOCK(d); break; /* * Put interface into promiscuous mode. */ case BIOCPROMISC: if (d->bd_bif == NULL) { /* * No interface attached yet. */ error = EINVAL; break; } if (d->bd_promisc == 0) { error = ifpromisc(d->bd_bif->bif_ifp, 1); if (error == 0) d->bd_promisc = 1; } break; /* * Get current data link type. */ case BIOCGDLT: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else *(u_int *)addr = d->bd_bif->bif_dlt; BPF_UNLOCK(); break; /* * Get a list of supported data link types. 
*/ #ifdef COMPAT_FREEBSD32 case BIOCGDLTLIST32: { struct bpf_dltlist32 *list32; struct bpf_dltlist dltlist; list32 = (struct bpf_dltlist32 *)addr; dltlist.bfl_len = list32->bfl_len; dltlist.bfl_list = PTRIN(list32->bfl_list); BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else { error = bpf_getdltlist(d, &dltlist); if (error == 0) list32->bfl_len = dltlist.bfl_len; } BPF_UNLOCK(); break; } #endif case BIOCGDLTLIST: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); BPF_UNLOCK(); break; /* * Set data link type. */ case BIOCSDLT: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else error = bpf_setdlt(d, *(u_int *)addr); BPF_UNLOCK(); break; /* * Get interface name. */ case BIOCGETIF: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else { struct ifnet *const ifp = d->bd_bif->bif_ifp; struct ifreq *const ifr = (struct ifreq *)addr; strlcpy(ifr->ifr_name, ifp->if_xname, sizeof(ifr->ifr_name)); } BPF_UNLOCK(); break; /* * Set interface. */ case BIOCSETIF: { int alloc_buf, size; /* * Behavior here depends on the buffering model. If * we're using kernel memory buffers, then we can * allocate them here. If we're using zero-copy, * then the user process must have registered buffers * by the time we get here. */ alloc_buf = 0; BPFD_LOCK(d); if (d->bd_bufmode == BPF_BUFMODE_BUFFER && d->bd_sbuf == NULL) alloc_buf = 1; BPFD_UNLOCK(d); if (alloc_buf) { size = d->bd_bufsize; error = bpf_buffer_ioctl_sblen(d, &size); if (error != 0) break; } BPF_LOCK(); error = bpf_setif(d, (struct ifreq *)addr); BPF_UNLOCK(); break; } /* * Set read timeout. 
*/ case BIOCSRTIMEOUT: #if defined(COMPAT_FREEBSD32) && defined(__amd64__) case BIOCSRTIMEOUT32: #endif { struct timeval *tv = (struct timeval *)addr; #if defined(COMPAT_FREEBSD32) && !defined(__mips__) struct timeval32 *tv32; struct timeval tv64; if (cmd == BIOCSRTIMEOUT32) { tv32 = (struct timeval32 *)addr; tv = &tv64; tv->tv_sec = tv32->tv_sec; tv->tv_usec = tv32->tv_usec; } else #endif tv = (struct timeval *)addr; /* * Subtract 1 tick from tvtohz() since this isn't * a one-shot timer. */ if ((error = itimerfix(tv)) == 0) d->bd_rtout = tvtohz(tv) - 1; break; } /* * Get read timeout. */ case BIOCGRTIMEOUT: #if defined(COMPAT_FREEBSD32) && defined(__amd64__) case BIOCGRTIMEOUT32: #endif { struct timeval *tv; #if defined(COMPAT_FREEBSD32) && defined(__amd64__) struct timeval32 *tv32; struct timeval tv64; if (cmd == BIOCGRTIMEOUT32) tv = &tv64; else #endif tv = (struct timeval *)addr; tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; #if defined(COMPAT_FREEBSD32) && defined(__amd64__) if (cmd == BIOCGRTIMEOUT32) { tv32 = (struct timeval32 *)addr; tv32->tv_sec = tv->tv_sec; tv32->tv_usec = tv->tv_usec; } #endif break; } /* * Get packet stats. */ case BIOCGSTATS: { struct bpf_stat *bs = (struct bpf_stat *)addr; /* XXXCSJP overflow */ bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount); bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount); break; } /* * Set immediate mode. */ case BIOCIMMEDIATE: BPFD_LOCK(d); d->bd_immediate = *(u_int *)addr; BPFD_UNLOCK(d); break; case BIOCVERSION: { struct bpf_version *bv = (struct bpf_version *)addr; bv->bv_major = BPF_MAJOR_VERSION; bv->bv_minor = BPF_MINOR_VERSION; break; } /* * Get "header already complete" flag */ case BIOCGHDRCMPLT: BPFD_LOCK(d); *(u_int *)addr = d->bd_hdrcmplt; BPFD_UNLOCK(d); break; /* * Set "header already complete" flag */ case BIOCSHDRCMPLT: BPFD_LOCK(d); d->bd_hdrcmplt = *(u_int *)addr ? 
1 : 0; BPFD_UNLOCK(d); break; /* * Get packet direction flag */ case BIOCGDIRECTION: BPFD_LOCK(d); *(u_int *)addr = d->bd_direction; BPFD_UNLOCK(d); break; /* * Set packet direction flag */ case BIOCSDIRECTION: { u_int direction; direction = *(u_int *)addr; switch (direction) { case BPF_D_IN: case BPF_D_INOUT: case BPF_D_OUT: BPFD_LOCK(d); d->bd_direction = direction; BPFD_UNLOCK(d); break; default: error = EINVAL; } } break; /* * Get packet timestamp format and resolution. */ case BIOCGTSTAMP: BPFD_LOCK(d); *(u_int *)addr = d->bd_tstamp; BPFD_UNLOCK(d); break; /* * Set packet timestamp format and resolution. */ case BIOCSTSTAMP: { u_int func; func = *(u_int *)addr; if (BPF_T_VALID(func)) d->bd_tstamp = func; else error = EINVAL; } break; case BIOCFEEDBACK: BPFD_LOCK(d); d->bd_feedback = *(u_int *)addr; BPFD_UNLOCK(d); break; case BIOCLOCK: BPFD_LOCK(d); d->bd_locked = 1; BPFD_UNLOCK(d); break; case FIONBIO: /* Non-blocking I/O */ break; case FIOASYNC: /* Send signal on receive packets */ BPFD_LOCK(d); d->bd_async = *(int *)addr; BPFD_UNLOCK(d); break; case FIOSETOWN: /* * XXX: Add some sort of locking here? * fsetown() can sleep. */ error = fsetown(*(int *)addr, &d->bd_sigio); break; case FIOGETOWN: BPFD_LOCK(d); *(int *)addr = fgetown(&d->bd_sigio); BPFD_UNLOCK(d); break; /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: error = fsetown(-(*(int *)addr), &d->bd_sigio); break; /* This is deprecated, FIOGETOWN should be used instead. 
*/ case TIOCGPGRP: *(int *)addr = -fgetown(&d->bd_sigio); break; case BIOCSRSIG: /* Set receive signal */ { u_int sig; sig = *(u_int *)addr; if (sig >= NSIG) error = EINVAL; else { BPFD_LOCK(d); d->bd_sig = sig; BPFD_UNLOCK(d); } break; } case BIOCGRSIG: BPFD_LOCK(d); *(u_int *)addr = d->bd_sig; BPFD_UNLOCK(d); break; case BIOCGETBUFMODE: BPFD_LOCK(d); *(u_int *)addr = d->bd_bufmode; BPFD_UNLOCK(d); break; case BIOCSETBUFMODE: /* * Allow the buffering mode to be changed as long as we * haven't yet committed to a particular mode. Our * definition of commitment, for now, is whether or not a * buffer has been allocated or an interface attached, since * that's the point where things get tricky. */ switch (*(u_int *)addr) { case BPF_BUFMODE_BUFFER: break; case BPF_BUFMODE_ZBUF: if (bpf_zerocopy_enable) break; /* FALLSTHROUGH */ default: CURVNET_RESTORE(); return (EINVAL); } BPFD_LOCK(d); if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || d->bd_fbuf != NULL || d->bd_bif != NULL) { BPFD_UNLOCK(d); CURVNET_RESTORE(); return (EBUSY); } d->bd_bufmode = *(u_int *)addr; BPFD_UNLOCK(d); break; case BIOCGETZMAX: error = bpf_ioctl_getzmax(td, d, (size_t *)addr); break; case BIOCSETZBUF: error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr); break; case BIOCROTZBUF: error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr); break; + + case BIOCSETVLANPCP: + { + u_int pcp; + + pcp = *(u_int *)addr; + if (pcp > BPF_PRIO_MAX || pcp < 0) { + error = EINVAL; + break; + } + d->bd_pcp = pcp; + break; + } } CURVNET_RESTORE(); return (error); } /* * Set d's packet filter program to fp. If this file already has a filter, * free it and replace it. Returns EINVAL for bogus requests. * * Note we use global lock here to serialize bpf_setf() and bpf_setif() * calls. 
*/
/*
 * Outline of the body below:
 *  1. Under COMPAT_FREEBSD32 the 32-bit program layout is copied into a
 *     native struct bpf_program and BIOCSETF32/BIOCSETWF32 are mapped to
 *     their native command values.
 *  2. The program is copied in and validated before any lock is taken.
 *  3. With BPF_LOCK and the descriptor lock held, the read or write filter
 *     pointer is swapped.
 *  4. The previous program buffer is released through NET_EPOCH_CALL.
 *
 * Returns 0 on success, or EINVAL when the length exceeds bpf_maxinsns,
 * when bf_insns is NULL with a non-zero length, when copyin() fails, or
 * when bpf_validate() rejects the program.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
#ifdef COMPAT_FREEBSD32
	struct bpf_program fp_swab;
	struct bpf_program32 *fp32;
#endif
	struct bpf_program_buffer *fcode;
	struct bpf_insn *filter;
#ifdef BPF_JITTER
	bpf_jit_filter *jfunc;
#endif
	size_t size;
	u_int flen;
	bool track_event;

#ifdef COMPAT_FREEBSD32
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETWF32:
	case BIOCSETFNR32:
		fp32 = (struct bpf_program32 *)fp;
		fp_swab.bf_len = fp32->bf_len;
		fp_swab.bf_insns =
		    (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
		fp = &fp_swab;
		/*
		 * NOTE(review): BIOCSETFNR32 is converted above but cmd is
		 * not remapped to a native value here; confirm that the
		 * later users of cmd expect that.
		 */
		switch (cmd) {
		case BIOCSETF32:
			cmd = BIOCSETF;
			break;
		case BIOCSETWF32:
			cmd = BIOCSETWF;
			break;
		}
		break;
	}
#endif

	filter = NULL;
#ifdef BPF_JITTER
	jfunc = NULL;
#endif
	/*
	 * Check the new filter's validity before acquiring any locks.
	 * Allocate memory for new filter, if needed.
	 */
	flen = fp->bf_len;
	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
		return (EINVAL);
	size = flen * sizeof(*fp->bf_insns);
	if (size > 0) {
		/* We're setting up new filter. Copy and check actual data. */
		fcode = bpf_program_buffer_alloc(size, M_WAITOK);
		filter = (struct bpf_insn *)fcode->buffer;
		if (copyin(fp->bf_insns, filter, size) != 0 ||
		    !bpf_validate(filter, flen)) {
			free(fcode, M_BPF);
			return (EINVAL);
		}
#ifdef BPF_JITTER
		if (cmd != BIOCSETWF) {
			/*
			 * Filter is copied inside fcode and is
			 * perfectly valid.
			 */
			jfunc = bpf_jitter(filter, flen);
		}
#endif
	}

	track_event = false;
	/* From here on fcode names the OLD program buffer to release. */
	fcode = NULL;

	BPF_LOCK();
	BPFD_LOCK(d);
	/* Set up new filter. */
	if (cmd == BIOCSETWF) {
		if (d->bd_wfilter != NULL) {
			fcode = __containerof((void *)d->bd_wfilter,
			    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
			fcode->func = NULL;
#endif
		}
		d->bd_wfilter = filter;
	} else {
		if (d->bd_rfilter != NULL) {
			fcode = __containerof((void *)d->bd_rfilter,
			    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
			/* Hand the old JIT function over with its buffer. */
			fcode->func = d->bd_bfilter;
#endif
		}
		d->bd_rfilter = filter;
#ifdef BPF_JITTER
		d->bd_bfilter = jfunc;
#endif
		if (cmd == BIOCSETF)
			reset_d(d);

		if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
			/*
			 * Filter can be set several times without
			 * specifying interface. In this case just mark d
			 * as reader.
			 */
			d->bd_writer = 0;
			if (d->bd_bif != NULL) {
				/*
				 * Remove descriptor from writers-only list
				 * and add it to active readers list.
				 */
				CK_LIST_REMOVE(d, bd_next);
				CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
				    d, bd_next);
				CTR2(KTR_NET,
				    "%s: upgrade required by pid %d",
				    __func__, d->bd_pid);
				track_event = true;
			}
		}
	}
	BPFD_UNLOCK(d);

	/* The old buffer is released through the epoch callback. */
	if (fcode != NULL)
		NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);

	if (track_event)
		EVENTHANDLER_INVOKE(bpf_track,
		    d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);

	BPF_UNLOCK();
	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 *
 * The global BPF lock must be held (asserted below).  ENXIO is returned
 * when ifunit() finds no interface or the interface has no if_bpf; EINVAL
 * is returned when no store buffer has been allocated yet.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	BPF_LOCK_ASSERT();

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;
	/*
	 * At this point, we expect the buffer is already allocated.  If not,
	 * return an error.
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	/* Same interface as before: only reset the descriptor state. */
	if (bp != d->bd_bif)
		bpf_attachd(d, bp);
	else {
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
	}
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 *
 * When the descriptor cannot be looked up or has no interface attached,
 * every requested event among POLLHUP/POLLIN/POLLRDNORM/POLLOUT/POLLWRNORM
 * is reported.  Write events are always reported as ready.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 *
 * Returns 1 when the descriptor cannot be looked up or the filter is not
 * EVFILT_READ, 0 once the knote has been added to the descriptor's list.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d;

	if (devfs_get_cdevpriv((void **)&d) != 0 ||
	    kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

/*
 * kqueue detach hook: remove the knote from the knote list of the
 * descriptor stored in kn_hook by bpfkqfilter().
 */
static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

/*
 * kqueue event hook for EVFILT_READ.  The descriptor lock must be held
 * (asserted below).  When data is ready, kn_data is set to the store
 * buffer length plus the hold buffer length if a hold buffer exists and is
 * not in use.  Otherwise the read timeout is armed if one is configured
 * and the descriptor is idle.  Returns the value of bpf_ready().
 */
static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		/*
		 * Ignore the hold buffer if it is being copied to user space.
		 */
		if (!d->bd_hbuf_in_use && d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Timestamp "quality" levels, ordered by numeric value.  These are the
 * values returned by bpf_ts_quality() and bpf_gettime() below.
 */
#define	BPF_TSTAMP_NONE		0
#define	BPF_TSTAMP_FAST		1
#define	BPF_TSTAMP_NORMAL	2
#define	BPF_TSTAMP_EXTERN	3

/*
 * Map a descriptor timestamp type to the quality level it requires:
 * NONE for BPF_T_NONE, FAST when the BPF_T_FAST flag is set, NORMAL
 * otherwise.
 */
static int
bpf_ts_quality(int tstype)
{

	if (tstype == BPF_T_NONE)
		return (BPF_TSTAMP_NONE);
	if ((tstype & BPF_T_FAST) != 0)
		return (BPF_TSTAMP_FAST);

	return (BPF_TSTAMP_NORMAL);
}

/*
 * Obtain a timestamp of the quality required by tstype and store it in
 * *bt.  Nothing is stored when the quality is NONE.  When m is not NULL
 * and carries an MTAG_BPF/MTAG_BPF_TIMESTAMP tag, the bintime stored after
 * the tag is used and BPF_TSTAMP_EXTERN is returned.  Otherwise
 * binuptime() is used for NORMAL quality and getbinuptime() for FAST.
 * Returns the quality of the timestamp obtained.
 */
static int
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
{
	struct m_tag *tag;
	int quality;

	quality = bpf_ts_quality(tstype);
	if (quality == BPF_TSTAMP_NONE)
		return (quality);

	if (m != NULL) {
		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
		if (tag != NULL) {
			*bt = *(struct bintime *)(tag + 1);
			return (BPF_TSTAMP_EXTERN);
		}
	}
	if (quality == BPF_TSTAMP_NORMAL)
		binuptime(bt);
	else
		getbinuptime(bt);

	return (quality);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
*/
/*
 * bpf_tap() walks the interface's reader list inside a network epoch
 * section.  The descriptor lock is taken only for descriptors whose read
 * filter accepts the packet (non-zero snap length).  A timestamp is
 * fetched again only when a descriptor needs a higher quality than the one
 * already obtained.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;

	gottime = BPF_TSTAMP_NONE;
	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		counter_u64_add(d->bd_rcount, 1);
		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there
		 * is no way for the caller to indicate to us whether this
		 * packet is inbound or outbound. In the bpf_mtap() routines,
		 * we use the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			/*
			 * Filter matches. Acquire the descriptor lock.
			 */
			BPFD_LOCK(d);
			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp,
				    NULL);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

/*
 * Evaluates to true when descriptor d should NOT see the packet: d wants
 * only incoming packets and receive interface r differs from tapped
 * interface i, or d wants only outgoing packets and r equals i.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 * Locking model is explained in bpf_tap().
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	gottime = BPF_TSTAMP_NONE;

	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
			continue;
		counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen,
			    pktlen);
		else
#endif
		/*
		 * NOTE(review): the mbuf pointer is passed as the packet with
		 * a buffer length of 0; presumably that is how bpf_filter()
		 * is told to walk an mbuf chain - confirm.
		 */
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 *
 * data/dlen describe the contiguous header; the reported packet length is
 * the length of the chain m plus dlen.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_flags = 0;
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	gottime = BPF_TSTAMP_NONE;

	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		counter_u64_add(d->bd_rcount, 1);
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	NET_EPOCH_EXIT(et);
}

#undef	BPF_CHECK_DIRECTION
#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN

/*
 * Return the number of bytes the bpf header occupies in front of each
 * captured packet for descriptor d.  The header structure is chosen from
 * the timestamp format (and the 32-bit compat flag).  Its size is added to
 * the interface's link header length, the sum is word-aligned, and the
 * link header length is subtracted again, so the result includes the
 * alignment padding.
 */
static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen;

	hdrlen = d->bd_bif->bif_hdrlen;
#ifndef BURN_BRIDGES
	if (d->bd_tstamp == BPF_T_NONE ||
	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32)
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
		else
#endif
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
	else
#endif
		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		hdrlen = BPF_WORDALIGN32(hdrlen);
	else
#endif
		hdrlen = BPF_WORDALIGN(hdrlen);

	return (hdrlen - d->bd_bif->bif_hdrlen);
}

/*
 * Convert bintime *bt into the bpf_ts representation selected by tstype.
 * Unless BPF_T_MONOTONIC is set, the boot time is added to a local copy
 * first.  The fraction field holds microseconds, nanoseconds or the raw
 * bintime fraction depending on the format.  *ts is left untouched for a
 * format not handled by the switch.
 */
static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
	struct bintime bt2, boottimebin;
	struct timeval tsm;
	struct timespec tsn;

	if ((tstype & BPF_T_MONOTONIC) == 0) {
		bt2 = *bt;
		getboottimebin(&boottimebin);
		bintime_add(&bt2, &boottimebin);
		bt = &bt2;
	}
	switch (BPF_T_FORMAT(tstype)) {
	case BPF_T_MICROTIME:
		bintime2timeval(bt, &tsm);
		ts->bt_sec = tsm.tv_sec;
		ts->bt_frac = tsm.tv_usec;
		break;
	case BPF_T_NANOTIME:
		bintime2timespec(bt, &tsn);
		ts->bt_sec = tsn.tv_sec;
		ts->bt_frac = tsn.tv_nsec;
		break;
	case BPF_T_BINTIME:
		ts->bt_sec = bt->sec;
		ts->bt_frac = bt->frac;
		break;
	}
}

/*
 * Move the packet data from interface
memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
/*
 * Additional notes for catchpacket():
 *  - The descriptor lock must be held (asserted below).
 *  - pktlen is the original packet length and snaplen the length accepted
 *    by the filter; the smaller of the two is captured, further limited by
 *    the buffer size.
 *  - bt is only read when the descriptor's timestamp type is not
 *    BPF_T_NONE.
 *  - Packets are counted in bd_dcount and dropped when the descriptor has
 *    been detached or when no buffer space can be made available.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct bintime *bt)
{
	struct bpf_xhdr hdr;
#ifndef BURN_BRIDGES
	struct bpf_hdr hdr_old;
#ifdef COMPAT_FREEBSD32
	struct bpf_hdr32 hdr32_old;
#endif
#endif
	int caplen, curlen, hdrlen, totlen;
	int do_wakeup = 0;
	int do_timestamp;
	int tstype;

	BPFD_LOCK_ASSERT(d);
	if (d->bd_bif == NULL) {
		/* Descriptor was detached in concurrent thread */
		counter_u64_add(d->bd_dcount, 1);
		return;
	}

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	hdrlen = bpf_hdrlen(d);
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wakeup pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			counter_u64_add(d->bd_dcount, 1);
			return;
		}
		KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		/* The new store buffer is empty, so write at offset 0. */
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;
	caplen = totlen - hdrlen;
	tstype = d->bd_tstamp;
	do_timestamp = tstype != BPF_T_NONE;
#ifndef BURN_BRIDGES
	/*
	 * Descriptors without a timestamp, or with the microtime format, get
	 * the old struct bpf_hdr (or its 32-bit variant) instead of
	 * struct bpf_xhdr.
	 */
	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
		struct bpf_ts ts;
		if (do_timestamp)
			bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32) {
			bzero(&hdr32_old, sizeof(hdr32_old));
			if (do_timestamp) {
				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
			}
			hdr32_old.bh_datalen = pktlen;
			hdr32_old.bh_hdrlen = hdrlen;
			hdr32_old.bh_caplen = caplen;
			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
			    sizeof(hdr32_old));
			goto copy;
		}
#endif
		bzero(&hdr_old, sizeof(hdr_old));
		if (do_timestamp) {
			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
		}
		hdr_old.bh_datalen = pktlen;
		hdr_old.bh_hdrlen = hdrlen;
		hdr_old.bh_caplen = caplen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
		    sizeof(hdr_old));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	if (do_timestamp)
		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = caplen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifndef BURN_BRIDGES
copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 *
 * ctx is the epoch context embedded in the descriptor; the descriptor is
 * recovered from it with __containerof().  Both filter programs, the
 * descriptor lock, all per-descriptor counters and finally the descriptor
 * itself are released.
 */
static void
bpfd_free(epoch_context_t ctx)
{
	struct bpf_d *d;
	struct bpf_program_buffer *p;

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	d = __containerof(ctx, struct bpf_d, epoch_ctx);
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		p = __containerof((void *)d->bd_rfilter,
		    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
		p->func = d->bd_bfilter;
#endif
		bpf_program_buffer_free(&p->epoch_ctx);
	}
	if (d->bd_wfilter != NULL) {
		p = __containerof((void *)d->bd_wfilter,
		    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
		p->func = NULL;
#endif
		bpf_program_buffer_free(&p->epoch_ctx);
	}

	mtx_destroy(&d->bd_lock);
	counter_u64_free(d->bd_rcount);
	counter_u64_free(d->bd_dcount);
	counter_u64_free(d->bd_fcount);
	counter_u64_free(d->bd_wcount);
	counter_u64_free(d->bd_wfcount);
	counter_u64_free(d->bd_wdcount);
	counter_u64_free(d->bd_zcopy);
	free(d, M_BPF);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 *
 * Thin wrapper around bpfattach2() that stores the result in ifp->if_bpf.
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supporrted).
*/ void bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) { struct bpf_if *bp; KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized")); bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO); CK_LIST_INIT(&bp->bif_dlist); CK_LIST_INIT(&bp->bif_wlist); bp->bif_ifp = ifp; bp->bif_dlt = dlt; bp->bif_hdrlen = hdrlen; bp->bif_bpf = driverp; bp->bif_refcnt = 1; *driverp = bp; /* * Reference ifnet pointer, so it won't freed until * we release it. */ if_ref(ifp); BPF_LOCK(); CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); BPF_UNLOCK(); if (bootverbose && IS_DEFAULT_VNET(curvnet)) if_printf(ifp, "bpf attached\n"); } #ifdef VIMAGE /* * When moving interfaces between vnet instances we need a way to * query the dlt and hdrlen before detach so we can re-attch the if_bpf * after the vmove. We unfortunately have no device driver infrastructure * to query the interface for these values after creation/attach, thus * add this as a workaround. */ int bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen) { if (bp == NULL) return (ENXIO); if (bif_dlt == NULL && bif_hdrlen == NULL) return (0); if (bif_dlt != NULL) *bif_dlt = bp->bif_dlt; if (bif_hdrlen != NULL) *bif_hdrlen = bp->bif_hdrlen; return (0); } #endif /* * Detach bpf from an interface. This involves detaching each descriptor * associated with the interface. Notify each descriptor as it's detached * so that any sleepers wake up and get ENXIO. */ void bpfdetach(struct ifnet *ifp) { struct bpf_if *bp, *bp_temp; struct bpf_d *d; BPF_LOCK(); /* Find all bpf_if struct's which reference ifp and detach them. 
*/ CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) { if (ifp != bp->bif_ifp) continue; CK_LIST_REMOVE(bp, bif_next); *bp->bif_bpf = (struct bpf_if *)&dead_bpf_if; CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p", __func__, bp->bif_dlt, bp, ifp); /* Detach common descriptors */ while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) { bpf_detachd_locked(d, true); } /* Detach writer-only descriptors */ while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) { bpf_detachd_locked(d, true); } bpfif_rele(bp); } BPF_UNLOCK(); } /* * Get a list of available data link type of the interface. */ static int bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) { struct ifnet *ifp; struct bpf_if *bp; u_int *lst; int error, n, n1; BPF_LOCK_ASSERT(); ifp = d->bd_bif->bif_ifp; n1 = 0; CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp == ifp) n1++; } if (bfl->bfl_list == NULL) { bfl->bfl_len = n1; return (0); } if (n1 > bfl->bfl_len) return (ENOMEM); lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); n = 0; CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp != ifp) continue; lst[n++] = bp->bif_dlt; } error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); free(lst, M_TEMP); bfl->bfl_len = n; return (error); } /* * Set the data link type of a BPF instance. */ static int bpf_setdlt(struct bpf_d *d, u_int dlt) { int error, opromisc; struct ifnet *ifp; struct bpf_if *bp; BPF_LOCK_ASSERT(); MPASS(d->bd_bif != NULL); /* * It is safe to check bd_bif without BPFD_LOCK, it can not be * changed while we hold global lock. 
*/ if (d->bd_bif->bif_dlt == dlt) return (0); ifp = d->bd_bif->bif_ifp; CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) break; } if (bp == NULL) return (EINVAL); opromisc = d->bd_promisc; bpf_attachd(d, bp); if (opromisc) { error = ifpromisc(bp->bif_ifp, 1); if (error) if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n", __func__, error); else d->bd_promisc = 1; } return (0); } static void bpf_drvinit(void *unused) { struct cdev *dev; sx_init(&bpf_sx, "bpf global lock"); CK_LIST_INIT(&bpf_iflist); dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); /* For compatibility */ make_dev_alias(dev, "bpf0"); } /* * Zero out the various packet counters associated with all of the bpf * descriptors. At some point, we will probably want to get a bit more * granular and allow the user to specify descriptors to be zeroed. */ static void bpf_zero_counters(void) { struct bpf_if *bp; struct bpf_d *bd; BPF_LOCK(); /* * We are protected by global lock here, interfaces and * descriptors can not be deleted while we hold it. 
*/ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { counter_u64_zero(bd->bd_rcount); counter_u64_zero(bd->bd_dcount); counter_u64_zero(bd->bd_fcount); counter_u64_zero(bd->bd_wcount); counter_u64_zero(bd->bd_wfcount); counter_u64_zero(bd->bd_zcopy); } } BPF_UNLOCK(); } /* * Fill filter statistics */ static void bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) { BPF_LOCK_ASSERT(); bzero(d, sizeof(*d)); d->bd_structsize = sizeof(*d); d->bd_immediate = bd->bd_immediate; d->bd_promisc = bd->bd_promisc; d->bd_hdrcmplt = bd->bd_hdrcmplt; d->bd_direction = bd->bd_direction; d->bd_feedback = bd->bd_feedback; d->bd_async = bd->bd_async; d->bd_rcount = counter_u64_fetch(bd->bd_rcount); d->bd_dcount = counter_u64_fetch(bd->bd_dcount); d->bd_fcount = counter_u64_fetch(bd->bd_fcount); d->bd_sig = bd->bd_sig; d->bd_slen = bd->bd_slen; d->bd_hlen = bd->bd_hlen; d->bd_bufsize = bd->bd_bufsize; d->bd_pid = bd->bd_pid; strlcpy(d->bd_ifname, bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); d->bd_locked = bd->bd_locked; d->bd_wcount = counter_u64_fetch(bd->bd_wcount); d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount); d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount); d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy); d->bd_bufmode = bd->bd_bufmode; } /* * Handle `netstat -B' stats request */ static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) { static const struct xbpf_d zerostats; struct xbpf_d *xbdbuf, *xbd, tempstats; int index, error; struct bpf_if *bp; struct bpf_d *bd; /* * XXX This is not technically correct. It is possible for non * privileged users to open bpf devices. It would make sense * if the users who opened the devices were able to retrieve * the statistics for them, too. */ error = priv_check(req->td, PRIV_NET_BPF); if (error) return (error); /* * Check to see if the user is requesting that the counters be * zeroed out. Explicitly check that the supplied data is zeroed, * as we aren't allowing the user to set the counters currently. 
*/ if (req->newptr != NULL) { if (req->newlen != sizeof(tempstats)) return (EINVAL); memset(&tempstats, 0, sizeof(tempstats)); error = SYSCTL_IN(req, &tempstats, sizeof(tempstats)); if (error) return (error); if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0) return (EINVAL); bpf_zero_counters(); return (0); } if (req->oldptr == NULL) return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); if (bpf_bpfd_cnt == 0) return (SYSCTL_OUT(req, 0, 0)); xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); BPF_LOCK(); if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { BPF_UNLOCK(); free(xbdbuf, M_BPF); return (ENOMEM); } index = 0; CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { /* Send writers-only first */ CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) { xbd = &xbdbuf[index++]; bpfstats_fill_xbpf(xbd, bd); } CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { xbd = &xbdbuf[index++]; bpfstats_fill_xbpf(xbd, bd); } } BPF_UNLOCK(); error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); free(xbdbuf, M_BPF); return (error); } SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); #else /* !DEV_BPF && !NETGRAPH_BPF */ /* * NOP stubs to allow bpf-using drivers to load and function. * * A 'better' implementation would allow the core bpf functionality * to be loaded at runtime. 
*/

/* Stub: packets are ignored when BPF is not compiled in. */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

/* Stub: packets are ignored when BPF is not compiled in. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

/* Stub: packets are ignored when BPF is not compiled in. */
void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

/* Stub: forwards to the stub bpfattach2() using ifp->if_bpf as handle. */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/* Stub: only points the driver's handle at the dead_bpf_if placeholder. */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = (struct bpf_if *)&dead_bpf_if;
}

/* Stub: nothing to detach. */
void
bpfdetach(struct ifnet *ifp)
{
}

/* Stub: -1 converted to u_int, i.e. the largest possible snap length. */
u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

/* Stub: every program is reported as invalid. */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */

#ifdef DDB
/*
 * Debugger helper: print the address of a bpf_if followed by selected
 * fields.  Does nothing for a NULL pointer.
 */
static void
bpf_show_bpf_if(struct bpf_if *bpf_if)
{

	if (bpf_if == NULL)
		return;
	db_printf("%p:\n", bpf_if);
	/* Prints one field e of bpf_if using printf format f. */
#define	BPF_DB_PRINTF(f, e)	db_printf(" %s = " f "\n", #e, bpf_if->e);
	/* bif_ext.bif_next */
	/* bif_ext.bif_dlist */
	BPF_DB_PRINTF("%#x", bif_dlt);
	BPF_DB_PRINTF("%u", bif_hdrlen);
	/* bif_wlist */
	BPF_DB_PRINTF("%p", bif_ifp);
	BPF_DB_PRINTF("%p", bif_bpf);
	BPF_DB_PRINTF("%u", bif_refcnt);
}

/* DDB "show bpf_if" command: requires an address argument. */
DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
{

	if (!have_addr) {
		db_printf("usage: show bpf_if \n");
		return;
	}

	bpf_show_bpf_if((struct bpf_if *)addr);
}
#endif
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
index 54bbfd23bba2..b7cd8036856c 100644
--- a/sys/net/bpf.h
+++ b/sys/net/bpf.h
@@ -1,475 +1,476 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpf.h 8.1 (Berkeley) 6/10/93 * @(#)bpf.h 1.34 (LBL) 6/16/96 * * $FreeBSD$ */ #ifndef _NET_BPF_H_ #define _NET_BPF_H_ #include #include #include /* BSD style release date */ #define BPF_RELEASE 199606 typedef int32_t bpf_int32; typedef u_int32_t bpf_u_int32; typedef int64_t bpf_int64; typedef u_int64_t bpf_u_int64; /* * Alignment macros. BPF_WORDALIGN rounds up to the next * even multiple of BPF_ALIGNMENT. */ #define BPF_ALIGNMENT sizeof(long) #define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1)) #define BPF_MAXINSNS 512 #define BPF_MAXBUFSIZE 0x80000 #define BPF_MINBUFSIZE 32 /* * Structure for BIOCSETF. 
*/ struct bpf_program { u_int bf_len; struct bpf_insn *bf_insns; }; /* * Struct returned by BIOCGSTATS. */ struct bpf_stat { u_int bs_recv; /* number of packets received */ u_int bs_drop; /* number of packets dropped */ }; /* * Struct return by BIOCVERSION. This represents the version number of * the filter language described by the instruction encodings below. * bpf understands a program iff kernel_major == filter_major && * kernel_minor >= filter_minor, that is, if the value returned by the * running kernel has the same major number and a minor number equal * equal to or less than the filter being downloaded. Otherwise, the * results are undefined, meaning an error may be returned or packets * may be accepted haphazardly. * It has nothing to do with the source code version. */ struct bpf_version { u_short bv_major; u_short bv_minor; }; /* Current version number of filter architecture. */ #define BPF_MAJOR_VERSION 1 #define BPF_MINOR_VERSION 1 /* * Historically, BPF has supported a single buffering model, first using mbuf * clusters in kernel, and later using malloc(9) buffers in kernel. We now * support multiple buffering modes, which may be queried and set using * BIOCGETBUFMODE and BIOCSETBUFMODE. So as to avoid handling the complexity * of changing modes while sniffing packets, the mode becomes fixed once an * interface has been attached to the BPF descriptor. */ #define BPF_BUFMODE_BUFFER 1 /* Kernel buffers with read(). */ #define BPF_BUFMODE_ZBUF 2 /* Zero-copy buffers. */ /*- * Struct used by BIOCSETZBUF, BIOCROTZBUF: describes up to two zero-copy * buffer as used by BPF. */ struct bpf_zbuf { void *bz_bufa; /* Location of 'a' zero-copy buffer. */ void *bz_bufb; /* Location of 'b' zero-copy buffer. */ size_t bz_buflen; /* Size of zero-copy buffers. 
*/ }; #define BIOCGBLEN _IOR('B', 102, u_int) #define BIOCSBLEN _IOWR('B', 102, u_int) #define BIOCSETF _IOW('B', 103, struct bpf_program) #define BIOCFLUSH _IO('B', 104) #define BIOCPROMISC _IO('B', 105) #define BIOCGDLT _IOR('B', 106, u_int) #define BIOCGETIF _IOR('B', 107, struct ifreq) #define BIOCSETIF _IOW('B', 108, struct ifreq) #define BIOCSRTIMEOUT _IOW('B', 109, struct timeval) #define BIOCGRTIMEOUT _IOR('B', 110, struct timeval) #define BIOCGSTATS _IOR('B', 111, struct bpf_stat) #define BIOCIMMEDIATE _IOW('B', 112, u_int) #define BIOCVERSION _IOR('B', 113, struct bpf_version) #define BIOCGRSIG _IOR('B', 114, u_int) #define BIOCSRSIG _IOW('B', 115, u_int) #define BIOCGHDRCMPLT _IOR('B', 116, u_int) #define BIOCSHDRCMPLT _IOW('B', 117, u_int) #define BIOCGDIRECTION _IOR('B', 118, u_int) #define BIOCSDIRECTION _IOW('B', 119, u_int) #define BIOCSDLT _IOW('B', 120, u_int) #define BIOCGDLTLIST _IOWR('B', 121, struct bpf_dltlist) #define BIOCLOCK _IO('B', 122) #define BIOCSETWF _IOW('B', 123, struct bpf_program) #define BIOCFEEDBACK _IOW('B', 124, u_int) #define BIOCGETBUFMODE _IOR('B', 125, u_int) #define BIOCSETBUFMODE _IOW('B', 126, u_int) #define BIOCGETZMAX _IOR('B', 127, size_t) #define BIOCROTZBUF _IOR('B', 128, struct bpf_zbuf) #define BIOCSETZBUF _IOW('B', 129, struct bpf_zbuf) #define BIOCSETFNR _IOW('B', 130, struct bpf_program) #define BIOCGTSTAMP _IOR('B', 131, u_int) #define BIOCSTSTAMP _IOW('B', 132, u_int) +#define BIOCSETVLANPCP _IOW('B', 133, u_int) /* Obsolete */ #define BIOCGSEESENT BIOCGDIRECTION #define BIOCSSEESENT BIOCSDIRECTION /* Packet directions */ enum bpf_direction { BPF_D_IN, /* See incoming packets */ BPF_D_INOUT, /* See incoming and outgoing packets */ BPF_D_OUT /* See outgoing packets */ }; /* Time stamping functions */ #define BPF_T_MICROTIME 0x0000 #define BPF_T_NANOTIME 0x0001 #define BPF_T_BINTIME 0x0002 #define BPF_T_NONE 0x0003 #define BPF_T_FORMAT_MASK 0x0003 #define BPF_T_NORMAL 0x0000 #define BPF_T_FAST 0x0100 #define 
BPF_T_MONOTONIC 0x0200 #define BPF_T_MONOTONIC_FAST (BPF_T_FAST | BPF_T_MONOTONIC) #define BPF_T_FLAG_MASK 0x0300 #define BPF_T_FORMAT(t) ((t) & BPF_T_FORMAT_MASK) #define BPF_T_FLAG(t) ((t) & BPF_T_FLAG_MASK) #define BPF_T_VALID(t) \ ((t) == BPF_T_NONE || (BPF_T_FORMAT(t) != BPF_T_NONE && \ ((t) & ~(BPF_T_FORMAT_MASK | BPF_T_FLAG_MASK)) == 0)) #define BPF_T_MICROTIME_FAST (BPF_T_MICROTIME | BPF_T_FAST) #define BPF_T_NANOTIME_FAST (BPF_T_NANOTIME | BPF_T_FAST) #define BPF_T_BINTIME_FAST (BPF_T_BINTIME | BPF_T_FAST) #define BPF_T_MICROTIME_MONOTONIC (BPF_T_MICROTIME | BPF_T_MONOTONIC) #define BPF_T_NANOTIME_MONOTONIC (BPF_T_NANOTIME | BPF_T_MONOTONIC) #define BPF_T_BINTIME_MONOTONIC (BPF_T_BINTIME | BPF_T_MONOTONIC) #define BPF_T_MICROTIME_MONOTONIC_FAST (BPF_T_MICROTIME | BPF_T_MONOTONIC_FAST) #define BPF_T_NANOTIME_MONOTONIC_FAST (BPF_T_NANOTIME | BPF_T_MONOTONIC_FAST) #define BPF_T_BINTIME_MONOTONIC_FAST (BPF_T_BINTIME | BPF_T_MONOTONIC_FAST) /* * Structure prepended to each packet. */ struct bpf_ts { bpf_int64 bt_sec; /* seconds */ bpf_u_int64 bt_frac; /* fraction */ }; struct bpf_xhdr { struct bpf_ts bh_tstamp; /* time stamp */ bpf_u_int32 bh_caplen; /* length of captured portion */ bpf_u_int32 bh_datalen; /* original length of packet */ u_short bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; /* Obsolete */ struct bpf_hdr { struct timeval bh_tstamp; /* time stamp */ bpf_u_int32 bh_caplen; /* length of captured portion */ bpf_u_int32 bh_datalen; /* original length of packet */ u_short bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; #ifdef _KERNEL #define MTAG_BPF 0x627066 #define MTAG_BPF_TIMESTAMP 0 #endif /* * When using zero-copy BPF buffers, a shared memory header is present * allowing the kernel BPF implementation and user process to synchronize * without using system calls. This structure defines that header. 
When * accessing these fields, appropriate atomic operation and memory barriers * are required in order not to see stale or out-of-order data; see bpf(4) * for reference code to access these fields from userspace. * * The layout of this structure is critical, and must not be changed; if must * fit in a single page on all architectures. */ struct bpf_zbuf_header { volatile u_int bzh_kernel_gen; /* Kernel generation number. */ volatile u_int bzh_kernel_len; /* Length of data in the buffer. */ volatile u_int bzh_user_gen; /* User generation number. */ u_int _bzh_pad[5]; }; /* * The instruction encodings. * * Please inform tcpdump-workers@lists.tcpdump.org if you use any * of the reserved values, so that we can note that they're used * (and perhaps implement it in the reference BPF implementation * and encourage its implementation elsewhere). */ /* * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000. */ /* instruction classes */ #define BPF_CLASS(code) ((code) & 0x07) #define BPF_LD 0x00 #define BPF_LDX 0x01 #define BPF_ST 0x02 #define BPF_STX 0x03 #define BPF_ALU 0x04 #define BPF_JMP 0x05 #define BPF_RET 0x06 #define BPF_MISC 0x07 /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) #define BPF_W 0x00 #define BPF_H 0x08 #define BPF_B 0x10 /* 0x18 reserved; used by BSD/OS */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 #define BPF_IND 0x40 #define BPF_MEM 0x60 #define BPF_LEN 0x80 #define BPF_MSH 0xa0 /* 0xc0 reserved; used by BSD/OS */ /* 0xe0 reserved; used by BSD/OS */ /* alu/jmp fields */ #define BPF_OP(code) ((code) & 0xf0) #define BPF_ADD 0x00 #define BPF_SUB 0x10 #define BPF_MUL 0x20 #define BPF_DIV 0x30 #define BPF_OR 0x40 #define BPF_AND 0x50 #define BPF_LSH 0x60 #define BPF_RSH 0x70 #define BPF_NEG 0x80 #define BPF_MOD 0x90 #define BPF_XOR 0xa0 /* 0xb0 reserved */ /* 0xc0 reserved */ /* 0xd0 reserved */ /* 0xe0 reserved */ /* 0xf0 reserved */ #define BPF_JA 0x00 #define BPF_JEQ 0x10 #define BPF_JGT 0x20 
#define		BPF_JGE		0x30
#define		BPF_JSET	0x40
/*				0x50	reserved; used on BSD/OS */
/*				0x60	reserved */
/*				0x70	reserved */
/*				0x80	reserved */
/*				0x90	reserved */
/*				0xa0	reserved */
/*				0xb0	reserved */
/*				0xc0	reserved */
/*				0xd0	reserved */
/*				0xe0	reserved */
/*				0xf0	reserved */
/* Operand source selector: a single bit under mask 0x08. */
#define BPF_SRC(code)	((code) & 0x08)
#define		BPF_K		0x00
#define		BPF_X		0x08

/* ret - BPF_K and BPF_X also apply */
#define BPF_RVAL(code)	((code) & 0x18)
#define		BPF_A		0x10
/*				0x18	reserved */

/* misc: operation under mask 0xf8 */
#define BPF_MISCOP(code) ((code) & 0xf8)
#define		BPF_TAX		0x00
/*				0x08	reserved */
/*				0x10	reserved */
/*				0x18	reserved */
/* #define	BPF_COP		0x20	NetBSD "coprocessor" extensions */
/*				0x28	reserved */
/*				0x30	reserved */
/*				0x38	reserved */
/* #define	BPF_COPX	0x40	NetBSD "coprocessor" extensions */
/*					also used on BSD/OS */
/*				0x48	reserved */
/*				0x50	reserved */
/*				0x58	reserved */
/*				0x60	reserved */
/*				0x68	reserved */
/*				0x70	reserved */
/*				0x78	reserved */
#define		BPF_TXA		0x80
/*				0x88	reserved */
/*				0x90	reserved */
/*				0x98	reserved */
/*				0xa0	reserved */
/*				0xa8	reserved */
/*				0xb0	reserved */
/*				0xb8	reserved */
/*				0xc0	reserved; used on BSD/OS */
/*				0xc8	reserved */
/*				0xd0	reserved */
/*				0xd8	reserved */
/*				0xe0	reserved */
/*				0xe8	reserved */
/*				0xf0	reserved */
/*				0xf8	reserved */

/*
 * The instruction data structure.
 */
struct bpf_insn {
	u_short		code;	/* opcode, built from the BPF_* values above */
	u_char		jt;	/* presumably the jump offset when true -- confirm */
	u_char		jf;	/* presumably the jump offset when false -- confirm */
	bpf_u_int32	k;	/* operand; last member, see the macros below */
};

/*
 * Macros for insn array initializers.
 *
 * BPF_STMT zeroes jt and jf; BPF_JUMP fills all four members.  Note that
 * the macro argument order (code, k, jt, jf) differs from the member
 * order of struct bpf_insn (code, jt, jf, k).
 */
#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }

/*
 * Structure to retrieve available DLTs for the interface.
 */
struct bpf_dltlist {
	u_int	bfl_len;	/* number of entries in the bfl_list array */
	u_int	*bfl_list;	/* array of DLTs */
};

#ifdef _KERNEL
/* Each declaration is emitted only if its declaring macro is already defined. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_BPF);
#endif
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_bpf);
#endif

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer into the
 * hold slot, and the free buffer into the store slot.
Zero the length of the
 * new store buffer.  Descriptor lock should be held.  One must be careful to
 * not rotate the buffers twice, i.e. if fbuf != NULL.
 */
/*
 * The assignments are order-dependent: bd_sbuf and bd_slen are copied into
 * the hold slot before bd_sbuf is overwritten with bd_fbuf.  bpf_bufheld()
 * is called last, after the free slot has been cleared.
 */
#define	ROTATE_BUFFERS(d)	do {					\
	(d)->bd_hbuf = (d)->bd_sbuf;					\
	(d)->bd_hlen = (d)->bd_slen;					\
	(d)->bd_sbuf = (d)->bd_fbuf;					\
	(d)->bd_slen = 0;						\
	(d)->bd_fbuf = NULL;						\
	bpf_bufheld(d);							\
} while (0)

/*
 * Descriptor associated with each attached hardware interface.
 * Part of this structure is exposed to external callers to speed up
 * bpf_peers_present() calls.
 */
struct bpf_if;
CK_LIST_HEAD(bpfd_list, bpf_d);

struct bpf_if_ext {
	CK_LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
	struct bpfd_list	bif_dlist;	/* descriptor list */
};

/* Prototypes only; none of these functions is defined in this header. */
void	 bpf_bufheld(struct bpf_d *d);
int	 bpf_validate(const struct bpf_insn *, int);
void	 bpf_tap(struct bpf_if *, u_char *, u_int);
void	 bpf_mtap(struct bpf_if *, struct mbuf *);
void	 bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
void	 bpfattach(struct ifnet *, u_int, u_int);
void	 bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
void	 bpfdetach(struct ifnet *);
#ifdef VIMAGE
int	 bpf_get_bp_params(struct bpf_if *, u_int *, u_int *);
#endif

void	 bpfilterattach(int);
u_int	 bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);

/*
 * Return 1 when the descriptor list (bif_dlist) reached through bpf is
 * non-empty, 0 otherwise.
 *
 * The pointer is reinterpreted as a struct bpf_if_ext.
 * NOTE(review): this presumably relies on struct bpf_if starting with a
 * struct bpf_if_ext; struct bpf_if is only forward-declared here -- confirm
 * against its definition.
 */
static __inline int
bpf_peers_present(struct bpf_if *bpf)
{
	struct bpf_if_ext *ext;

	ext = (struct bpf_if_ext *)bpf;
	if (!CK_LIST_EMPTY(&ext->bif_dlist))
		return (1);
	return (0);
}

/*
 * Tap helpers.  Each one calls into bpf only when bpf_peers_present() is
 * non-zero for the interface's if_bpf.  The two mbuf variants run
 * M_ASSERTVALID on the mbuf before the call.
 */
#define	BPF_TAP(_ifp,_pkt,_pktlen) do {					\
	if (bpf_peers_present((_ifp)->if_bpf))				\
		bpf_tap((_ifp)->if_bpf, (_pkt), (_pktlen));		\
} while (0)
#define	BPF_MTAP(_ifp,_m) do {						\
	if (bpf_peers_present((_ifp)->if_bpf)) {			\
		M_ASSERTVALID(_m);					\
		bpf_mtap((_ifp)->if_bpf, (_m));				\
	}								\
} while (0)
#define	BPF_MTAP2(_ifp,_data,_dlen,_m) do {				\
	if (bpf_peers_present((_ifp)->if_bpf)) {			\
		M_ASSERTVALID(_m);					\
		bpf_mtap2((_ifp)->if_bpf,(_data),(_dlen),(_m));		\
	}								\
} while (0)
#endif

/*
 * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
*/
#define BPF_MEMWORDS 16

/* BPF attach/detach events */
struct ifnet;
/*
 * Handler signature for the bpf_track event declared just below: an opaque
 * pointer, the interface, an int annotated as the dlt, and an int annotated
 * as the attach indicator.
 */
typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
    int /* 1 =>'s attach */);
EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);

#endif /* _NET_BPF_H_ */
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
index be978280311d..52cc5f130eb3 100644
--- a/sys/net/bpfdesc.h
+++ b/sys/net/bpfdesc.h
@@ -1,161 +1,162 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpfdesc.h	8.1 (Berkeley) 6/10/93
 *
 * $FreeBSD$
 */

#ifndef _NET_BPFDESC_H_
#define _NET_BPFDESC_H_

/*
 * NOTE(review): the seven #include directives below carry no header names
 * in this copy of the file; restore them from the upstream header before
 * trying to build.
 */
#include
#include
#include
#include
#include
#include
#include

/*
 * Descriptor associated with each open bpf file.
 */
struct zbuf;
struct bpf_d {
	CK_LIST_ENTRY(bpf_d) bd_next;	/* Linked list of descriptors */
	/*
	 * Buffer slots: two memory buffers store the incoming packets.
	 *   The model has three slots.  Sbuf is always occupied.
	 *	sbuf (store) - Receive interrupt puts packets here.
	 *	hbuf (hold) - When sbuf is full, put buffer here and
	 *                    wakeup read (replace sbuf with fbuf).
	 *	fbuf (free) - When read is done, put buffer here.
	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
	 */
	caddr_t		bd_sbuf;	/* store slot */
	caddr_t		bd_hbuf;	/* hold slot */
	caddr_t		bd_fbuf;	/* free slot */
	int		bd_hbuf_in_use;	/* don't rotate buffers */
	int		bd_slen;	/* current length of store buffer */
	int		bd_hlen;	/* current length of hold buffer */

	int		bd_bufsize;	/* absolute length of buffers */

	struct bpf_if *	bd_bif;		/* interface descriptor */
	u_long		bd_rtout;	/* Read timeout in 'ticks' */
	struct bpf_insn *bd_rfilter;	/* read filter code */
	struct bpf_insn *bd_wfilter;	/* write filter code */
	void		*bd_bfilter;	/* binary filter code */
	counter_u64_t	bd_rcount;	/* number of packets received */
	counter_u64_t	bd_dcount;	/* number of packets dropped */

	u_char		bd_promisc;	/* true if listening promiscuously */
	u_char		bd_state;	/* idle, waiting, or timed out */
	u_char		bd_immediate;	/* true to return on packet arrival */
	u_char		bd_writer;	/* non-zero if d is writer-only */
	int		bd_hdrcmplt;	/* false to fill in src lladdr automatically */
	int		bd_direction;	/* select packet direction */
	int		bd_tstamp;	/* select time stamping function */
	int		bd_feedback;	/* true to feed back sent packets */
	int		bd_async;	/* non-zero if packet reception should generate signal */
	int		bd_sig;		/* signal to send upon packet reception */
+	int		bd_pcp;		/* VLAN pcp tag */
	struct sigio *	bd_sigio;	/* information for async I/O */
	struct selinfo	bd_sel;		/* bsd select info */
	struct mtx	bd_lock;	/* per-descriptor lock */
	struct callout	bd_callout;	/* for BPF timeouts with select */
	struct label	*bd_label;	/* MAC label for descriptor */
	counter_u64_t	bd_fcount;	/* number of packets which matched filter */
	pid_t		bd_pid;		/* PID which created descriptor */
	int		bd_locked;	/* true if descriptor is locked */
	u_int		bd_bufmode;	/* Current buffer mode. */
	counter_u64_t	bd_wcount;	/* number of packets written */
	counter_u64_t	bd_wfcount;	/* number of packets that matched write filter */
	counter_u64_t	bd_wdcount;	/* number of packets dropped during a write */
	counter_u64_t	bd_zcopy;	/* number of zero copy operations */
	u_char		bd_compat32;	/* 32-bit stream on LP64 system */

	volatile u_int	bd_refcnt;
	struct epoch_context epoch_ctx;
};

/* Values for bd_state */
#define BPF_IDLE	0		/* no select in progress */
#define BPF_WAITING	1		/* waiting for read timeout in select */
#define BPF_TIMED_OUT	2		/* read timeout has expired in select */

/* Lock, unlock, or assert ownership of the per-descriptor mutex bd_lock. */
#define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_lock)
#define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_lock)
#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_lock, MA_OWNED)

/*
 * Store a process id in bd_pid, taken from the given thread td or from
 * curthread.
 * NOTE(review): both expansions are bare assignments with no enclosing
 * parentheses, so they are only safe when used as a complete statement.
 */
#define BPF_PID_REFRESH(bd, td)	(bd)->bd_pid = (td)->td_proc->p_pid
#define BPF_PID_REFRESH_CUR(bd)	(bd)->bd_pid = curthread->td_proc->p_pid

/*
 * External representation of the bpf descriptor
 *
 * Member names repeat those of struct bpf_d; the packet counters are
 * plain u_int64_t values here rather than counter_u64_t.
 */
struct xbpf_d {
	u_int		bd_structsize;	/* Size of this structure. */
	u_char		bd_promisc;
	u_char		bd_immediate;
	u_char		__bd_pad[6];
	int		bd_hdrcmplt;
	int		bd_direction;
	int		bd_feedback;
	int		bd_async;
	u_int64_t	bd_rcount;
	u_int64_t	bd_dcount;
	u_int64_t	bd_fcount;
	int		bd_sig;
	int		bd_slen;
	int		bd_hlen;
	int		bd_bufsize;
	pid_t		bd_pid;
	char		bd_ifname[IFNAMSIZ];
	int		bd_locked;
	u_int64_t	bd_wcount;
	u_int64_t	bd_wfcount;
	u_int64_t	bd_wdcount;
	u_int64_t	bd_zcopy;
	int		bd_bufmode;
	/*
	 * Allocate 4 64 bit unsigned integers for future expansion so we do
	 * not have to worry about breaking the ABI.
	 */
	u_int64_t	bd_spare[4];
};

#define BPFIF_FLAG_DYING	1	/* Reject new bpf consumers */

#endif