diff --git a/UPDATING b/UPDATING --- a/UPDATING +++ b/UPDATING @@ -27,6 +27,18 @@ world, or to merely disable the most expensive debugging functionality at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20230619: + To enable pf rdr rules for connections initiated from the host, pf + filter rules can be optionally enabled for packets delivered + locally. This can change the behavior of rules which match packets + delivered to lo0. To enable this feature: + + sysctl net.pf.filter_local=1 + service pf restart + + When enabled, it's best to ensure that packets delivered locally are not + filtered, e.g. by adding a 'skip on lo' rule. + 20230613: Improvements to libtacplus(8) mean that tacplus.conf(5) now follows POSIX shell syntax rules. This may cause TACACS+ diff --git a/bin/sh/histedit.c b/bin/sh/histedit.c --- a/bin/sh/histedit.c +++ b/bin/sh/histedit.c @@ -594,6 +594,20 @@ *(char *const *)b + curpos)); } +static char +**add_match(char **matches, size_t i, size_t *size, char *match_copy) +{ + if (match_copy == NULL) + return (NULL); + matches[i] = match_copy; + if (i >= *size - 1) { + *size *= 2; + matches = reallocarray(matches, *size, sizeof(matches[0])); + } + + return (matches); +} + /* * This function is passed to libedit's fn_complete2(). 
The library will use * it instead of its standard function that finds matching files in current @@ -605,7 +619,7 @@ { char *free_path = NULL, *path; const char *dirname; - char **matches = NULL; + char **matches = NULL, **rmatches; size_t i = 0, size = 16, uniq; size_t curpos = end - start, lcstring = -1; @@ -631,7 +645,6 @@ } while ((entry = readdir(dir)) != NULL) { struct stat statb; - char **rmatches; if (strncmp(entry->d_name, text, curpos) != 0) continue; @@ -642,11 +655,8 @@ continue; } else if (entry->d_type != DT_REG) continue; - matches[++i] = strdup(entry->d_name); - if (i < size - 1) - continue; - size *= 2; - rmatches = reallocarray(matches, size, sizeof(matches[0])); + rmatches = add_match(matches, ++i, &size, + strdup(entry->d_name)); if (rmatches == NULL) { closedir(dir); goto out; @@ -655,6 +665,14 @@ } closedir(dir); } + for (const unsigned char *bp = builtincmd; *bp != 0; bp += 2 + bp[0]) { + if (curpos > bp[0] || memcmp(bp + 2, text, curpos) != 0) + continue; + rmatches = add_match(matches, ++i, &size, strndup(bp + 2, bp[0])); + if (rmatches == NULL) + goto out; + matches = rmatches; + } out: free(free_path); if (i == 0) { diff --git a/contrib/lib9p/transport/socket.c b/contrib/lib9p/transport/socket.c --- a/contrib/lib9p/transport/socket.c +++ b/contrib/lib9p/transport/socket.c @@ -73,7 +73,6 @@ struct kevent kev[2]; struct kevent event[2]; int err, kq, i, val, evs, nsockets = 0; - int sockets[2]; memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; @@ -98,7 +97,6 @@ continue; } - sockets[nsockets] = s; EV_SET(&kev[nsockets++], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, 0); listen(s, 10); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4627,11 +4627,17 @@ WideningDecision == 
CM_Interleave); }; - // Returns true if Ptr is the pointer operand of a memory access instruction - // I, and I is known to not require scalarization. + // I, I is known to not require scalarization, and the pointer is not also + // stored. auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool { - return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF); + auto GetStoredValue = [I]() -> Value * { + if (!isa(I)) + return nullptr; + return I->getOperand(0); + }; + return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF) && + GetStoredValue() != Ptr; }; // Holds a list of values which are known to have at least one uniform use. @@ -4679,8 +4685,8 @@ if (isa(I) && Legal->isUniformMemOp(I)) addToWorklistIfAllowed(&I); - if (isUniformDecision(&I, VF)) { - assert(isVectorizedMemAccessUse(&I, Ptr) && "consistency check"); + if (isVectorizedMemAccessUse(&I, Ptr)) { + assert(isUniformDecision(&I, VF) && "consistency check"); HasUniformUse.insert(Ptr); } } diff --git a/contrib/tcpdump/netdissect.h b/contrib/tcpdump/netdissect.h --- a/contrib/tcpdump/netdissect.h +++ b/contrib/tcpdump/netdissect.h @@ -698,7 +698,7 @@ extern int ospf_te_lsa_print(netdissect_options *, const u_char *, u_int); extern void otv_print(netdissect_options *, const u_char *, u_int); extern void pfsync_ip_print(netdissect_options *, const u_char *, u_int); -extern u_int pfsync_if_print(netdissect_options *, const struct pcap_pkthdr *, const u_char *); +extern void pfsync_if_print(netdissect_options *, const struct pcap_pkthdr *, const u_char *); extern void pgm_print(netdissect_options *, const u_char *, u_int, const u_char *); extern void pim_print(netdissect_options *, const u_char *, u_int, const u_char *); extern void pimv1_print(netdissect_options *, const u_char *, u_int); diff --git a/contrib/tcpdump/print-pfsync.c b/contrib/tcpdump/print-pfsync.c --- a/contrib/tcpdump/print-pfsync.c +++ b/contrib/tcpdump/print-pfsync.c @@ -57,7 +57,7 @@ const 
struct pfsync_state_peer *, uint8_t); static void print_state(netdissect_options *, union pfsync_state_union *, int); -u_int +void pfsync_if_print(netdissect_options *ndo, const struct pcap_pkthdr *h, register const u_char *p) { @@ -78,7 +78,7 @@ hex_print(ndo, "\n\t", p, caplen); } fn_print_char(ndo, '\n'); - return (caplen); + return; } void diff --git a/contrib/tcpdump/print.c b/contrib/tcpdump/print.c --- a/contrib/tcpdump/print.c +++ b/contrib/tcpdump/print.c @@ -188,6 +188,9 @@ #if defined(DLT_PFLOG) && defined(HAVE_NET_IF_PFLOG_H) { pflog_if_print, DLT_PFLOG }, #endif +#if defined(DLT_PFSYNC) && defined(HAVE_NET_PFVAR_H) + { pfsync_if_print, DLT_PFSYNC}, +#endif #ifdef DLT_PKTAP { pktap_if_print, DLT_PKTAP }, #endif diff --git a/contrib/traceroute/ifaddrlist.c b/contrib/traceroute/ifaddrlist.c --- a/contrib/traceroute/ifaddrlist.c +++ b/contrib/traceroute/ifaddrlist.c @@ -73,7 +73,7 @@ #ifdef HAVE_SOCKADDR_SA_LEN size_t n; #endif - register struct ifreq *ifrp, *ifend, *ifnext, *mp; + register struct ifreq *ifrp, *ifend, *ifnext; register struct sockaddr_in *sin; register struct ifaddrlist *al; struct ifconf ifc; @@ -106,7 +106,6 @@ ifend = (struct ifreq *)((char *)ibuf + ifc.ifc_len); al = ifaddrlist; - mp = NULL; nipaddr = 0; for (; ifrp < ifend; ifrp = ifnext) { #ifdef HAVE_SOCKADDR_SA_LEN diff --git a/lib/libc/nameser/ns_name.c b/lib/libc/nameser/ns_name.c --- a/lib/libc/nameser/ns_name.c +++ b/lib/libc/nameser/ns_name.c @@ -1150,7 +1150,7 @@ bitlen = 256; return ((bitlen + 7 ) / 8 + 1); } - return (-1); /*%< unknwon ELT */ + return (-1); /*%< unknown ELT */ } return (l); } diff --git a/lib/libc/posix1e/acl_is_trivial_np.3 b/lib/libc/posix1e/acl_is_trivial_np.3 --- a/lib/libc/posix1e/acl_is_trivial_np.3 +++ b/lib/libc/posix1e/acl_is_trivial_np.3 @@ -26,8 +26,8 @@ .\" .\" $FreeBSD$ .\" -.Dd November 12, 2013 -.Dt ACL_STRIP_NP 3 +.Dd June 19, 2023 +.Dt ACL_IS_TRIVIAL_NP 3 .Os .Sh NAME .Nm acl_is_trivial_np @@ -41,7 +41,7 @@ .Fn acl_is_trivial_np "const 
acl_t aclp" "int *trivialp" .Sh DESCRIPTION The -.Fn acl_is_trivial +.Fn acl_is_trivial_np function determines whether the ACL pointed to by the argument .Va acl is trivial. @@ -56,13 +56,14 @@ For POSIX.1e ACLs, ACL is trivial if it has the three required entries, one for owner, one for owning group, and one for other. For NFSv4 ACLs, ACL is trivial if it is identical to the ACL generated by -.Fn acl_strip_np 3 . +.Xr acl_strip_np 3 . Files that have non-trivial ACL have a plus sign appended after mode bits in "ls -l" output. .Sh RETURN VALUES -.Rv -std acl_get_tag_type +.Rv -std acl_is_trivial_np .Sh SEE ALSO .Xr acl 3 , +.Xr acl_strip_np 3 , .Xr posix1e 3 .Sh STANDARDS POSIX.1e is described in IEEE POSIX.1e draft 17. diff --git a/lib/libc/xdr/xdr_rec.c b/lib/libc/xdr/xdr_rec.c --- a/lib/libc/xdr/xdr_rec.c +++ b/lib/libc/xdr/xdr_rec.c @@ -119,7 +119,7 @@ char *out_base; /* output buffer (points to frag header) */ char *out_finger; /* next output position */ char *out_boundry; /* data cannot up to this address */ - u_int32_t *frag_header; /* beginning of curren fragment */ + u_int32_t *frag_header; /* beginning of current fragment */ bool_t frag_sent; /* true if buffer sent in middle of record */ /* * in-coming bits diff --git a/lib/libgssapi/gss_delete_sec_context.c b/lib/libgssapi/gss_delete_sec_context.c --- a/lib/libgssapi/gss_delete_sec_context.c +++ b/lib/libgssapi/gss_delete_sec_context.c @@ -41,7 +41,6 @@ gss_ctx_id_t *context_handle, gss_buffer_t output_token) { - OM_uint32 major_status; struct _gss_context *ctx = (struct _gss_context *) *context_handle; if (output_token) @@ -54,7 +53,7 @@ * otherwise fake an empty token. 
*/ if (ctx->gc_ctx) { - major_status = ctx->gc_mech->gm_delete_sec_context( + (void) ctx->gc_mech->gm_delete_sec_context( minor_status, &ctx->gc_ctx, output_token); } free(ctx); diff --git a/lib/libgssapi/gss_mech_switch.c b/lib/libgssapi/gss_mech_switch.c --- a/lib/libgssapi/gss_mech_switch.c +++ b/lib/libgssapi/gss_mech_switch.c @@ -190,7 +190,6 @@ char *p; char *name, *oid, *lib, *kobj; struct _gss_mech_switch *m; - int count; void *so; const char *(*prefix_fn)(void); @@ -208,7 +207,6 @@ return; } - count = 0; while (fgets(buf, sizeof(buf), fp)) { if (*buf == '#') continue; @@ -290,7 +288,6 @@ OPTSYM(pname_to_uid); SLIST_INSERT_HEAD(&_gss_mechs, m, gm_link); - count++; continue; bad: diff --git a/lib/libkvm/kvm_minidump_powerpc64_hpt.c b/lib/libkvm/kvm_minidump_powerpc64_hpt.c --- a/lib/libkvm/kvm_minidump_powerpc64_hpt.c +++ b/lib/libkvm/kvm_minidump_powerpc64_hpt.c @@ -253,9 +253,6 @@ ppc64mmu_hpt_init(kvm_t *kd) { struct hpt_data *data; - struct minidumphdr *hdr; - - hdr = &kd->vmst->hdr; /* Alloc MMU data */ data = _kvm_malloc(kd, sizeof(*data)); diff --git a/lib/libvgl/mouse.c b/lib/libvgl/mouse.c --- a/lib/libvgl/mouse.c +++ b/lib/libvgl/mouse.c @@ -284,22 +284,19 @@ { struct mouse_info mouseinfo; VGLBitmap *ormask; - int andmask, border, error, i, interior; + int border, error, i, interior; switch (VGLModeInfo.vi_mem_model) { case V_INFO_MM_PACKED: case V_INFO_MM_PLANAR: - andmask = 0x0f; border = 0x0f; interior = 0x04; break; case V_INFO_MM_VGAX: - andmask = 0x3f; border = 0x3f; interior = 0x24; break; default: - andmask = 0xff; border = BORDER; interior = INTERIOR; break; diff --git a/libexec/rc/rc.d/motd b/libexec/rc/rc.d/motd --- a/libexec/rc/rc.d/motd +++ b/libexec/rc/rc.d/motd @@ -38,10 +38,10 @@ # Otherwise, create an empty template file. install -c -o root -g wheel -m ${PERMS} /dev/null "${TEMPLATE}" fi - # Provide compatibility symlink: - if [ ! 
-h "${COMPAT_MOTD}" ]; then - ln -sF "${TARGET}" "${COMPAT_MOTD}" - fi + fi + # Provide compatibility symlink: + if [ ! -h "${COMPAT_MOTD}" ]; then + ln -sF "${TARGET}" "${COMPAT_MOTD}" fi T=`mktemp -t motd` diff --git a/libexec/rtld-elf/arm/reloc.c b/libexec/rtld-elf/arm/reloc.c --- a/libexec/rtld-elf/arm/reloc.c +++ b/libexec/rtld-elf/arm/reloc.c @@ -91,7 +91,6 @@ const Elf_Rel *rel = NULL, *rellim; Elf_Addr relsz = 0; Elf_Addr *where; - uint32_t size; for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { @@ -104,7 +103,6 @@ } } rellim = (const Elf_Rel *)((const char *)rel + relsz); - size = (rellim - 1)->r_offset - rel->r_offset; for (; rel < rellim; rel++) { where = (Elf_Addr *)(relocbase + rel->r_offset); diff --git a/sbin/camcontrol/camcontrol.c b/sbin/camcontrol/camcontrol.c --- a/sbin/camcontrol/camcontrol.c +++ b/sbin/camcontrol/camcontrol.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -123,18 +124,21 @@ CAM_ARG_LUN = 0x00000010, CAM_ARG_EJECT = 0x00000020, CAM_ARG_UNIT = 0x00000040, - CAM_ARG_FORMAT_BLOCK = 0x00000080, - CAM_ARG_FORMAT_BFI = 0x00000100, - CAM_ARG_FORMAT_PHYS = 0x00000200, - CAM_ARG_PLIST = 0x00000400, - CAM_ARG_GLIST = 0x00000800, + /* unused 0x00000080 */ + /* unused 0x00000100 */ + /* unused 0x00000200 */ + /* unused 0x00000400 */ + /* unused 0x00000800 */ CAM_ARG_GET_SERIAL = 0x00001000, CAM_ARG_GET_STDINQ = 0x00002000, CAM_ARG_GET_XFERRATE = 0x00004000, CAM_ARG_INQ_MASK = 0x00007000, + /* unused 0x00008000 */ + /* unused 0x00010000 */ CAM_ARG_TIMEOUT = 0x00020000, CAM_ARG_CMD_IN = 0x00040000, CAM_ARG_CMD_OUT = 0x00080000, + /* unused 0x00100000 */ CAM_ARG_ERR_RECOVER = 0x00200000, CAM_ARG_RETRIES = 0x00400000, CAM_ARG_START_UNIT = 0x00800000, @@ -145,6 +149,7 @@ CAM_ARG_DEBUG_XPT = 0x10000000, CAM_ARG_DEBUG_PERIPH = 0x20000000, CAM_ARG_DEBUG_PROBE = 0x40000000, + /* unused 0x80000000 */ } cam_argmask; struct camcontrol_opts { @@ -3823,11 +3828,8 @@ struct scsi_read_defect_data_hdr_10 *hdr10 = 
NULL; struct scsi_read_defect_data_hdr_12 *hdr12 = NULL; size_t hdr_size = 0, entry_size = 0; - int use_12byte = 0; - int hex_format = 0; u_int8_t *defect_list = NULL; u_int8_t list_format = 0; - int list_type_set = 0; u_int32_t dlist_length = 0; u_int32_t returned_length = 0, valid_len = 0; u_int32_t num_returned = 0, num_valid = 0; @@ -3835,11 +3837,11 @@ u_int32_t starting_offset = 0; u_int8_t returned_format, returned_type; unsigned int i; - int summary = 0, quiet = 0; int c, error = 0; - int lists_specified = 0; - int get_length = 1, first_pass = 1; int mads = 0; + bool summary = false, quiet = false, list_type_set = false; + bool get_length = true, use_12byte = false, first_pass = true; + bool hex_format = false; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch(c){ @@ -3848,15 +3850,21 @@ scsi_nv_status status; int entry_num = 0; + if (list_type_set) { + warnx("%s: -f specified twice", __func__); + error = 1; + goto defect_bailout; + } + status = scsi_get_nv(defect_list_type_map, sizeof(defect_list_type_map) / sizeof(defect_list_type_map[0]), optarg, &entry_num, SCSI_NV_FLAG_IG_CASE); if (status == SCSI_NV_FOUND) { - list_format = defect_list_type_map[ + list_format |= defect_list_type_map[ entry_num].value; - list_type_set = 1; + list_type_set = true; } else { warnx("%s: %s %s option %s", __func__, (status == SCSI_NV_AMBIGUOUS) ? 
@@ -3868,16 +3876,16 @@ break; } case 'G': - arglist |= CAM_ARG_GLIST; + list_format |= SRDD10_GLIST; break; case 'P': - arglist |= CAM_ARG_PLIST; + list_format |= SRDD10_PLIST; break; case 'q': - quiet = 1; + quiet = true; break; case 's': - summary = 1; + summary = true; break; case 'S': { char *endptr; @@ -3888,52 +3896,48 @@ warnx("invalid starting offset %s", optarg); goto defect_bailout; } + use_12byte = true; break; } case 'X': - hex_format = 1; + hex_format = true; break; default: break; } } - if (list_type_set == 0) { + if (!list_type_set) { error = 1; warnx("no defect list format specified"); goto defect_bailout; } - if (arglist & CAM_ARG_PLIST) { - list_format |= SRDD10_PLIST; - lists_specified++; - } - - if (arglist & CAM_ARG_GLIST) { - list_format |= SRDD10_GLIST; - lists_specified++; - } - /* * This implies a summary, and was the previous behavior. */ - if (lists_specified == 0) - summary = 1; + if ((list_format & ~SRDD10_DLIST_FORMAT_MASK) == 0) + summary = true; ccb = cam_getccb(device); -retry_12byte: - /* - * We start off asking for just the header to determine how much - * defect data is available. Some Hitachi drives return an error - * if you ask for more data than the drive has. Once we know the - * length, we retry the command with the returned length. + * We start off asking for just the header to determine how much defect + * data is available. Some Hitachi drives return an error if you ask + * for more data than the drive has. Once we know the length, we retry + * the command with the returned length. When we're retrying with the + * 12-byte command, we're always changing to the 12-byte command and + * need to get the length. Simplify the logic below by always setting + * use_12byte in this case with this slightly more complex logic here. 
*/ - if (use_12byte == 0) + if (!use_12byte) { dlist_length = sizeof(*hdr10); - else + } else { +retry_12byte: + get_length = true; + use_12byte = true; dlist_length = sizeof(*hdr12); + } retry: if (defect_list != NULL) { @@ -3979,7 +3983,7 @@ valid_len = ccb->csio.dxfer_len - ccb->csio.resid; - if (use_12byte == 0) { + if (!use_12byte) { hdr10 = (struct scsi_read_defect_data_hdr_10 *)defect_list; hdr_size = sizeof(*hdr10); hdr_max = SRDDH10_MAX_LENGTH; @@ -4033,8 +4037,8 @@ num_valid = min(returned_length, valid_len - hdr_size); num_valid /= entry_size; - if (get_length != 0) { - get_length = 0; + if (get_length) { + get_length = false; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_SCSI_STATUS_ERROR) { @@ -4055,10 +4059,8 @@ if ((sense_key == SSD_KEY_RECOVERED_ERROR) && (asc == 0x1c) && (ascq == 0x00) && (returned_length > 0)) { - if ((use_12byte == 0) + if (!use_12byte && (returned_length >= max_possible_size)) { - get_length = 1; - use_12byte = 1; goto retry_12byte; } dlist_length = returned_length + hdr_size; @@ -4073,9 +4075,7 @@ * command can support. Retry with the 12 * byte command. */ - if (use_12byte == 0) { - get_length = 1; - use_12byte = 1; + if (!use_12byte) { goto retry_12byte; } dlist_length = returned_length + hdr_size; @@ -4089,9 +4089,7 @@ * error and no data. Retry with the 12 * byte command. 
*/ - if (use_12byte == 0) { - get_length = 1; - use_12byte = 1; + if (!use_12byte) { goto retry_12byte; } dlist_length = returned_length + hdr_size; @@ -4104,11 +4102,9 @@ if (returned_length == 0) dlist_length = SRDD10_MAX_LENGTH; else { - if ((use_12byte == 0) + if (!use_12byte && (returned_length >= max_possible_size)) { - get_length = 1; - use_12byte = 1; goto retry_12byte; } dlist_length = returned_length + @@ -4124,17 +4120,15 @@ CAM_EPF_ALL, stderr); goto defect_bailout; } else { - if ((use_12byte == 0) + if (!use_12byte && (returned_length >= max_possible_size)) { - get_length = 1; - use_12byte = 1; goto retry_12byte; } dlist_length = returned_length + hdr_size; } - if (summary != 0) { + if (summary) { fprintf(stdout, "%u", num_returned); - if (quiet == 0) { + if (!quiet) { fprintf(stdout, " defect%s", (num_returned != 1) ? "s" : ""); } @@ -4218,10 +4212,10 @@ goto defect_bailout; } - if (first_pass != 0) { + if (first_pass) { fprintf(stderr, "Got %d defect", num_returned); - if ((lists_specified == 0) || (num_returned == 0)) { + if (!summary || (num_returned == 0)) { fprintf(stderr, "s.\n"); goto defect_bailout; } else if (num_returned == 1) @@ -4229,7 +4223,7 @@ else fprintf(stderr, "s:\n"); - first_pass = 0; + first_pass = false; } /* @@ -4254,7 +4248,7 @@ 0 : 1; sector &= ~SDD_EXT_PHYS_FLAG_MASK; } - if (hex_format == 0) + if (!hex_format) fprintf(stdout, "%d:%d:%d%s", scsi_3btoul(dlist[i].cylinder), dlist[i].head, @@ -4290,7 +4284,7 @@ mads = (bfi & SDD_EXT_BFI_MADS) ? 
1 : 0; bfi &= ~SDD_EXT_BFI_FLAG_MASK; } - if (hex_format == 0) + if (!hex_format) fprintf(stdout, "%d:%d:%d%s", scsi_3btoul(dlist[i].cylinder), dlist[i].head, @@ -4319,7 +4313,7 @@ (defect_list + hdr_size); for (i = 0; i < num_valid; i++) { - if (hex_format == 0) + if (!hex_format) fprintf(stdout, "%u\n", scsi_4btoul(dlist[i].address)); else @@ -4342,7 +4336,7 @@ (defect_list + hdr_size); for (i = 0; i < num_valid; i++) { - if (hex_format == 0) + if (!hex_format) fprintf(stdout, "%ju\n", (uintmax_t)scsi_8btou64( dlist[i].address)); diff --git a/sbin/camcontrol/zone.c b/sbin/camcontrol/zone.c --- a/sbin/camcontrol/zone.c +++ b/sbin/camcontrol/zone.c @@ -138,7 +138,6 @@ struct scsi_report_zones_desc *desc = NULL; uint32_t hdr_len, len; uint64_t max_lba, next_lba = 0; - int more_data = 0; zone_print_status status = ZONE_PRINT_OK; char tmpstr[80]; int field_widths[ZONE_NUM_FIELDS]; @@ -168,7 +167,6 @@ } if (hdr_len > (valid_len + sizeof(*hdr))) { - more_data = 1; status = ZONE_PRINT_MORE_DATA; } @@ -592,7 +590,7 @@ /*retry_count*/ retry_count, /*flags*/ CAM_DIR_NONE | CAM_DEV_QFRZDIS, /*tag_action*/ task_attr, - /*protocol*/ AP_PROTO_NON_DATA, + /*protocol*/ protocol, /*ata_flags*/ AP_FLAG_BYT_BLOK_BYTES | AP_FLAG_TLEN_NO_DATA, /*features*/ features, diff --git a/sbin/ifconfig/ifconfig.h b/sbin/ifconfig/ifconfig.h --- a/sbin/ifconfig/ifconfig.h +++ b/sbin/ifconfig/ifconfig.h @@ -255,6 +255,7 @@ extern int exit_code; extern char *f_inet, *f_inet6, *f_ether, *f_addr; +void clearifcap(if_ctx *ctx, const char *, int value); void setifcap(if_ctx *ctx, const char *, int value); void setifcapnv(if_ctx *ctx, const char *vname, const char *arg); diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd June 13, 2023 +.Dd June 20, 2023 .Dt IFCONFIG 8 .Os .Sh NAME @@ -509,6 +509,9 @@ .It Cm group Ar groupname Assign the 
interface to a .Dq group . +The +.Ar groupname +may not be longer than 15 characters and must not end in a digit. Any interface can be in multiple groups. .Pp Cloned interfaces are members of their interface family group by default. diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -501,7 +501,7 @@ args->printkeys = true; break; case 'l': /* scan interface names only */ - args->namesonly++; + args->namesonly = true; break; case 'm': /* show media choices in status */ args->supmedia = true; @@ -1396,6 +1396,22 @@ * of the ifreq structure, which may confuse other parts of ifconfig. * Make a private copy so we can avoid that. */ +static void +clearifflags(if_ctx *ctx, const char *vname, int value) +{ + struct ifreq my_ifr; + int flags; + + flags = getifflags(ctx->ifname, ctx->io_s, false); + flags &= ~value; + memset(&my_ifr, 0, sizeof(my_ifr)); + strlcpy(my_ifr.ifr_name, ctx->ifname, sizeof(my_ifr.ifr_name)); + my_ifr.ifr_flags = flags & 0xffff; + my_ifr.ifr_flagshigh = flags >> 16; + if (ioctl(ctx->io_s, SIOCSIFFLAGS, (caddr_t)&my_ifr) < 0) + Perror(vname); +} + static void setifflags(if_ctx *ctx, const char *vname, int value) { @@ -1403,11 +1419,7 @@ int flags; flags = getifflags(ctx->ifname, ctx->io_s, false); - if (value < 0) { - value = -value; - flags &= ~value; - } else - flags |= value; + flags |= value; memset(&my_ifr, 0, sizeof(my_ifr)); strlcpy(my_ifr.ifr_name, ctx->ifname, sizeof(my_ifr.ifr_name)); my_ifr.ifr_flags = flags & 0xffff; @@ -1416,6 +1428,27 @@ Perror(vname); } +void +clearifcap(if_ctx *ctx, const char *vname, int value) +{ + struct ifreq ifr = {}; + int flags; + + if (ioctl_ctx_ifr(ctx, SIOCGIFCAP, &ifr) < 0) { + Perror("ioctl (SIOCGIFCAP)"); + exit(1); + } + flags = ifr.ifr_curcap; + flags &= ~value; + flags &= ifr.ifr_reqcap; + /* Check for no change in capabilities. 
*/ + if (ifr.ifr_curcap == flags) + return; + ifr.ifr_reqcap = flags; + if (ioctl_ctx(ctx, SIOCSIFCAP, &ifr) < 0) + Perror(vname); +} + void setifcap(if_ctx *ctx, const char *vname, int value) { @@ -1427,11 +1460,7 @@ exit(1); } flags = ifr.ifr_curcap; - if (value < 0) { - value = -value; - flags &= ~value; - } else - flags |= value; + flags |= value; flags &= ifr.ifr_reqcap; /* Check for no change in capabilities. */ if (ifr.ifr_curcap == flags) @@ -1972,17 +2001,17 @@ static struct cmd basic_cmds[] = { DEF_CMD("up", IFF_UP, setifflags), - DEF_CMD("down", -IFF_UP, setifflags), - DEF_CMD("arp", -IFF_NOARP, setifflags), + DEF_CMD("down", IFF_UP, clearifflags), + DEF_CMD("arp", IFF_NOARP, clearifflags), DEF_CMD("-arp", IFF_NOARP, setifflags), DEF_CMD("debug", IFF_DEBUG, setifflags), - DEF_CMD("-debug", -IFF_DEBUG, setifflags), + DEF_CMD("-debug", IFF_DEBUG, clearifflags), DEF_CMD_ARG("description", setifdescr), DEF_CMD_ARG("descr", setifdescr), DEF_CMD("-description", 0, unsetifdescr), DEF_CMD("-descr", 0, unsetifdescr), DEF_CMD("promisc", IFF_PPROMISC, setifflags), - DEF_CMD("-promisc", -IFF_PPROMISC, setifflags), + DEF_CMD("-promisc", IFF_PPROMISC, clearifflags), DEF_CMD("add", IFF_UP, notealias), DEF_CMD("alias", IFF_UP, notealias), DEF_CMD("-alias", -IFF_UP, notealias), @@ -1991,7 +2020,7 @@ #ifdef notdef #define EN_SWABIPS 0x1000 DEF_CMD("swabips", EN_SWABIPS, setifflags), - DEF_CMD("-swabips", -EN_SWABIPS, setifflags), + DEF_CMD("-swabips", EN_SWABIPS, clearifflags), #endif DEF_CMD_ARG("netmask", setifnetmask), DEF_CMD_ARG("metric", setifmetric), @@ -2004,64 +2033,64 @@ DEF_CMD_ARG("-vnet", setifrvnet), #endif DEF_CMD("link0", IFF_LINK0, setifflags), - DEF_CMD("-link0", -IFF_LINK0, setifflags), + DEF_CMD("-link0", IFF_LINK0, clearifflags), DEF_CMD("link1", IFF_LINK1, setifflags), - DEF_CMD("-link1", -IFF_LINK1, setifflags), + DEF_CMD("-link1", IFF_LINK1, clearifflags), DEF_CMD("link2", IFF_LINK2, setifflags), - DEF_CMD("-link2", -IFF_LINK2, setifflags), + 
DEF_CMD("-link2", IFF_LINK2, clearifflags), DEF_CMD("monitor", IFF_MONITOR, setifflags), - DEF_CMD("-monitor", -IFF_MONITOR, setifflags), + DEF_CMD("-monitor", IFF_MONITOR, clearifflags), DEF_CMD("mextpg", IFCAP_MEXTPG, setifcap), - DEF_CMD("-mextpg", -IFCAP_MEXTPG, setifcap), + DEF_CMD("-mextpg", IFCAP_MEXTPG, clearifcap), DEF_CMD("staticarp", IFF_STATICARP, setifflags), - DEF_CMD("-staticarp", -IFF_STATICARP, setifflags), + DEF_CMD("-staticarp", IFF_STATICARP, clearifflags), DEF_CMD("stickyarp", IFF_STICKYARP, setifflags), - DEF_CMD("-stickyarp", -IFF_STICKYARP, setifflags), + DEF_CMD("-stickyarp", IFF_STICKYARP, clearifflags), DEF_CMD("rxcsum6", IFCAP_RXCSUM_IPV6, setifcap), - DEF_CMD("-rxcsum6", -IFCAP_RXCSUM_IPV6, setifcap), + DEF_CMD("-rxcsum6", IFCAP_RXCSUM_IPV6, clearifcap), DEF_CMD("txcsum6", IFCAP_TXCSUM_IPV6, setifcap), - DEF_CMD("-txcsum6", -IFCAP_TXCSUM_IPV6, setifcap), + DEF_CMD("-txcsum6", IFCAP_TXCSUM_IPV6, clearifcap), DEF_CMD("rxcsum", IFCAP_RXCSUM, setifcap), - DEF_CMD("-rxcsum", -IFCAP_RXCSUM, setifcap), + DEF_CMD("-rxcsum", IFCAP_RXCSUM, clearifcap), DEF_CMD("txcsum", IFCAP_TXCSUM, setifcap), - DEF_CMD("-txcsum", -IFCAP_TXCSUM, setifcap), + DEF_CMD("-txcsum", IFCAP_TXCSUM, clearifcap), DEF_CMD("netcons", IFCAP_NETCONS, setifcap), - DEF_CMD("-netcons", -IFCAP_NETCONS, setifcap), + DEF_CMD("-netcons", IFCAP_NETCONS, clearifcap), DEF_CMD_ARG("pcp", setifpcp), DEF_CMD("-pcp", 0, disableifpcp), DEF_CMD("polling", IFCAP_POLLING, setifcap), - DEF_CMD("-polling", -IFCAP_POLLING, setifcap), + DEF_CMD("-polling", IFCAP_POLLING, clearifcap), DEF_CMD("tso6", IFCAP_TSO6, setifcap), - DEF_CMD("-tso6", -IFCAP_TSO6, setifcap), + DEF_CMD("-tso6", IFCAP_TSO6, clearifcap), DEF_CMD("tso4", IFCAP_TSO4, setifcap), - DEF_CMD("-tso4", -IFCAP_TSO4, setifcap), + DEF_CMD("-tso4", IFCAP_TSO4, clearifcap), DEF_CMD("tso", IFCAP_TSO, setifcap), - DEF_CMD("-tso", -IFCAP_TSO, setifcap), + DEF_CMD("-tso", IFCAP_TSO, clearifcap), DEF_CMD("toe", IFCAP_TOE, setifcap), - 
DEF_CMD("-toe", -IFCAP_TOE, setifcap), + DEF_CMD("-toe", IFCAP_TOE, clearifcap), DEF_CMD("lro", IFCAP_LRO, setifcap), - DEF_CMD("-lro", -IFCAP_LRO, setifcap), + DEF_CMD("-lro", IFCAP_LRO, clearifcap), DEF_CMD("txtls", IFCAP_TXTLS, setifcap), - DEF_CMD("-txtls", -IFCAP_TXTLS, setifcap), + DEF_CMD("-txtls", IFCAP_TXTLS, clearifcap), DEF_CMD_SARG("rxtls", IFCAP2_RXTLS4_NAME "," IFCAP2_RXTLS6_NAME, setifcapnv), DEF_CMD_SARG("-rxtls", "-"IFCAP2_RXTLS4_NAME ",-" IFCAP2_RXTLS6_NAME, setifcapnv), DEF_CMD("wol", IFCAP_WOL, setifcap), - DEF_CMD("-wol", -IFCAP_WOL, setifcap), + DEF_CMD("-wol", IFCAP_WOL, clearifcap), DEF_CMD("wol_ucast", IFCAP_WOL_UCAST, setifcap), - DEF_CMD("-wol_ucast", -IFCAP_WOL_UCAST, setifcap), + DEF_CMD("-wol_ucast", IFCAP_WOL_UCAST, clearifcap), DEF_CMD("wol_mcast", IFCAP_WOL_MCAST, setifcap), - DEF_CMD("-wol_mcast", -IFCAP_WOL_MCAST, setifcap), + DEF_CMD("-wol_mcast", IFCAP_WOL_MCAST, clearifcap), DEF_CMD("wol_magic", IFCAP_WOL_MAGIC, setifcap), - DEF_CMD("-wol_magic", -IFCAP_WOL_MAGIC, setifcap), + DEF_CMD("-wol_magic", IFCAP_WOL_MAGIC, clearifcap), DEF_CMD("txrtlmt", IFCAP_TXRTLMT, setifcap), - DEF_CMD("-txrtlmt", -IFCAP_TXRTLMT, setifcap), + DEF_CMD("-txrtlmt", IFCAP_TXRTLMT, clearifcap), DEF_CMD("txtlsrtlmt", IFCAP_TXTLS_RTLMT, setifcap), - DEF_CMD("-txtlsrtlmt", -IFCAP_TXTLS_RTLMT, setifcap), + DEF_CMD("-txtlsrtlmt", IFCAP_TXTLS_RTLMT, clearifcap), DEF_CMD("hwrxtstmp", IFCAP_HWRXTSTMP, setifcap), - DEF_CMD("-hwrxtstmp", -IFCAP_HWRXTSTMP, setifcap), - DEF_CMD("normal", -IFF_LINK0, setifflags), + DEF_CMD("-hwrxtstmp", IFCAP_HWRXTSTMP, clearifcap), + DEF_CMD("normal", IFF_LINK0, clearifflags), DEF_CMD("compress", IFF_LINK0, setifflags), DEF_CMD("noicmp", IFF_LINK1, setifflags), DEF_CMD_ARG("mtu", setifmtu), diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c --- a/sbin/ifconfig/ifieee80211.c +++ b/sbin/ifconfig/ifieee80211.c @@ -2062,14 +2062,14 @@ const struct ieee80211_channel *cb = b; return ca->ic_freq == cb->ic_freq ? 
- (ca->ic_flags & CHAN_ALL) - (cb->ic_flags & CHAN_ALL) : + (int)(ca->ic_flags & CHAN_ALL) - (int)(cb->ic_flags & CHAN_ALL) : ca->ic_freq - cb->ic_freq; #undef CHAN_ALL } static const struct ieee80211_channel * chanlookup(const struct ieee80211_channel chans[], int nchans, - int freq, int flags) + int freq, uint32_t flags) { int i; @@ -2084,7 +2084,7 @@ } static int -chanfind(const struct ieee80211_channel chans[], int nchans, unsigned int flags) +chanfind(const struct ieee80211_channel chans[], int nchans, uint32_t flags) { for (int i = 0; i < nchans; i++) { const struct ieee80211_channel *c = &chans[i]; @@ -2098,7 +2098,7 @@ * Check channel compatibility. */ static int -checkchan(const struct ieee80211req_chaninfo *avail, int freq, int flags) +checkchan(const struct ieee80211req_chaninfo *avail, int freq, uint32_t flags) { flags &= ~REQ_FLAGS; /* diff --git a/sbin/ifconfig/ifpfsync.c b/sbin/ifconfig/ifpfsync.c --- a/sbin/ifconfig/ifpfsync.c +++ b/sbin/ifconfig/ifpfsync.c @@ -223,12 +223,17 @@ case AF_INET: { struct sockaddr_in *sin = satosin(peerres->ai_addr); - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) - errx(1, "syncpeer address cannot be multicast"); - memcpy(&addr, sin, sizeof(*sin)); break; } +#endif +#ifdef INET6 + case AF_INET6: { + struct sockaddr_in6 *sin6 = satosin6(peerres->ai_addr); + + memcpy(&addr, sin6, sizeof(*sin6)); + break; + } #endif default: errx(1, "syncpeer address %s not supported", val); @@ -373,9 +378,9 @@ if (syncdev[0] != '\0') printf("syncdev: %s ", syncdev); - if (syncpeer.ss_family == AF_INET && + if ((syncpeer.ss_family == AF_INET && ((struct sockaddr_in *)&syncpeer)->sin_addr.s_addr != - htonl(INADDR_PFSYNC_GROUP)) { + htonl(INADDR_PFSYNC_GROUP)) || syncpeer.ss_family == AF_INET6) { struct sockaddr *syncpeer_sa = (struct sockaddr *)&syncpeer; diff --git a/sbin/ifconfig/ifvlan.c b/sbin/ifconfig/ifvlan.c --- a/sbin/ifconfig/ifvlan.c +++ b/sbin/ifconfig/ifvlan.c @@ -286,15 +286,15 @@ /* XXX For compatibility. 
Should become DEF_CMD() some day. */ DEF_CMD_OPTARG("-vlandev", unsetvlandev), DEF_CMD("vlanmtu", IFCAP_VLAN_MTU, setifcap), - DEF_CMD("-vlanmtu", -IFCAP_VLAN_MTU, setifcap), + DEF_CMD("-vlanmtu", IFCAP_VLAN_MTU, clearifcap), DEF_CMD("vlanhwtag", IFCAP_VLAN_HWTAGGING, setifcap), - DEF_CMD("-vlanhwtag", -IFCAP_VLAN_HWTAGGING, setifcap), + DEF_CMD("-vlanhwtag", IFCAP_VLAN_HWTAGGING, clearifcap), DEF_CMD("vlanhwfilter", IFCAP_VLAN_HWFILTER, setifcap), - DEF_CMD("-vlanhwfilter", -IFCAP_VLAN_HWFILTER, setifcap), - DEF_CMD("-vlanhwtso", -IFCAP_VLAN_HWTSO, setifcap), + DEF_CMD("-vlanhwfilter", IFCAP_VLAN_HWFILTER, clearifcap), + DEF_CMD("-vlanhwtso", IFCAP_VLAN_HWTSO, clearifcap), DEF_CMD("vlanhwtso", IFCAP_VLAN_HWTSO, setifcap), DEF_CMD("vlanhwcsum", IFCAP_VLAN_HWCSUM, setifcap), - DEF_CMD("-vlanhwcsum", -IFCAP_VLAN_HWCSUM, setifcap), + DEF_CMD("-vlanhwcsum", IFCAP_VLAN_HWCSUM, clearifcap), }; static struct afswtch af_vlan = { .af_name = "af_vlan", diff --git a/sbin/ifconfig/ifvxlan.c b/sbin/ifconfig/ifvxlan.c --- a/sbin/ifconfig/ifvxlan.c +++ b/sbin/ifconfig/ifvxlan.c @@ -613,9 +613,9 @@ DEF_CMD("vxlanflushall", 1, setvxlan_flush), DEF_CMD("vxlanhwcsum", IFCAP_VXLAN_HWCSUM, setifcap), - DEF_CMD("-vxlanhwcsum", -IFCAP_VXLAN_HWCSUM, setifcap), + DEF_CMD("-vxlanhwcsum", IFCAP_VXLAN_HWCSUM, clearifcap), DEF_CMD("vxlanhwtso", IFCAP_VXLAN_HWTSO, setifcap), - DEF_CMD("-vxlanhwtso", -IFCAP_VXLAN_HWTSO, setifcap), + DEF_CMD("-vxlanhwtso", IFCAP_VXLAN_HWTSO, clearifcap), }; static struct afswtch af_vxlan = { diff --git a/sbin/mount_nfs/mount_nfs.8 b/sbin/mount_nfs/mount_nfs.8 --- a/sbin/mount_nfs/mount_nfs.8 +++ b/sbin/mount_nfs/mount_nfs.8 @@ -28,7 +28,7 @@ .\" @(#)mount_nfs.8 8.3 (Berkeley) 3/29/95 .\" $FreeBSD$ .\" -.Dd April 3, 2023 +.Dd June 14, 2023 .Dt MOUNT_NFS 8 .Os .Sh NAME @@ -231,6 +231,9 @@ This option requires the .Cm nfsv4 option. 
+Note that for NFS servers such as AmazonEFS, where each new TCP +connection can connect to a different cluster that maintains lock +state separately, this option cannot be used. .It Cm nfsv2 Use the NFS Version 2 protocol (the default is to try version 3 first then version 2). diff --git a/share/examples/kld/syscall/test/call.c b/share/examples/kld/syscall/test/call.c --- a/share/examples/kld/syscall/test/call.c +++ b/share/examples/kld/syscall/test/call.c @@ -28,7 +28,7 @@ * $FreeBSD$ */ -#include +#include #include #include diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,5 +1,5 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. -.Dd May 30, 2023 +.Dd June 19, 2023 .Dt SRC.CONF 5 .Os .Sh NAME @@ -261,18 +261,12 @@ Do not build .Xr calendar 1 . .It Va WITHOUT_CAPSICUM -Do not build Capsicum support into system programs. -When set, it enforces these options: -.Pp -.Bl -item -compact -.It -.Va WITHOUT_CASPER -.El +This option has no effect. .It Va WITHOUT_CAROOT Do not add the trusted certificates from the Mozilla NSS bundle to base. .It Va WITHOUT_CASPER -Do not build Casper program and related libraries. +This option has no effect. .It Va WITH_CCACHE_BUILD Use .Xr ccache 1 diff --git a/share/mk/bsd.cpu.mk b/share/mk/bsd.cpu.mk --- a/share/mk/bsd.cpu.mk +++ b/share/mk/bsd.cpu.mk @@ -14,8 +14,12 @@ MACHINE_CPU = arm . elif ${MACHINE_CPUARCH} == "i386" MACHINE_CPU = i486 -. elif ${MACHINE_CPUARCH} == "powerpc" +. elif ${MACHINE_ARCH} == "powerpc" MACHINE_CPU = aim +. elif ${MACHINE_ARCH} == "powerpc64" +MACHINE_CPU = aim altivec +. elif ${MACHINE_ARCH} == "powerpc64le" +MACHINE_CPU = aim altivec vsx vsx2 . elif ${MACHINE_CPUARCH} == "riscv" MACHINE_CPU = riscv . endif @@ -277,7 +281,9 @@ . if ${CPUTYPE} == "e500" MACHINE_CPU = booke softfp . elif ${CPUTYPE} == "g4" -MACHINE_CPU = altivec +MACHINE_CPU = aim altivec +. else +MACHINE_CPU= aim . endif . 
elif ${MACHINE_ARCH} == "powerpc64" . if ${CPUTYPE} == "e5500" @@ -289,10 +295,10 @@ . elif ${CPUTYPE} == power9 MACHINE_CPU = altivec vsx vsx2 vsx3 . else -MACHINE_CPU = altivec +MACHINE_CPU = aim altivec . endif . elif ${MACHINE_ARCH} == "powerpc64le" -MACHINE_CPU = altivec vsx vsx2 +MACHINE_CPU = aim altivec vsx vsx2 . if ${CPUTYPE} == power9 MACHINE_CPU += vsx3 . endif diff --git a/share/mk/bsd.mkopt.mk b/share/mk/bsd.mkopt.mk --- a/share/mk/bsd.mkopt.mk +++ b/share/mk/bsd.mkopt.mk @@ -8,6 +8,8 @@ # "yes", unless WITHOUT_FOO is defined, in which case it is set to # "no". # +# For each option FOO in __REQUIRED_OPTIONS, MK_FOO is set to "yes". +# # For each option FOO in __DEFAULT_NO_OPTIONS, MK_FOO is set to "no", # unless WITH_FOO is defined, in which case it is set to "yes". # @@ -52,6 +54,14 @@ .endfor .undef __DEFAULT_YES_OPTIONS +# +# MK_* options which are always yes, typically as a transitional +# step towards removing the options entirely. +# +.for var in ${__REQUIRED_OPTIONS} +MK_${var}:= yes +.endfor + # # MK_* options which default to "no". # diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -53,6 +53,9 @@ # BROKEN was selected as the least imperfect one considered at the # time. Options are added to BROKEN_OPTIONS list on a per-arch basis. # At this time, there's no provision for mutually incompatible options. +# Options listed in 'REQUIRED_OPTIONS' will be hard-wired to 'yes'; this +# is intended as a transitional measure while options are in the process +# of being removed. __DEFAULT_YES_OPTIONS = \ ACCT \ @@ -73,9 +76,7 @@ BSNMP \ BZIP2 \ CALENDAR \ - CAPSICUM \ CAROOT \ - CASPER \ CCD \ CDDL \ CLANG \ @@ -213,6 +214,10 @@ SORT_THREADS \ ZONEINFO_LEAPSECONDS_SUPPORT \ +__REQUIRED_OPTIONS = \ + CAPSICUM \ + CASPER + # LEFT/RIGHT. Left options which default to "yes" unless their corresponding # RIGHT option is disabled. 
__DEFAULT_DEPENDENT_OPTIONS= \ diff --git a/sys/arm64/arm64/disassem.c b/sys/arm64/arm64/disassem.c --- a/sys/arm64/arm64/disassem.c +++ b/sys/arm64/arm64/disassem.c @@ -53,6 +53,7 @@ #define OP_RT_SP (1UL << 8) /* Use sp for RT otherwise xzr */ #define OP_RN_SP (1UL << 9) /* Use sp for RN otherwise xzr */ #define OP_RM_SP (1UL << 10) /* Use sp for RM otherwise xzr */ +#define OP_SHIFT_ROR (1UL << 11) /* Use ror shift type */ static const char *w_reg[] = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", @@ -69,7 +70,7 @@ }; static const char *shift_2[] = { - "lsl", "lsr", "asr", "rsv" + "lsl", "lsr", "asr", "ror" }; /* @@ -232,6 +233,28 @@ TYPE_01, 0 }, /* negs shifted register */ { "subs", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", TYPE_01, 0 }, /* subs shifted register */ + { "mvn", "SF(1)|0101010|SHIFT(2)|1|RM(5)|IMM(6)|11111|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* mvn shifted register */ + { "orn", "SF(1)|0101010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* orn shifted register */ + { "mov", "SF(1)|0101010000|RM(5)|000000|11111|RD(5)", + TYPE_01, 0 }, /* mov register */ + { "orr", "SF(1)|0101010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* orr shifted register */ + { "and", "SF(1)|0001010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* and shifted register */ + { "tst", "SF(1)|1101010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|11111", + TYPE_01, OP_SHIFT_ROR }, /* tst shifted register */ + { "ands", "SF(1)|1101010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* ands shifted register */ + { "bic", "SF(1)|0001010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* bic shifted register */ + { "bics", "SF(1)|1101010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* bics shifted register */ + { "eon", "SF(1)|1001010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* eon shifted register */ + { "eor", 
"SF(1)|1001010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", + TYPE_01, OP_SHIFT_ROR }, /* eor shifted register */ { NULL, NULL } }; @@ -420,6 +443,8 @@ int pre; /* Indicate if x31 register should be printed as sp or xzr */ int rm_sp, rt_sp, rd_sp, rn_sp; + /* Indicate if shift type ror is supported */ + bool has_shift_ror; /* Initialize defaults, all are 0 except SF indicating 64bit access */ shift = rd = rm = rn = imm = idx = option = amount = scale = 0; @@ -464,6 +489,8 @@ rd_sp = i_ptr->special_ops & OP_RD_SP; rn_sp = i_ptr->special_ops & OP_RN_SP; + has_shift_ror = i_ptr->special_ops & OP_SHIFT_ROR; + /* Print opcode by type */ switch (i_ptr->type) { case TYPE_01: @@ -479,6 +506,13 @@ rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift); + /* + * if shift type is RESERVED for shifted register instruction, + * print undefined + */ + if (shift == 3 && !has_shift_ror) + goto undefined; + di->di_printf("%s\t", i_ptr->name); /* diff --git a/sys/cam/scsi/scsi_all.h b/sys/cam/scsi/scsi_all.h --- a/sys/cam/scsi/scsi_all.h +++ b/sys/cam/scsi/scsi_all.h @@ -2104,7 +2104,6 @@ #define WRITE_VERIFY_10 0x2E #define VERIFY_10 0x2F #define SYNCHRONIZE_CACHE 0x35 -#define READ_DEFECT_DATA_10 0x37 #define WRITE_BUFFER 0x3B #define READ_BUFFER 0x3C #define CHANGE_DEFINITION 0x40 diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h --- a/sys/contrib/openzfs/include/sys/arc.h +++ b/sys/contrib/openzfs/include/sys/arc.h @@ -304,9 +304,8 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, - arc_write_done_func_t *physdone, arc_write_done_func_t *done, - void *priv, zio_priority_t priority, int zio_flags, - const zbookmark_phys_t *zb); + arc_write_done_func_t *done, void *priv, zio_priority_t priority, + int zio_flags, const 
zbookmark_phys_t *zb); arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv); void arc_remove_prune_callback(arc_prune_t *p); diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -123,7 +123,6 @@ void *awcb_private; arc_write_done_func_t *awcb_ready; arc_write_done_func_t *awcb_children_ready; - arc_write_done_func_t *awcb_physdone; arc_write_done_func_t *awcb_done; arc_buf_t *awcb_buf; }; diff --git a/sys/contrib/openzfs/include/sys/zfs_refcount.h b/sys/contrib/openzfs/include/sys/zfs_refcount.h --- a/sys/contrib/openzfs/include/sys/zfs_refcount.h +++ b/sys/contrib/openzfs/include/sys/zfs_refcount.h @@ -27,6 +27,7 @@ #define _SYS_ZFS_REFCOUNT_H #include +#include #include #include @@ -43,19 +44,22 @@ #ifdef ZFS_DEBUG typedef struct reference { - list_node_t ref_link; + union { + avl_node_t a; + list_node_t l; + } ref_link; const void *ref_holder; uint64_t ref_number; - uint8_t *ref_removed; + boolean_t ref_search; } reference_t; typedef struct refcount { + uint64_t rc_count; kmutex_t rc_mtx; - boolean_t rc_tracked; - list_t rc_list; + avl_tree_t rc_tree; list_t rc_removed; - uint64_t rc_count; - uint64_t rc_removed_count; + uint_t rc_removed_count; + boolean_t rc_tracked; } zfs_refcount_t; /* diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -460,7 +460,6 @@ /* Callback info */ zio_done_func_t *io_ready; zio_done_func_t *io_children_ready; - zio_done_func_t *io_physdone; zio_done_func_t *io_done; void *io_private; int64_t io_prev_space_delta; /* DMU private */ @@ -503,9 +502,6 @@ int io_error; int io_child_error[ZIO_CHILD_TYPES]; uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; - uint64_t io_child_count; - uint64_t io_phys_children; - uint64_t io_parent_count; 
uint64_t *io_stall; zio_t *io_gang_leader; zio_gang_node_t *io_gang_tree; @@ -553,9 +549,8 @@ extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, - zio_done_func_t *physdone, zio_done_func_t *done, - void *priv, zio_priority_t priority, zio_flag_t flags, - const zbookmark_phys_t *zb); + zio_done_func_t *done, void *priv, zio_priority_t priority, + zio_flag_t flags, const zbookmark_phys_t *zb); extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, struct abd *data, uint64_t size, zio_done_func_t *done, void *priv, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -6675,18 +6675,6 @@ callback->awcb_children_ready(zio, buf, callback->awcb_private); } -/* - * The SPA calls this callback for each physical write that happens on behalf - * of a logical write. See the comment in dbuf_write_physdone() for details. 
- */ -static void -arc_write_physdone(zio_t *zio) -{ - arc_write_callback_t *cb = zio->io_private; - if (cb->awcb_physdone != NULL) - cb->awcb_physdone(zio, cb->awcb_buf, cb->awcb_private); -} - static void arc_write_done(zio_t *zio) { @@ -6776,9 +6764,9 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, - arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone, - arc_write_done_func_t *done, void *private, zio_priority_t priority, - int zio_flags, const zbookmark_phys_t *zb) + arc_write_done_func_t *children_ready, arc_write_done_func_t *done, + void *private, zio_priority_t priority, int zio_flags, + const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = buf->b_hdr; arc_write_callback_t *callback; @@ -6825,7 +6813,6 @@ callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_children_ready = children_ready; - callback->awcb_physdone = physdone; callback->awcb_done = done; callback->awcb_private = private; callback->awcb_buf = buf; @@ -6862,8 +6849,7 @@ abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)), HDR_GET_LSIZE(hdr), arc_buf_size(buf), &localprop, arc_write_ready, (children_ready != NULL) ? arc_write_children_ready : NULL, - arc_write_physdone, arc_write_done, callback, - priority, zio_flags, zb); + arc_write_done, callback, priority, zio_flags, zb); return (zio); } diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -4369,22 +4369,6 @@ rw_exit(&parent_db->db_rwlock); } -static void -dbuf_lightweight_physdone(zio_t *zio) -{ - dbuf_dirty_record_t *dr = zio->io_private; - dsl_pool_t *dp = spa_get_dsl(zio->io_spa); - ASSERT3U(dr->dr_txg, ==, zio->io_txg); - - /* - * The callback will be called io_phys_children times. 
Retire one - * portion of our dirty space each time we are called. Any rounding - * error will be cleaned up by dbuf_lightweight_done(). - */ - int delta = dr->dr_accounted / zio->io_phys_children; - dsl_pool_undirty_space(dp, delta, zio->io_txg); -} - static void dbuf_lightweight_done(zio_t *zio) { @@ -4403,16 +4387,8 @@ dsl_dataset_block_born(ds, zio->io_bp, tx); } - /* - * See comment in dbuf_write_done(). - */ - if (zio->io_phys_children == 0) { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted, zio->io_txg); - } else { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted % zio->io_phys_children, zio->io_txg); - } + dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted, + zio->io_txg); abd_free(dr->dt.dll.dr_abd); kmem_free(dr, sizeof (*dr)); @@ -4446,8 +4422,7 @@ dmu_tx_get_txg(tx), &dr->dr_bp_copy, dr->dt.dll.dr_abd, dn->dn_datablksz, abd_get_size(dr->dt.dll.dr_abd), &dr->dt.dll.dr_props, dbuf_lightweight_ready, NULL, - dbuf_lightweight_physdone, dbuf_lightweight_done, dr, - ZIO_PRIORITY_ASYNC_WRITE, + dbuf_lightweight_done, dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | dr->dt.dll.dr_flags, &zb); zio_nowait(dr->dr_zio); @@ -4789,37 +4764,6 @@ DB_DNODE_EXIT(db); } -/* - * The SPA will call this callback several times for each zio - once - * for every physical child i/o (zio->io_phys_children times). This - * allows the DMU to monitor the progress of each logical i/o. For example, - * there may be 2 copies of an indirect block, or many fragments of a RAID-Z - * block. There may be a long delay before all copies/fragments are completed, - * so this callback allows us to retire dirty space gradually, as the physical - * i/os complete. 
- */ -static void -dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg) -{ - (void) buf; - dmu_buf_impl_t *db = arg; - objset_t *os = db->db_objset; - dsl_pool_t *dp = dmu_objset_pool(os); - dbuf_dirty_record_t *dr; - int delta = 0; - - dr = db->db_data_pending; - ASSERT3U(dr->dr_txg, ==, zio->io_txg); - - /* - * The callback will be called io_phys_children times. Retire one - * portion of our dirty space each time we are called. Any rounding - * error will be cleaned up by dbuf_write_done(). - */ - delta = dr->dr_accounted / zio->io_phys_children; - dsl_pool_undirty_space(dp, delta, zio->io_txg); -} - static void dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) { @@ -4894,27 +4838,8 @@ db->db_data_pending = NULL; dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE); - /* - * If we didn't do a physical write in this ZIO and we - * still ended up here, it means that the space of the - * dbuf that we just released (and undirtied) above hasn't - * been marked as undirtied in the pool's accounting. - * - * Thus, we undirty that space in the pool's view of the - * world here. For physical writes this type of update - * happens in dbuf_write_physdone(). - * - * If we did a physical write, cleanup any rounding errors - * that came up due to writing multiple copies of a block - * on disk [see dbuf_write_physdone()]. 
- */ - if (zio->io_phys_children == 0) { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted, zio->io_txg); - } else { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted % zio->io_phys_children, zio->io_txg); - } + dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted, + zio->io_txg); kmem_free(dr, sizeof (dbuf_dirty_record_t)); } @@ -5162,7 +5087,7 @@ dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy, contents, db->db.db_size, db->db.db_size, &zp, - dbuf_write_override_ready, NULL, NULL, + dbuf_write_override_ready, NULL, dbuf_write_override_done, dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); mutex_enter(&db->db_mtx); @@ -5176,7 +5101,7 @@ zp.zp_checksum == ZIO_CHECKSUM_NOPARITY); dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp, - dbuf_write_nofill_ready, NULL, NULL, + dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb); @@ -5195,9 +5120,8 @@ dr->dr_zio = arc_write(pio, os->os_spa, txg, &dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), &zp, dbuf_write_ready, - children_ready_cb, dbuf_write_physdone, - dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, - ZIO_FLAG_MUSTSUCCEED, &zb); + children_ready_cb, dbuf_write_done, db, + ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); } } diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -1698,7 +1698,7 @@ zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size), zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp, - dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done, + dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); return (0); @@ 
-1864,7 +1864,7 @@ zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp, dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), - &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, + &zp, dmu_sync_ready, NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); return (0); diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -1698,7 +1698,7 @@ zio = arc_write(pio, os->os_spa, tx->tx_txg, blkptr_copy, os->os_phys_buf, B_FALSE, dmu_os_is_l2cacheable(os), - &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, + &zp, dmu_objset_write_ready, NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* diff --git a/sys/contrib/openzfs/module/zfs/refcount.c b/sys/contrib/openzfs/module/zfs/refcount.c --- a/sys/contrib/openzfs/module/zfs/refcount.c +++ b/sys/contrib/openzfs/module/zfs/refcount.c @@ -36,33 +36,40 @@ static uint_t reference_history = 3; /* tunable */ static kmem_cache_t *reference_cache; -static kmem_cache_t *reference_history_cache; void zfs_refcount_init(void) { reference_cache = kmem_cache_create("reference_cache", sizeof (reference_t), 0, NULL, NULL, NULL, NULL, NULL, 0); - - reference_history_cache = kmem_cache_create("reference_history_cache", - sizeof (uint64_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } void zfs_refcount_fini(void) { kmem_cache_destroy(reference_cache); - kmem_cache_destroy(reference_history_cache); +} + +static int +zfs_refcount_compare(const void *x1, const void *x2) +{ + const reference_t *r1 = (const reference_t *)x1; + const reference_t *r2 = (const reference_t *)x2; + + int cmp1 = TREE_CMP(r1->ref_holder, r2->ref_holder); + int cmp2 = TREE_CMP(r1->ref_number, r2->ref_number); + int cmp = cmp1 ? cmp1 : cmp2; + return ((cmp || r1->ref_search) ? 
cmp : TREE_PCMP(r1, r2)); } void zfs_refcount_create(zfs_refcount_t *rc) { mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL); - list_create(&rc->rc_list, sizeof (reference_t), - offsetof(reference_t, ref_link)); + avl_create(&rc->rc_tree, zfs_refcount_compare, sizeof (reference_t), + offsetof(reference_t, ref_link.a)); list_create(&rc->rc_removed, sizeof (reference_t), - offsetof(reference_t, ref_link)); + offsetof(reference_t, ref_link.l)); rc->rc_count = 0; rc->rc_removed_count = 0; rc->rc_tracked = reference_tracking_enable; @@ -86,16 +93,15 @@ zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number) { reference_t *ref; + void *cookie = NULL; ASSERT3U(rc->rc_count, ==, number); - while ((ref = list_remove_head(&rc->rc_list))) + while ((ref = avl_destroy_nodes(&rc->rc_tree, &cookie)) != NULL) kmem_cache_free(reference_cache, ref); - list_destroy(&rc->rc_list); + avl_destroy(&rc->rc_tree); - while ((ref = list_remove_head(&rc->rc_removed))) { - kmem_cache_free(reference_history_cache, ref->ref_removed); + while ((ref = list_remove_head(&rc->rc_removed))) kmem_cache_free(reference_cache, ref); - } list_destroy(&rc->rc_removed); mutex_destroy(&rc->rc_mtx); } @@ -121,10 +127,10 @@ int64_t zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { - reference_t *ref = NULL; + reference_t *ref; int64_t count; - if (!rc->rc_tracked) { + if (likely(!rc->rc_tracked)) { count = atomic_add_64_nv(&(rc)->rc_count, number); ASSERT3U(count, >=, number); return (count); @@ -133,8 +139,9 @@ ref = kmem_cache_alloc(reference_cache, KM_SLEEP); ref->ref_holder = holder; ref->ref_number = number; + ref->ref_search = B_FALSE; mutex_enter(&rc->rc_mtx); - list_insert_head(&rc->rc_list, ref); + avl_add(&rc->rc_tree, ref); rc->rc_count += number; count = rc->rc_count; mutex_exit(&rc->rc_mtx); @@ -151,7 +158,7 @@ void zfs_refcount_add_few(zfs_refcount_t *rc, uint64_t number, const void *holder) { - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) (void) 
zfs_refcount_add_many(rc, number, holder); else for (; number > 0; number--) (void) zfs_refcount_add(rc, holder); @@ -161,47 +168,42 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { - reference_t *ref; + reference_t *ref, s; int64_t count; - if (!rc->rc_tracked) { + if (likely(!rc->rc_tracked)) { count = atomic_add_64_nv(&(rc)->rc_count, -number); ASSERT3S(count, >=, 0); return (count); } + s.ref_holder = holder; + s.ref_number = number; + s.ref_search = B_TRUE; mutex_enter(&rc->rc_mtx); ASSERT3U(rc->rc_count, >=, number); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == holder && ref->ref_number == number) { - list_remove(&rc->rc_list, ref); - if (reference_history > 0) { - ref->ref_removed = - kmem_cache_alloc(reference_history_cache, - KM_SLEEP); - list_insert_head(&rc->rc_removed, ref); - rc->rc_removed_count++; - if (rc->rc_removed_count > reference_history) { - ref = list_tail(&rc->rc_removed); - list_remove(&rc->rc_removed, ref); - kmem_cache_free(reference_history_cache, - ref->ref_removed); - kmem_cache_free(reference_cache, ref); - rc->rc_removed_count--; - } - } else { - kmem_cache_free(reference_cache, ref); - } - rc->rc_count -= number; - count = rc->rc_count; - mutex_exit(&rc->rc_mtx); - return (count); + ref = avl_find(&rc->rc_tree, &s, NULL); + if (unlikely(ref == NULL)) { + panic("No such hold %p on refcount %llx", holder, + (u_longlong_t)(uintptr_t)rc); + return (-1); + } + avl_remove(&rc->rc_tree, ref); + if (reference_history > 0) { + list_insert_head(&rc->rc_removed, ref); + if (rc->rc_removed_count >= reference_history) { + ref = list_remove_tail(&rc->rc_removed); + kmem_cache_free(reference_cache, ref); + } else { + rc->rc_removed_count++; } + } else { + kmem_cache_free(reference_cache, ref); } - panic("No such hold %p on refcount %llx", holder, - (u_longlong_t)(uintptr_t)rc); - return (-1); + rc->rc_count -= number; + count = rc->rc_count; + 
mutex_exit(&rc->rc_mtx); + return (count); } int64_t @@ -213,7 +215,7 @@ void zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder) { - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) (void) zfs_refcount_remove_many(rc, number, holder); else for (; number > 0; number--) (void) zfs_refcount_remove(rc, holder); @@ -222,31 +224,38 @@ void zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) { - int64_t count, removed_count; - list_t list, removed; + avl_tree_t tree; + list_t removed; + reference_t *ref; + void *cookie = NULL; + uint64_t count; + uint_t removed_count; - list_create(&list, sizeof (reference_t), - offsetof(reference_t, ref_link)); + avl_create(&tree, zfs_refcount_compare, sizeof (reference_t), + offsetof(reference_t, ref_link.a)); list_create(&removed, sizeof (reference_t), - offsetof(reference_t, ref_link)); + offsetof(reference_t, ref_link.l)); mutex_enter(&src->rc_mtx); count = src->rc_count; removed_count = src->rc_removed_count; src->rc_count = 0; src->rc_removed_count = 0; - list_move_tail(&list, &src->rc_list); + avl_swap(&tree, &src->rc_tree); list_move_tail(&removed, &src->rc_removed); mutex_exit(&src->rc_mtx); mutex_enter(&dst->rc_mtx); dst->rc_count += count; dst->rc_removed_count += removed_count; - list_move_tail(&dst->rc_list, &list); + if (avl_is_empty(&dst->rc_tree)) + avl_swap(&dst->rc_tree, &tree); + else while ((ref = avl_destroy_nodes(&tree, &cookie)) != NULL) + avl_add(&dst->rc_tree, ref); list_move_tail(&dst->rc_removed, &removed); mutex_exit(&dst->rc_mtx); - list_destroy(&list); + avl_destroy(&tree); list_destroy(&removed); } @@ -254,23 +263,19 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, const void *current_holder, const void *new_holder) { - reference_t *ref; - boolean_t found = B_FALSE; + reference_t *ref, s; - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) return; + s.ref_holder = current_holder; + s.ref_number = number; + s.ref_search = B_TRUE; 
mutex_enter(&rc->rc_mtx); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == current_holder && - ref->ref_number == number) { - ref->ref_holder = new_holder; - found = B_TRUE; - break; - } - } - ASSERT(found); + ref = avl_find(&rc->rc_tree, &s, NULL); + ASSERT(ref); + ref->ref_holder = new_holder; + avl_update(&rc->rc_tree, ref); mutex_exit(&rc->rc_mtx); } @@ -290,21 +295,23 @@ boolean_t zfs_refcount_held(zfs_refcount_t *rc, const void *holder) { - reference_t *ref; + reference_t *ref, s; + avl_index_t idx; + boolean_t res; - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) return (zfs_refcount_count(rc) > 0); + s.ref_holder = holder; + s.ref_number = 0; + s.ref_search = B_TRUE; mutex_enter(&rc->rc_mtx); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == holder) { - mutex_exit(&rc->rc_mtx); - return (B_TRUE); - } - } + ref = avl_find(&rc->rc_tree, &s, &idx); + if (likely(ref == NULL)) + ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER); + res = ref && ref->ref_holder == holder; mutex_exit(&rc->rc_mtx); - return (B_FALSE); + return (res); } /* @@ -315,21 +322,23 @@ boolean_t zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) { - reference_t *ref; + reference_t *ref, s; + avl_index_t idx; + boolean_t res; - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) return (B_TRUE); mutex_enter(&rc->rc_mtx); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == holder) { - mutex_exit(&rc->rc_mtx); - return (B_FALSE); - } - } + s.ref_holder = holder; + s.ref_number = 0; + s.ref_search = B_TRUE; + ref = avl_find(&rc->rc_tree, &s, &idx); + if (likely(ref == NULL)) + ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER); + res = ref == NULL || ref->ref_holder != holder; mutex_exit(&rc->rc_mtx); - return (B_TRUE); + return (res); } EXPORT_SYMBOL(zfs_refcount_create); diff --git 
a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -486,6 +486,9 @@ if (vd->vdev_isspare) fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1); + if (flags & VDEV_CONFIG_L2CACHE) + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); + if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && vd == vd->vdev_top) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -650,9 +650,6 @@ list_insert_head(&pio->io_child_list, zl); list_insert_head(&cio->io_parent_list, zl); - pio->io_child_count++; - cio->io_parent_count++; - mutex_exit(&cio->io_lock); mutex_exit(&pio->io_lock); } @@ -669,9 +666,6 @@ list_remove(&pio->io_child_list, zl); list_remove(&cio->io_parent_list, zl); - pio->io_child_count--; - cio->io_parent_count--; - mutex_exit(&cio->io_lock); mutex_exit(&pio->io_lock); kmem_cache_free(zio_link_cache, zl); @@ -1162,9 +1156,8 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, - zio_done_func_t *physdone, zio_done_func_t *done, - void *private, zio_priority_t priority, zio_flag_t flags, - const zbookmark_phys_t *zb) + zio_done_func_t *done, void *private, zio_priority_t priority, + zio_flag_t flags, const zbookmark_phys_t *zb) { zio_t *zio; @@ -1184,7 +1177,6 @@ zio->io_ready = ready; zio->io_children_ready = children_ready; - zio->io_physdone = physdone; zio->io_prop = *zp; /* @@ -1517,16 +1509,11 @@ flags &= ~ZIO_FLAG_IO_ALLOCATING; } - zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size, done, private, type, priority, flags, vd, offset, &pio->io_bookmark, ZIO_STAGE_VDEV_IO_START >> 1, 
pipeline); ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); - zio->io_physdone = pio->io_physdone; - if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL) - zio->io_logical->io_phys_children++; - return (zio); } @@ -2711,7 +2698,7 @@ blkptr_t *bp = zio->io_bp; ASSERT(gio == zio_unique_parent(zio)); - ASSERT(zio->io_child_count == 0); + ASSERT(list_is_empty(&zio->io_child_list)); if (zio->io_error) return; @@ -2969,7 +2956,7 @@ zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], has_data ? abd_get_offset(pio->io_abd, pio->io_size - resid) : NULL, lsize, lsize, &zp, - zio_write_gang_member_ready, NULL, NULL, + zio_write_gang_member_ready, NULL, zio_write_gang_done, &gn->gn_child[g], pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); @@ -3431,7 +3418,7 @@ } else { cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, zio->io_orig_size, zio->io_orig_size, zp, - zio_ddt_child_write_ready, NULL, NULL, + zio_ddt_child_write_ready, NULL, zio_ddt_child_write_done, dde, zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); @@ -4134,13 +4121,6 @@ if (zio->io_error) zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; - if (vd != NULL && vd->vdev_ops->vdev_op_leaf && - zio->io_physdone != NULL) { - ASSERT(!(zio->io_flags & ZIO_FLAG_DELEGATED)); - ASSERT(zio->io_child_type == ZIO_CHILD_VDEV); - zio->io_physdone(zio->io_logical); - } - return (zio); } @@ -4890,7 +4870,7 @@ return (NULL); } - ASSERT(zio->io_child_count == 0); + ASSERT(list_is_empty(&zio->io_child_list)); ASSERT(zio->io_reexecute == 0); ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -173,6 +173,7 @@ 'link_count/link_count_001': ['SKIP', na_reason], 'casenorm/mixed_create_failure': ['FAIL', 13215], 
'mmap/mmap_sync_001_pos': ['SKIP', na_reason], + 'rsend/send_raw_ashift': ['SKIP', 14961], }) elif sys.platform.startswith('linux'): known.update({ diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib --- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib @@ -3706,7 +3706,7 @@ while $do_once || [ $stat1 -ne $stat2 ] || [ $stat2 -eq 0 ]; do typeset stat1=$(get_arcstat $stat) - sleep 2 + sleep 0.5 typeset stat2=$(get_arcstat $stat) do_once=false done diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh @@ -27,15 +27,14 @@ # # STRATEGY: # 1. Create pool with a cache device. -# 2. Export and re-import pool without writing any data. -# 3. Create a random file in that pool and random read for 10 sec. -# 4. Export pool. -# 5. Read the amount of log blocks written from the header of the +# 2. Create a random file in that pool and random read for 10 sec. +# 3. Export pool. +# 4. Read the amount of log blocks written from the header of the # L2ARC device. -# 6. Import pool. -# 7. Read the amount of log blocks rebuilt in arcstats and compare to +# 5. Import pool. +# 6. Read the amount of log blocks rebuilt in arcstats and compare to # (5). -# 8. Check if the labels of the L2ARC device are intact. +# 7. Check if the labels of the L2ARC device are intact. 
# # * We can predict the minimum bytes of L2ARC restored if we subtract # from the effective size of the cache device the bytes l2arc_evict() @@ -77,10 +76,8 @@ log_must truncate -s ${cache_sz}M $VDEV_CACHE -log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE - -log_must zpool export $TESTPOOL -log_must zpool import -d $VDIR $TESTPOOL +log_must zpool create -f -o ashift=12 $TESTPOOL $VDEV +log_must zpool add $TESTPOOL cache $VDEV_CACHE log_must fio $FIO_SCRIPTS/mkfiles.fio log_must fio $FIO_SCRIPTS/random_reads.fio diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh @@ -37,6 +37,10 @@ log_assert "Verify raw sending to pools with greater ashift succeeds" +if is_freebsd; then + log_unsupported "Runs too long on FreeBSD 14 (Issue #14961)" +fi + function cleanup { rm -f $BACKDIR/fs@* diff --git a/sys/crypto/openssl/ossl.c b/sys/crypto/openssl/ossl.c --- a/sys/crypto/openssl/ossl.c +++ b/sys/crypto/openssl/ossl.c @@ -301,7 +301,8 @@ error = ossl_newsession_cipher(s, csp); break; case CSP_MODE_AEAD: - error = ossl_newsession_cipher(s, csp); + if (csp->csp_cipher_alg != CRYPTO_CHACHA20_POLY1305) + error = ossl_newsession_cipher(s, csp); break; default: __assert_unreachable(); diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -165,6 +165,12 @@ */ #define AMDTEMP_17H_CUR_TMP 0x59800 #define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1u << 19) +/* + * Bits 16-17, when set, mean that CUR_TMP is read-write. When it is, the + * 49 degree offset should apply as well. This was revealed in a Linux + * patch from an AMD employee. 
+ */ +#define AMDTEMP_17H_CUR_TMP_TJ_SEL ((1u << 17) | (1u << 16)) /* * The following register set was discovered experimentally by Ondrej Čerman * and collaborators, but is not (yet) documented in a PPR/OSRR (other than @@ -731,7 +737,8 @@ { bool minus49; - minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0); + minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0) + || ((val & AMDTEMP_17H_CUR_TMP_TJ_SEL) == AMDTEMP_17H_CUR_TMP_TJ_SEL); return (amdtemp_decode_fam10h_to_17h(sc_offset, val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49)); } diff --git a/sys/dev/hifn/hifn7751.c b/sys/dev/hifn/hifn7751.c --- a/sys/dev/hifn/hifn7751.c +++ b/sys/dev/hifn/hifn7751.c @@ -880,7 +880,7 @@ } /* - * Resets the board. Values in the regesters are left as is + * Resets the board. Values in the registers are left as is * from the reset (i.e. initial values are assigned elsewhere). */ static void diff --git a/sys/dev/mlx4/mlx4_core/mlx4_pd.c b/sys/dev/mlx4/mlx4_core/mlx4_pd.c --- a/sys/dev/mlx4/mlx4_core/mlx4_pd.c +++ b/sys/dev/mlx4/mlx4_core/mlx4_pd.c @@ -273,8 +273,8 @@ { int num_reserved_uar = mlx4_get_num_reserved_uar(dev); - mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift); - mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars); + mlx4_dbg(dev, "uar_page_shift = %d\n", dev->uar_page_shift); + mlx4_dbg(dev, "Effective reserved_uars=%d\n", dev->caps.reserved_uars); if (dev->caps.num_uars <= num_reserved_uar) { mlx4_err(dev, "Only %d UAR pages (need more than %d)\n", diff --git a/sys/dev/mpi3mr/mpi3mr_app.h b/sys/dev/mpi3mr/mpi3mr_app.h --- a/sys/dev/mpi3mr/mpi3mr_app.h +++ b/sys/dev/mpi3mr/mpi3mr_app.h @@ -75,7 +75,6 @@ int mpi3mr_app_attach(struct mpi3mr_softc *); void mpi3mr_app_detach(struct mpi3mr_softc *); -static struct mpi3mr_mgmt_info mpi3mr_mgmt_info; enum mpi3mr_ioctl_adp_state { MPI3MR_IOCTL_ADP_STATE_UNKNOWN = 0, diff --git a/sys/dev/mpi3mr/mpi3mr_app.c b/sys/dev/mpi3mr/mpi3mr_app.c --- a/sys/dev/mpi3mr/mpi3mr_app.c +++ 
b/sys/dev/mpi3mr/mpi3mr_app.c @@ -67,6 +67,8 @@ .d_name = "mpi3mr", }; +static struct mpi3mr_mgmt_info mpi3mr_mgmt_info; + static int mpi3mr_open(struct cdev *dev, int flags, int fmt, struct thread *td) { diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c --- a/sys/dev/pci/pci_pci.c +++ b/sys/dev/pci/pci_pci.c @@ -1321,7 +1321,7 @@ pcib_alloc_pcie_irq(struct pcib_softc *sc) { device_t dev; - int count, error, rid; + int count, error, mem_rid, rid; rid = -1; dev = sc->dev; @@ -1333,9 +1333,17 @@ */ count = pci_msix_count(dev); if (count == 1) { - error = pci_alloc_msix(dev, &count); - if (error == 0) - rid = 1; + mem_rid = pci_msix_table_bar(dev); + sc->pcie_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &mem_rid, RF_ACTIVE); + if (sc->pcie_mem == NULL) { + device_printf(dev, + "Failed to allocate BAR for MSI-X table\n"); + } else { + error = pci_alloc_msix(dev, &count); + if (error == 0) + rid = 1; + } } if (rid < 0 && pci_msi_count(dev) > 0) { @@ -1383,7 +1391,12 @@ error = bus_free_resource(dev, SYS_RES_IRQ, sc->pcie_irq); if (error) return (error); - return (pci_release_msi(dev)); + error = pci_release_msi(dev); + if (error) + return (error); + if (sc->pcie_mem != NULL) + error = bus_free_resource(dev, SYS_RES_MEMORY, sc->pcie_mem); + return (error); } static void diff --git a/sys/dev/pci/pcib_private.h b/sys/dev/pci/pcib_private.h --- a/sys/dev/pci/pcib_private.h +++ b/sys/dev/pci/pcib_private.h @@ -134,6 +134,7 @@ uint16_t pcie_link_sta; uint16_t pcie_slot_sta; uint32_t pcie_slot_cap; + struct resource *pcie_mem; struct resource *pcie_irq; void *pcie_ihand; struct task pcie_hp_task; diff --git a/sys/fs/tarfs/tarfs_io.c b/sys/fs/tarfs/tarfs_io.c --- a/sys/fs/tarfs/tarfs_io.c +++ b/sys/fs/tarfs/tarfs_io.c @@ -125,8 +125,7 @@ rl = vn_rangelock_rlock(tmp->vp, off, off + len); error = vn_lock(tmp->vp, LK_SHARED); if (error == 0) { - error = VOP_READ(tmp->vp, uiop, - IO_DIRECT|IO_NODELOCKED, + error = VOP_READ(tmp->vp, uiop, IO_NODELOCKED, 
uiop->uio_td->td_ucred); VOP_UNLOCK(tmp->vp); } @@ -426,8 +425,7 @@ auio.uio_rw = UIO_READ; auio.uio_resid = aiov.iov_len; auio.uio_td = td; - error = VOP_READ(tmp->vp, &auio, - IO_DIRECT | IO_NODELOCKED, + error = VOP_READ(tmp->vp, &auio, IO_NODELOCKED, td->td_ucred); if (error != 0) goto fail; diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c --- a/sys/kern/subr_pctrie.c +++ b/sys/kern/subr_pctrie.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include /* smr.h depends on struct thread. */ #include @@ -259,21 +260,22 @@ } /* - * Returns the slot where two keys differ. + * Returns the level where two keys differ. * It cannot accept 2 equal keys. */ static __inline uint16_t pctrie_keydiff(uint64_t index1, uint64_t index2) { - uint16_t clev; KASSERT(index1 != index2, ("%s: passing the same key value %jx", __func__, (uintmax_t)index1)); + CTASSERT(sizeof(long long) >= sizeof(uint64_t)); - index1 ^= index2; - for (clev = PCTRIE_LIMIT;; clev--) - if (pctrie_slot(index1, clev) != 0) - return (clev); + /* + * From the highest-order bit where the indexes differ, + * compute the highest level in the trie where they differ. + */ + return ((flsll(index1 ^ index2) - 1) / PCTRIE_WIDTH); } /* diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -1051,7 +1051,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gfeff9dfed" +#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g10e36e176" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -1081,7 +1081,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_gfeff9dfed" +#define ZFS_META_RELEASE "FreeBSD_g10e36e176" /* Define the project version. 
*/ #define ZFS_META_VERSION "2.1.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.1.99-1993-gfeff9dfed" +#define ZFS_META_GITREV "zfs-2.1.99-1998-g10e36e176" diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -697,7 +697,9 @@ #define PFE_SKIP_PROTO 2 #define PFE_SKIP_SRC_ADDR 3 #define PFE_SKIP_DST_ADDR 4 -#define PFE_SKIP_COUNT 5 +#define PFE_SKIP_SRC_IP_ADDR 5 +#define PFE_SKIP_DST_IP_ADDR 6 +#define PFE_SKIP_COUNT 7 union pf_keth_rule_ptr skip[PFE_SKIP_COUNT]; TAILQ_ENTRY(pf_keth_rule) entries; @@ -2215,6 +2217,8 @@ extern u_int pf_free_src_nodes(struct pf_ksrc_node_list *); extern void pf_print_state(struct pf_kstate *); extern void pf_print_flags(u_int8_t); +extern int pf_addr_wrap_neq(struct pf_addr_wrap *, + struct pf_addr_wrap *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, @@ -2357,13 +2361,13 @@ struct mbuf *pf_build_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, - u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, int); + u_int8_t, u_int16_t, u_int16_t, u_int8_t, bool, + u_int16_t, u_int16_t, int); void pf_send_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, - u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, int); + u_int8_t, u_int16_t, u_int16_t, u_int8_t, bool, + u_int16_t, u_int16_t, int); void pf_syncookies_init(void); void pf_syncookies_cleanup(void); diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -91,12 +91,15 @@ #include #include #include +#include #include #include #include #include +#include 
#include +#include #include #include #include @@ -105,6 +108,7 @@ #include #include +#include #include @@ -112,7 +116,8 @@ struct pfsync_softc; union inet_template { - struct ip ipv4; + struct ip ipv4; + struct ip6_hdr ipv6; }; #define PFSYNC_MINPKT ( \ @@ -247,6 +252,7 @@ struct ifnet *sc_ifp; struct ifnet *sc_sync_if; struct ip_moptions sc_imo; + struct ip6_moptions sc_im6o; struct sockaddr_storage sc_sync_peer; uint32_t sc_flags; uint8_t sc_maxupdates; @@ -281,6 +287,8 @@ #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) +#define PFSYNC_DEFER_TIMEOUT 20 + static const char pfsyncname[] = "pfsync"; static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; @@ -293,13 +301,15 @@ #define V_pfsyncstats VNET(pfsyncstats) VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) +VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; +#define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) static void pfsync_timeout(void *); static void pfsync_push(struct pfsync_bucket *); static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, - struct in_mfilter *imf); + struct in_mfilter *, struct in6_mfilter *); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); @@ -317,6 +327,8 @@ &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, &pfsync_buckets, 0, "Number of pfsync hash buckets"); +SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); static int pfsync_clone_create(struct if_clone *, int, 
caddr_t); static void pfsync_clone_destroy(struct ifnet *); @@ -358,11 +370,13 @@ struct pf_kstate *); #define PFSYNC_MAX_BULKTRIES 12 -#define PFSYNC_DEFER_TIMEOUT ((20 * hz) / 1000) VNET_DEFINE(struct if_clone *, pfsync_cloner); #define V_pfsync_cloner VNET(pfsync_cloner) +const struct in6_addr in6addr_linklocal_pfsync_group = + {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}}; static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { @@ -837,6 +851,109 @@ } #endif +#ifdef INET6 +static int +pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) +{ + struct pfsync_softc *sc = V_pfsyncif; + struct mbuf *m = *mp; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct pfsync_header *ph; + struct pfsync_subheader subh; + + int offset, len, flags = 0; + int rv; + uint16_t count; + + PF_RULES_RLOCK_TRACKER; + + *mp = NULL; + V_pfsyncstats.pfsyncs_ipackets++; + + /* Verify that we have a sync interface configured. */ + if (!sc || !sc->sc_sync_if || !V_pf_status.running || + (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + goto done; + + /* verify that the packet came in on the right interface */ + if (sc->sc_sync_if != m->m_pkthdr.rcvif) { + V_pfsyncstats.pfsyncs_badif++; + goto done; + } + + if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + /* verify that the IP TTL is 255. 
*/ + if (ip6->ip6_hlim != PFSYNC_DFLTTL) { + V_pfsyncstats.pfsyncs_badttl++; + goto done; + } + + + offset = sizeof(*ip6); + if (m->m_pkthdr.len < offset + sizeof(*ph)) { + V_pfsyncstats.pfsyncs_hdrops++; + goto done; + } + + if (offset + sizeof(*ph) > m->m_len) { + if (m_pullup(m, offset + sizeof(*ph)) == NULL) { + V_pfsyncstats.pfsyncs_hdrops++; + return (IPPROTO_DONE); + } + ip6 = mtod(m, struct ip6_hdr *); + } + ph = (struct pfsync_header *)((char *)ip6 + offset); + + /* verify the version */ + if (ph->version != PFSYNC_VERSION) { + V_pfsyncstats.pfsyncs_badver++; + goto done; + } + + len = ntohs(ph->len) + offset; + if (m->m_pkthdr.len < len) { + V_pfsyncstats.pfsyncs_badlen++; + goto done; + } + + /* + * Trusting pf_chksum during packet processing, as well as seeking + * in interface name tree, require holding PF_RULES_RLOCK(). + */ + PF_RULES_RLOCK(); + if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) + flags = PFSYNC_SI_CKSUM; + + offset += sizeof(*ph); + while (offset <= len - sizeof(subh)) { + m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); + offset += sizeof(subh); + + if (subh.action >= PFSYNC_ACT_MAX) { + V_pfsyncstats.pfsyncs_badact++; + PF_RULES_RUNLOCK(); + goto done; + } + + count = ntohs(subh.count); + V_pfsyncstats.pfsyncs_iacts[subh.action] += count; + rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); + if (rv == -1) { + PF_RULES_RUNLOCK(); + return (IPPROTO_DONE); + } + + offset += rv; + } + PF_RULES_RUNLOCK(); + +done: + m_freem(m); + return (IPPROTO_DONE); +} +#endif + static int pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) { @@ -1696,6 +1813,19 @@ ip_fillid(ip); break; } +#endif +#ifdef INET6 + case AF_INET6: + { + struct ip6_hdr *ip6; + + ip6 = mtod(m, struct ip6_hdr *); + bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); + aflen = offset = sizeof(*ip6); + + ip6->ip6_plen = htons(m->m_pkthdr.len); + break; + } #endif default: m_freem(m); @@ -1882,7 +2012,8 @@ 
TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); - callout_reset(&pd->pd_tmo, PFSYNC_DEFER_TIMEOUT, pfsync_defer_tmo, pd); + callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, + pfsync_defer_tmo, pd); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); @@ -2506,10 +2637,8 @@ error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } else { - MPASS(false); - /* We don't support pfsync over IPv6. */ - /*error = ip6_output(m, NULL, NULL, - IP_RAWOUTPUT, &sc->sc_imo6, NULL);*/ + error = ip6_output(m, NULL, NULL, 0, + &sc->sc_im6o, NULL, NULL); } break; #endif @@ -2558,10 +2687,12 @@ static int pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, - struct in_mfilter *imf) + struct in_mfilter* imf, struct in6_mfilter* im6f) { struct ip_moptions *imo = &sc->sc_imo; + struct ip6_moptions *im6o = &sc->sc_im6o; int error; + struct sockaddr_in6 *syncpeer_sa6 = NULL; if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); @@ -2572,9 +2703,12 @@ { ip_mfilter_init(&imo->imo_head); imo->imo_multicast_vif = -1; - if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, - &imf->imf_inm)) != 0) + if ((error = in_joingroup(ifp, + &(((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr), + NULL, &imf->imf_inm)) != 0) + { return (error); + } ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; @@ -2583,7 +2717,29 @@ break; } #endif +#ifdef INET6 + case AF_INET6: + { + syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; + if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) + { + return (error); + } + ip6_mfilter_init(&im6o->im6o_head); + if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, + &(im6f->im6f_in6m), 0)) != 0) + { + return (error); + } + + ip6_mfilter_insert(&im6o->im6o_head, im6f); + im6o->im6o_multicast_ifp = ifp; + im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; + im6o->im6o_multicast_loop = 0; + break; + } } 
+#endif return (0); } @@ -2592,7 +2748,9 @@ pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; + struct ip6_moptions *im6o = &sc->sc_im6o; struct in_mfilter *imf; + struct in6_mfilter *im6f; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); @@ -2600,6 +2758,13 @@ ip_mfilter_free(imf); } imo->imo_multicast_ifp = NULL; + + while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { + ip6_mfilter_remove(&im6o->im6o_head, im6f); + in6_leavegroup(im6f->im6f_in6m, NULL); + ip6_mfilter_free(im6f); + } + im6o->im6o_multicast_ifp = NULL; } void @@ -2619,6 +2784,7 @@ */ ip_mfilter_init(&sc->sc_imo.imo_head); sc->sc_imo.imo_multicast_ifp = NULL; + sc->sc_im6o.im6o_multicast_ifp = NULL; sc->sc_sync_if = NULL; } @@ -2649,9 +2815,11 @@ static int pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) { - struct in_mfilter *imf = NULL; struct ifnet *sifp; - struct ip *ip; + struct in_mfilter *imf = NULL; + struct in6_mfilter *im6f = NULL; + struct sockaddr_in *status_sin; + struct sockaddr_in6 *status_sin6; int error; int c; @@ -2663,12 +2831,45 @@ else if ((sifp = ifunit_ref(status->syncdev)) == NULL) return (EINVAL); - struct sockaddr_in *status_sin = - (struct sockaddr_in *)&(status->syncpeer); - if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || - status_sin->sin_addr.s_addr == - htonl(INADDR_PFSYNC_GROUP))) - imf = ip_mfilter_alloc(M_WAITOK, 0, 0); + switch (status->syncpeer.ss_family) { + case AF_UNSPEC: + case AF_INET: { + status_sin = (struct sockaddr_in *)&(status->syncpeer); + if (sifp != NULL) { + if (status_sin->sin_addr.s_addr == 0 || + status_sin->sin_addr.s_addr == + htonl(INADDR_PFSYNC_GROUP)) { + status_sin->sin_family = AF_INET; + status_sin->sin_len = sizeof(*status_sin); + status_sin->sin_addr.s_addr = + htonl(INADDR_PFSYNC_GROUP); + } + + if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { + imf = ip_mfilter_alloc(M_WAITOK, 0, 0); + } 
+ } + break; + } + case AF_INET6: { + status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); + if (sifp != NULL) { + if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || + IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, + &in6addr_linklocal_pfsync_group)) { + status_sin6->sin6_family = AF_INET6; + status_sin6->sin6_len = sizeof(*status_sin6); + status_sin6->sin6_addr = + in6addr_linklocal_pfsync_group; + } + + if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { + im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); + } + } + break; + } + } PFSYNC_LOCK(sc); @@ -2685,13 +2886,31 @@ return (EINVAL); } - struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; - sc_sin->sin_family = AF_INET; - sc_sin->sin_len = sizeof(*sc_sin); - if (status_sin->sin_addr.s_addr == 0) { - sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); - } else { - sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; + switch (status->syncpeer.ss_family) { + case AF_INET: { + struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); + struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; + sc_sin->sin_family = AF_INET; + sc_sin->sin_len = sizeof(*sc_sin); + if (status_sin->sin_addr.s_addr == 0) { + sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); + } else { + sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; + } + break; + } + case AF_INET6: { + struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); + struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; + sc_sin->sin6_family = AF_INET6; + sc_sin->sin6_len = sizeof(*sc_sin); + if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { + sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; + } else { + sc_sin->sin6_addr = status_sin->sin6_addr; + } + break; + } } sc->sc_maxupdates = status->maxupdates; @@ -2725,12 +2944,20 @@ pfsync_multicast_cleanup(sc); - if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { - error = pfsync_multicast_setup(sc, sifp, 
imf); + if (((sc->sc_sync_peer.ss_family == AF_INET) && + IN_MULTICAST(ntohl(((struct sockaddr_in *) + &sc->sc_sync_peer)->sin_addr.s_addr))) || + ((sc->sc_sync_peer.ss_family == AF_INET6) && + IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) + &sc->sc_sync_peer)->sin6_addr))) { + error = pfsync_multicast_setup(sc, sifp, imf, im6f); if (error) { if_rele(sifp); - ip_mfilter_free(imf); PFSYNC_UNLOCK(sc); + if (imf != NULL) + ip_mfilter_free(imf); + if (im6f != NULL) + ip6_mfilter_free(im6f); return (error); } } @@ -2738,17 +2965,39 @@ if_rele(sc->sc_sync_if); sc->sc_sync_if = sifp; - ip = &sc->sc_template.ipv4; - bzero(ip, sizeof(*ip)); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; - ip->ip_tos = IPTOS_LOWDELAY; - /* len and id are set later. */ - ip->ip_off = htons(IP_DF); - ip->ip_ttl = PFSYNC_DFLTTL; - ip->ip_p = IPPROTO_PFSYNC; - ip->ip_src.s_addr = INADDR_ANY; - ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr; + switch (sc->sc_sync_peer.ss_family) { + case AF_INET: { + struct ip *ip; + ip = &sc->sc_template.ipv4; + bzero(ip, sizeof(*ip)); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + /* len and id are set later. */ + ip->ip_off = htons(IP_DF); + ip->ip_ttl = PFSYNC_DFLTTL; + ip->ip_p = IPPROTO_PFSYNC; + ip->ip_src.s_addr = INADDR_ANY; + ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; + break; + } + case AF_INET6: { + struct ip6_hdr *ip6; + ip6 = &sc->sc_template.ipv6; + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc = IPV6_VERSION; + ip6->ip6_hlim = PFSYNC_DFLTTL; + ip6->ip6_nxt = IPPROTO_PFSYNC; + ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; + + struct epoch_tracker et; + NET_EPOCH_ENTER(et); + in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, + sc->sc_sync_if, &ip6->ip6_src, NULL); + NET_EPOCH_EXIT(et); + break; + } + } /* Request a full state table update. 
*/ if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) @@ -2835,15 +3084,22 @@ static int pfsync_init(void) { -#ifdef INET int error; pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; +#ifdef INET error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); if (error) return (error); #endif +#ifdef INET6 + error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); + if (error) { + ipproto_unregister(IPPROTO_PFSYNC); + return (error); + } +#endif return (0); } @@ -2856,6 +3112,9 @@ #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); #endif +#ifdef INET6 + ip6proto_unregister(IPPROTO_PFSYNC); +#endif } static int diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -321,8 +321,6 @@ u_int8_t, sa_family_t); static void pf_print_state_parts(struct pf_kstate *, struct pf_state_key *, struct pf_state_key *); -static int pf_addr_wrap_neq(struct pf_addr_wrap *, - struct pf_addr_wrap *); static void pf_patch_8(struct mbuf *, u_int16_t *, u_int8_t *, u_int8_t, bool, u_int8_t); static struct pf_kstate *pf_find_state(struct pfi_kkif *, @@ -369,7 +367,7 @@ } while (0) #define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ - (pd)->pf_mtag->flags & PF_PACKET_LOOPED) + (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED) #define STATE_LOOKUP(i, k, d, s, pd) \ do { \ @@ -2051,7 +2049,7 @@ s->key[PF_SK_WIRE]->port[1], s->key[PF_SK_WIRE]->port[0], s->src.seqhi, s->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, s->rtableid); + TH_RST|TH_ACK, 0, 0, 0, true, s->tag, 0, s->rtableid); } LIST_REMOVE(s, entry); @@ -2429,7 +2427,7 @@ PF_SET_SKIP_STEPS(i); } -static int +int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) @@ -2800,8 +2798,8 @@ pf_build_tcp(const struct pf_krule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, - u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - 
u_int16_t rtag, int rtableid) + u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, + bool skip_firewall, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) { struct mbuf *m; int len, tlen; @@ -2849,9 +2847,10 @@ m_freem(m); return (NULL); } - if (tag) + if (skip_firewall) m->m_flags |= M_SKIP_FIREWALL; - pf_mtag->tag = rtag; + pf_mtag->tag = mtag_tag; + pf_mtag->flags = mtag_flags; if (rtableid >= 0) M_SETFIB(m, rtableid); @@ -2905,7 +2904,7 @@ th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_off = tlen >> 2; - th->th_flags = flags; + th->th_flags = tcp_flags; th->th_win = htons(win); if (mss) { @@ -2951,14 +2950,14 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, - u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - u_int16_t rtag, int rtableid) + u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, + bool skip_firewall, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) { struct pf_send_entry *pfse; struct mbuf *m; - m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, flags, - win, mss, ttl, tag, rtag, rtableid); + m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags, + win, mss, ttl, skip_firewall, mtag_tag, mtag_flags, rtableid); if (m == NULL) return; @@ -3048,7 +3047,7 @@ pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, 0, rtableid); + r->return_ttl, true, 0, 0, rtableid); } } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && r->return_icmp) @@ -3934,14 +3933,14 @@ SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m); mtag = pf_find_mtag(m); - if (mtag != NULL && mtag->flags & PF_TAG_DUMMYNET) { + if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) { /* Dummynet re-injects packets after they've * completed their delay. 
We've already * processed them, so pass unconditionally. */ /* But only once. We may see the packet multiple times (e.g. * PFIL_IN/PFIL_OUT). */ - mtag->flags &= ~PF_TAG_DUMMYNET; + mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; return (PF_PASS); } @@ -4014,19 +4013,19 @@ else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "dst"); - r = TAILQ_NEXT(r, entries); + r = r->skip[PFE_SKIP_DST_ADDR].ptr; } else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af, r->ipsrc.neg, kif, M_GETFIB(m))) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "ip_src"); - r = TAILQ_NEXT(r, entries); + r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr; } else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af, r->ipdst.neg, kif, M_GETFIB(m))) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "ip_dst"); - r = TAILQ_NEXT(r, entries); + r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr; } else if (r->match_tag && !pf_match_eth_tag(m, r, &tag, mtag ? mtag->tag : 0)) { @@ -4159,10 +4158,10 @@ PF_RULES_RUNLOCK(); - mtag->flags |= PF_TAG_DUMMYNET; + mtag->flags |= PF_MTAG_FLAG_DUMMYNET; ip_dn_io_ptr(m0, &dnflow); if (*m0 != NULL) - mtag->flags &= ~PF_TAG_DUMMYNET; + mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; } else { PF_RULES_RUNLOCK(); } @@ -4809,7 +4808,8 @@ s->src.mss = mss; pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->act.rtableid); + TH_SYN|TH_ACK, 0, s->src.mss, 0, true, 0, 0, + pd->act.rtableid); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -5282,7 +5282,7 @@ pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, - (*state)->rule.ptr->return_ttl, 1, 0, + (*state)->rule.ptr->return_ttl, true, 0, 0, (*state)->rtableid); src->seqlo = 0; src->seqhi = 1; @@ -5419,7 +5419,7 @@ pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 
1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, true, 0, 0, (*state)->rtableid); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); @@ -5451,7 +5451,7 @@ &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, (*state)->tag, + (*state)->src.mss, 0, false, (*state)->tag, 0, (*state)->rtableid); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); @@ -5466,13 +5466,13 @@ pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, - TH_ACK, (*state)->src.max_win, 0, 0, 0, - (*state)->tag, (*state)->rtableid); + TH_ACK, (*state)->src.max_win, 0, 0, false, + (*state)->tag, 0, (*state)->rtableid); pf_send_tcp((*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, + TH_ACK, (*state)->dst.max_win, 0, 0, true, 0, 0, (*state)->rtableid); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; @@ -6473,7 +6473,7 @@ } if (r_rt == PF_DUPTO) { - if ((pd->pf_mtag->flags & PF_DUPLICATED)) { + if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { if (s == NULL) { ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; @@ -6494,7 +6494,7 @@ goto bad; } } else { - pd->pf_mtag->flags |= PF_DUPLICATED; + pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED; if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) { if (s) PF_STATE_UNLOCK(s); @@ -6686,7 +6686,7 @@ } if (r_rt == PF_DUPTO) { - if ((pd->pf_mtag->flags & PF_DUPLICATED)) { + if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { if (s == NULL) { ifp = r->rpool.cur->kif ? 
r->rpool.cur->kif->pfik_ifp : NULL; @@ -6707,7 +6707,7 @@ goto bad; } } else { - pd->pf_mtag->flags |= PF_DUPLICATED; + pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED; if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) { if (s) PF_STATE_UNLOCK(s); @@ -7090,7 +7090,7 @@ } if (ifp != NULL) { - pd->pf_mtag->flags |= PF_TAG_ROUTE_TO; + pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO; pd->pf_mtag->if_index = ifp->if_index; pd->pf_mtag->if_idxgen = ifp->if_idxgen; @@ -7106,11 +7106,11 @@ } if (pf_pdesc_to_dnflow(dir, pd, r, s, &dnflow)) { - pd->pf_mtag->flags |= PF_TAG_DUMMYNET; + pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET; ip_dn_io_ptr(m0, &dnflow); if (*m0 != NULL) { - pd->pf_mtag->flags &= ~PF_TAG_ROUTE_TO; - pd->pf_mtag->flags &= ~PF_TAG_DUMMYNET; + pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; + pd->pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; } } } @@ -7177,8 +7177,8 @@ memcpy(&pd.act, default_actions, sizeof(pd.act)); pd.pf_mtag = pf_find_mtag(m); - if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_TAG_ROUTE_TO)) { - pd.pf_mtag->flags &= ~PF_TAG_ROUTE_TO; + if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { + pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; ifp = ifnet_byindexgen(pd.pf_mtag->if_index, pd.pf_mtag->if_idxgen); @@ -7200,14 +7200,14 @@ } if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL && - pd.pf_mtag->flags & PF_TAG_DUMMYNET) { + pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) { /* Dummynet re-injects packets after they've * completed their delay. We've already * processed them, so pass unconditionally. */ /* But only once. We may see the packet multiple times (e.g. * PFIL_IN/PFIL_OUT). 
*/ - pd.pf_mtag->flags &= ~PF_TAG_DUMMYNET; + pd.pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; PF_RULES_RUNLOCK(); return (PF_PASS); @@ -7222,12 +7222,12 @@ action = PF_DROP; goto done; } - pd.pf_mtag->flags |= PF_PACKET_LOOPED; + pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED; m_tag_delete(m, ipfwtag); } - if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; - pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; } } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ @@ -7545,7 +7545,7 @@ ("pf: failed to allocate tag\n")); } else { pd.pf_mtag->flags |= - PF_FASTFWD_OURS_PRESENT; + PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; m->m_flags &= ~M_FASTFWD_OURS; } } @@ -7741,8 +7741,8 @@ memcpy(&pd.act, default_actions, sizeof(pd.act)); pd.pf_mtag = pf_find_mtag(m); - if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_TAG_ROUTE_TO)) { - pd.pf_mtag->flags &= ~PF_TAG_ROUTE_TO; + if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { + pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; ifp = ifnet_byindexgen(pd.pf_mtag->if_index, pd.pf_mtag->if_idxgen); @@ -7765,8 +7765,8 @@ } if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL && - pd.pf_mtag->flags & PF_TAG_DUMMYNET) { - pd.pf_mtag->flags &= ~PF_TAG_DUMMYNET; + pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) { + pd.pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; /* Dummynet re-injects packets after they've * completed their delay. We've already * processed them, so pass unconditionally. */ @@ -8189,7 +8189,7 @@ /* If reassembled packet passed, create new fragments. 
*/ if (action == PF_PASS && *m0 && dir == PF_OUT && - (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL) + (mtag = m_tag_find(m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL) action = pf_refragment6(ifp, m0, mtag, pflags & PFIL_FWD); SDT_PROBE4(pf, ip, test6, done, action, reason, r, s); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -195,6 +195,12 @@ #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif +VNET_DEFINE_STATIC(bool, pf_filter_local) = false; +#define V_pf_filter_local VNET(pf_filter_local) +SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(pf_filter_local), false, + "Enable filtering for packets delivered to local network stack"); + static void pf_init_tagset(struct pf_tagset *, unsigned int *, unsigned int); static void pf_cleanup_tagset(struct pf_tagset *); @@ -843,6 +849,12 @@ PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR); if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0) PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR); + if (cur->ipsrc.neg != prev->ipsrc.neg || + pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr)) + PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR); + if (cur->ipdst.neg != prev->ipdst.neg || + pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr)) + PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR); prev = cur; cur = TAILQ_NEXT(cur, entries); @@ -6676,6 +6688,13 @@ pla.pa_hook = V_pf_ip4_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); + if (V_pf_filter_local) { + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_local_pfil_head; + pla.pa_hook = V_pf_ip4_out_hook; + ret = pfil_link(&pla); + MPASS(ret == 0); + } #endif #ifdef INET6 pha.pa_type = PFIL_TYPE_IP6; @@ -6697,6 +6716,13 @@ pla.pa_hook = V_pf_ip6_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); + if (V_pf_filter_local) { + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_local_pfil_head; + pla.pa_hook = 
V_pf_ip6_out_hook; + ret = pfil_link(&pla); + MPASS(ret == 0); + } #endif atomic_store_bool(&V_pf_pfil_hooked, true); diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h --- a/sys/netpfil/pf/pf_mtag.h +++ b/sys/netpfil/pf/pf_mtag.h @@ -36,14 +36,15 @@ #ifdef _KERNEL -#define PF_TAG_ROUTE_TO 0x01 -#define PF_TAG_DUMMYNET 0x02 -#define PF_TAG_TRANSLATE_LOCALHOST 0x04 -#define PF_PACKET_LOOPED 0x08 -#define PF_FASTFWD_OURS_PRESENT 0x10 -#define PF_REASSEMBLED 0x20 -#define PF_DUPLICATED 0x40 -#define PF_TAG_SYNCOOKIE_RECREATED 0x80 +/* pf_mtag -> flags */ +#define PF_MTAG_FLAG_ROUTE_TO 0x01 +#define PF_MTAG_FLAG_DUMMYNET 0x02 +#define PF_MTAG_FLAG_TRANSLATE_LOCALHOST 0x04 +#define PF_MTAG_FLAG_PACKET_LOOPED 0x08 +#define PF_MTAG_FLAG_FASTFWD_OURS_PRESENT 0x10 +/* 0x20 unused */ +#define PF_MTAG_FLAG_DUPLICATED 0x40 +#define PF_MTAG_FLAG_SYNCOOKIE_RECREATED 0x80 struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -898,8 +898,8 @@ m->m_pkthdr.len = plen; } - if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag), - M_NOWAIT)) == NULL) + if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, + sizeof(struct pf_fragment_tag), M_NOWAIT)) == NULL) goto fail; ftag = (struct pf_fragment_tag *)(mtag + 1); ftag->ft_hdrlen = hdrlen; diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c --- a/sys/netpfil/pf/pf_syncookies.c +++ b/sys/netpfil/pf/pf_syncookies.c @@ -267,7 +267,7 @@ MPASS(pd->proto == IPPROTO_TCP); PF_RULES_RASSERT(); - if (pd->pf_mtag && (pd->pf_mtag->tag & PF_TAG_SYNCOOKIE_RECREATED)) + if (pd->pf_mtag && (pd->pf_mtag->flags & PF_MTAG_FLAG_SYNCOOKIE_RECREATED)) return (0); if (V_pf_status.syncookies_mode != PF_SYNCOOKIES_ADAPTIVE) @@ -300,7 +300,7 @@ iss = pf_syncookie_generate(m, off, pd, mss); pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport, iss, 
ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss, - 0, 1, 0, pd->act.rtableid); + 0, true, 0, 0, pd->act.rtableid); counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_SENT], 1); /* XXX Maybe only in adaptive mode? */ atomic_add_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven], @@ -518,6 +518,6 @@ wscale = pf_syncookie_wstab[cookie.flags.wscale_idx]; return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport, - *pd->dport, seq, 0, TH_SYN, wscale, mss, ttl, 0, - PF_TAG_SYNCOOKIE_RECREATED, pd->act.rtableid)); + *pd->dport, seq, 0, TH_SYN, wscale, mss, ttl, false, 0, + PF_MTAG_FLAG_SYNCOOKIE_RECREATED, pd->act.rtableid)); } diff --git a/sys/netpfil/pf/pfsync_nv.c b/sys/netpfil/pf/pfsync_nv.c --- a/sys/netpfil/pf/pfsync_nv.c +++ b/sys/netpfil/pf/pfsync_nv.c @@ -35,6 +35,11 @@ #include #include +#include + +#include +#include + #include int @@ -42,6 +47,7 @@ struct sockaddr_storage *sa) { int af; + int error; if (!nvlist_exists_number(nvl, "af")) return (EINVAL); @@ -74,6 +80,11 @@ return (EINVAL); memcpy(in6, addr, sizeof(*in6)); + + error = sa6_embedscope(in6, V_ip6_use_defzone); + if (error) + return (error); + break; } #endif @@ -106,6 +117,7 @@ #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa; + sa6_recoverscope(in6); nvlist_add_number(nvl, "af", in6->sin6_family); nvlist_add_binary(nvl, "address", in6, sizeof(*in6)); break; diff --git a/sys/riscv/riscv/intr_machdep.c b/sys/riscv/riscv/intr_machdep.c --- a/sys/riscv/riscv/intr_machdep.c +++ b/sys/riscv/riscv/intr_machdep.c @@ -163,6 +163,9 @@ active_irq = frame->tf_scause & SCAUSE_CODE; + CTR3(KTR_TRAP, "%s: irq=%d, umode=%d", __func__, active_irq, + TRAPF_USERMODE(frame)); + switch (active_irq) { case IRQ_SOFTWARE_USER: case IRQ_SOFTWARE_SUPERVISOR: diff --git a/sys/riscv/riscv/trap.c b/sys/riscv/riscv/trap.c --- a/sys/riscv/riscv/trap.c +++ b/sys/riscv/riscv/trap.c @@ -291,8 +291,8 @@ return; #endif - CTR3(KTR_TRAP, "do_trap_supervisor: curthread: 
%p, sepc: %lx, frame: %p", - curthread, frame->tf_sepc, frame); + CTR4(KTR_TRAP, "%s: exception=%lu, sepc=%lx, stval=%lx", __func__, + exception, frame->tf_sepc, frame->tf_stval); switch (exception) { case SCAUSE_LOAD_ACCESS_FAULT: @@ -365,8 +365,8 @@ } intr_enable(); - CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", - curthread, frame->tf_sepc, frame); + CTR4(KTR_TRAP, "%s: exception=%lu, sepc=%lx, stval=%lx", __func__, + exception, frame->tf_sepc, frame->tf_stval); switch (exception) { case SCAUSE_LOAD_ACCESS_FAULT: diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1385,6 +1385,7 @@ #define PACKET_TAG_CARP 28 /* CARP info */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ +#define PACKET_TAG_PF_REASSEMBLED 31 /* Specific cookies and tags. */ diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c --- a/sys/vm/vm_radix.c +++ b/sys/vm/vm_radix.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -285,21 +286,22 @@ } /* - * Returns the slot where two keys differ. + * Returns the level where two keys differ. * It cannot accept 2 equal keys. */ static __inline uint16_t vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) { - uint16_t clev; KASSERT(index1 != index2, ("%s: passing the same key value %jx", __func__, (uintmax_t)index1)); + CTASSERT(sizeof(long long) >= sizeof(vm_pindex_t)); - index1 ^= index2; - for (clev = VM_RADIX_LIMIT;; clev--) - if (vm_radix_slot(index1, clev) != 0) - return (clev); + /* + * From the highest-order bit where the indexes differ, + * compute the highest level in the trie where they differ. 
+ */ + return ((flsll(index1 ^ index2) - 1) / VM_RADIX_WIDTH); } /* diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -338,7 +338,9 @@ #define CPUTPM1_HWP_PECI_OVR 0x00010000 #define CPUTPM1_HWP_FLEXIBLE 0x00020000 #define CPUTPM1_HWP_FAST_MSR 0x00040000 +#define CPUTPM1_HW_FEEDBACK 0x00080000 #define CPUTPM1_HWP_IGN_IDLE 0x00100000 +#define CPUTPM1_THREAD_DIRECTOR 0x00800000 /* Ebx. */ #define CPUTPM_B_NSENSINTTHRESH 0x0000000f diff --git a/tests/sys/kern/unix_seqpacket_test.c b/tests/sys/kern/unix_seqpacket_test.c --- a/tests/sys/kern/unix_seqpacket_test.c +++ b/tests/sys/kern/unix_seqpacket_test.c @@ -829,7 +829,7 @@ ATF_CHECK_EQ(0, shutdown(s2, SHUT_RDWR)); ATF_REQUIRE(SIG_ERR != signal(SIGPIPE, shutdown_send_sigpipe_handler)); datalen = strlen(data) + 1; /* +1 for the null */ - (void)send(s2, data, sizeof(*data), MSG_EOR); + (void)send(s2, data, datalen, MSG_EOR); ATF_CHECK_EQ(1, got_sigpipe); close(s); close(s2); diff --git a/tests/sys/netinet/libalias/2_natout.c b/tests/sys/netinet/libalias/2_natout.c --- a/tests/sys/netinet/libalias/2_natout.c +++ b/tests/sys/netinet/libalias/2_natout.c @@ -202,7 +202,7 @@ { struct libalias *la = LibAliasInit(NULL); struct ip *po, *pi; - struct udphdr *ui, *uo; + struct udphdr *ui __unused, *uo; uint16_t sport = 0x1234; uint16_t dport = 0x5678; uint16_t aport; diff --git a/tests/sys/netpfil/common/utils.subr b/tests/sys/netpfil/common/utils.subr --- a/tests/sys/netpfil/common/utils.subr +++ b/tests/sys/netpfil/common/utils.subr @@ -55,11 +55,10 @@ jexec ${jname} ipfw -q -f flush jexec ${jname} /bin/sh $cwd/ipfw.rule elif [ ${fw} == "pf" ]; then + jexec ${jname} sysctl net.pf.filter_local=1 jexec ${jname} pfctl -e jexec ${jname} pfctl -F all jexec ${jname} pfctl -f $cwd/pf.rule - jexec ${jname} pfilctl link -o pf:default-out inet-local - jexec ${jname} pfilctl link -o pf:default-out6 inet6-local elif [ ${fw} == "ipf" ]; then jexec 
${jname} ipf -E jexec ${jname} ipf -Fa -f $cwd/ipf.rule diff --git a/tests/sys/netpfil/pf/fragmentation_compat.sh b/tests/sys/netpfil/pf/fragmentation_compat.sh --- a/tests/sys/netpfil/pf/fragmentation_compat.sh +++ b/tests/sys/netpfil/pf/fragmentation_compat.sh @@ -112,7 +112,8 @@ "scrub fragment reassemble" \ "block in" \ "pass in inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ - "pass in inet6 proto icmp6 icmp6-type { echoreq, echorep }" + "pass in inet6 proto icmp6 icmp6-type { echoreq, echorep }" \ + "set skip on lo" # Host test atf_check -s exit:0 -o ignore \ diff --git a/tests/sys/netpfil/pf/fragmentation_pass.sh b/tests/sys/netpfil/pf/fragmentation_pass.sh --- a/tests/sys/netpfil/pf/fragmentation_pass.sh +++ b/tests/sys/netpfil/pf/fragmentation_pass.sh @@ -116,7 +116,8 @@ "pass keep state" \ "block in" \ "pass in inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ - "pass in inet6 proto icmp6 icmp6-type { echoreq, echorep }" + "pass in inet6 proto icmp6 icmp6-type { echoreq, echorep }" \ + "set skip on lo" # Host test atf_check -s exit:0 -o ignore \ diff --git a/tests/sys/netpfil/pf/killstate.sh b/tests/sys/netpfil/pf/killstate.sh --- a/tests/sys/netpfil/pf/killstate.sh +++ b/tests/sys/netpfil/pf/killstate.sh @@ -60,7 +60,8 @@ jexec alcatraz pfctl -e pft_set_rules alcatraz "block all" \ - "pass in proto icmp" + "pass in proto icmp" \ + "set skip on lo" # Sanity check & establish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ @@ -126,7 +127,8 @@ jexec alcatraz pfctl -e pft_set_rules alcatraz "block all" \ - "pass in proto icmp6" + "pass in proto icmp6" \ + "set skip on lo" # Sanity check & establish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ @@ -189,7 +191,8 @@ pft_set_rules alcatraz "block all" \ "pass in proto tcp label bar" \ - "pass in proto icmp label foo" + "pass in proto icmp label foo" \ + "set skip on lo" # Sanity check & establish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ 
@@ -251,7 +254,8 @@ jexec alcatraz pfctl -e pft_set_rules alcatraz "block all" \ - "pass in proto icmp label foo label bar" + "pass in proto icmp label foo label bar" \ + "set skip on lo" # Sanity check & establish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ @@ -281,7 +285,8 @@ fi pft_set_rules alcatraz "block all" \ - "pass in proto icmp label foo label bar" + "pass in proto icmp label foo label bar" \ + "set skip on lo" # Reestablish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ @@ -329,7 +334,8 @@ jexec alcatraz pfctl -e pft_set_rules alcatraz "block all" \ - "pass in reply-to (${epair}b 192.0.2.1) proto icmp" + "pass in reply-to (${epair}b 192.0.2.1) proto icmp" \ + "set skip on lo" # Sanity check & establish state # Note: use pft_ping so we always use the same ID, so pf considers all @@ -469,7 +475,8 @@ jexec alcatraz pfctl -e pft_set_rules alcatraz "block all" \ - "pass in proto icmp" + "pass in proto icmp" \ + "set skip on lo" # Sanity check & establish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ @@ -525,7 +532,8 @@ pft_set_rules alcatraz "block all" \ "pass in proto tcp" \ - "pass in proto icmp" + "pass in proto icmp" \ + "set skip on lo" # Sanity check & establish state atf_check -s exit:0 -o ignore ${common_dir}/pft_ping.py \ diff --git a/tests/sys/netpfil/pf/map_e.sh b/tests/sys/netpfil/pf/map_e.sh --- a/tests/sys/netpfil/pf/map_e.sh +++ b/tests/sys/netpfil/pf/map_e.sh @@ -66,7 +66,8 @@ pft_set_rules echo "block return all" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 19720:19723 to (${epair_echo}b) port 7" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 36104:36107 to (${epair_echo}b) port 7" \ - "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 52488:52491 to (${epair_echo}b) port 7" + "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 52488:52491 to (${epair_echo}b) port 7" \ + "set skip on lo" i=0 while [ ${i} -lt 
${NC_TRY_COUNT} ] diff --git a/tests/sys/netpfil/pf/pass_block.sh b/tests/sys/netpfil/pf/pass_block.sh --- a/tests/sys/netpfil/pf/pass_block.sh +++ b/tests/sys/netpfil/pf/pass_block.sh @@ -230,7 +230,8 @@ --replyif ${epair_one}a pft_set_rules alcatraz \ - "block quick from urpf-failed" + "block quick from urpf-failed" \ + "set skip on lo" jexec alcatraz pfctl -e # Correct source still works diff --git a/tests/sys/netpfil/pf/pfsync.sh b/tests/sys/netpfil/pf/pfsync.sh --- a/tests/sys/netpfil/pf/pfsync.sh +++ b/tests/sys/netpfil/pf/pfsync.sh @@ -125,10 +125,6 @@ { pfsynct_init - if [ "$(atf_config_get ci false)" = "true" ]; then - atf_skip "Skip know failing test (likely related to https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=260460)" - fi - epair_sync=$(vnet_mkepair) epair_in=$(vnet_mkepair) epair_out=$(vnet_mkepair) @@ -141,6 +137,9 @@ jexec alcatraz arp -s 203.0.113.2 00:01:02:03:04:05 jexec alcatraz sysctl net.inet.ip.forwarding=1 + # Set a long defer delay + jexec alcatraz sysctl net.pfsync.defer_delay=2500 + jexec alcatraz ifconfig pfsync0 \ syncdev ${epair_sync}a \ maxupd 1 \ @@ -153,6 +152,7 @@ route add -net 203.0.113.0/24 198.51.100.1 # Enable pf + jexec alcatraz sysctl net.pf.filter_local=0 jexec alcatraz pfctl -e pft_set_rules alcatraz \ "set skip on ${epair_sync}a" \ @@ -702,6 +702,130 @@ pft_cleanup } +atf_test_case "basic_ipv6_unicast" "cleanup" +basic_ipv6_unicast_head() +{ + atf_set descr 'Basic pfsync test (IPv6)' + atf_set require.user root +} + +basic_ipv6_unicast_body() +{ + pfsynct_init + + epair_sync=$(vnet_mkepair) + epair_one=$(vnet_mkepair) + epair_two=$(vnet_mkepair) + + vnet_mkjail one ${epair_one}a ${epair_sync}a + vnet_mkjail two ${epair_two}a ${epair_sync}b + + # pfsync interface + jexec one ifconfig ${epair_sync}a inet6 fd2c::1/64 up + jexec one ifconfig ${epair_one}a inet6 fd2b::1/64 up + jexec one ifconfig pfsync0 \ + syncdev ${epair_sync}a \ + syncpeer fd2c::2 \ + maxupd 1 \ + up + jexec two ifconfig ${epair_two}a inet6 
fd2b::2/64 up + jexec two ifconfig ${epair_sync}b inet6 fd2c::2/64 up + jexec two ifconfig pfsync0 \ + syncdev ${epair_sync}b \ + syncpeer fd2c::1 \ + maxupd 1 \ + up + + # Enable pf! + jexec one pfctl -e + pft_set_rules one \ + "block on ${epair_sync}a inet" \ + "pass out keep state" + jexec two pfctl -e + pft_set_rules two \ + "block on ${epair_sync}b inet" \ + "pass out keep state" + + ifconfig ${epair_one}b inet6 fd2b::f0/64 up + + ping6 -c 1 -S fd2b::f0 fd2b::1 + + # Give pfsync time to do its thing + sleep 2 + + if ! jexec two pfctl -s states | grep icmp | grep fd2b::1 | \ + grep fd2b::f0 ; then + atf_fail "state not found on synced host" + fi +} + +basic_ipv6_unicast_cleanup() +{ + pfsynct_cleanup +} + +atf_test_case "basic_ipv6" "cleanup" +basic_ipv6_head() +{ + atf_set descr 'Basic pfsync test (IPv6)' + atf_set require.user root +} + +basic_ipv6_body() +{ + pfsynct_init + + epair_sync=$(vnet_mkepair) + epair_one=$(vnet_mkepair) + epair_two=$(vnet_mkepair) + + vnet_mkjail one ${epair_one}a ${epair_sync}a + vnet_mkjail two ${epair_two}a ${epair_sync}b + + # pfsync interface + jexec one ifconfig ${epair_sync}a inet6 fd2c::1/64 up + jexec one ifconfig ${epair_one}a inet6 fd2b::1/64 up + jexec one ifconfig pfsync0 \ + syncdev ${epair_sync}a \ + syncpeer ff12::f0 \ + maxupd 1 \ + up + jexec two ifconfig ${epair_two}a inet6 fd2b::2/64 up + jexec two ifconfig ${epair_sync}b inet6 fd2c::2/64 up + jexec two ifconfig pfsync0 \ + syncdev ${epair_sync}b \ + syncpeer ff12::f0 \ + maxupd 1 \ + up + + # Enable pf! + jexec one pfctl -e + pft_set_rules one \ + "block on ${epair_sync}a inet" \ + "pass out keep state" + jexec two pfctl -e + pft_set_rules two \ + "block on ${epair_sync}b inet" \ + "pass out keep state" + + ifconfig ${epair_one}b inet6 fd2b::f0/64 up + + ping6 -c 1 -S fd2b::f0 fd2b::1 + + # Give pfsync time to do its thing + sleep 2 + + if ! 
jexec two pfctl -s states | grep icmp | grep fd2b::1 | \ + grep fd2b::f0 ; then + atf_fail "state not found on synced host" + fi +} + +basic_ipv6_cleanup() +{ + pfsynct_cleanup +} + atf_init_test_cases() { atf_add_test_case "basic" @@ -712,4 +836,6 @@ atf_add_test_case "pfsync_pbr" atf_add_test_case "ipsec" atf_add_test_case "timeout" + atf_add_test_case "basic_ipv6_unicast" + atf_add_test_case "basic_ipv6" } diff --git a/tests/sys/netpfil/pf/pfsync_defer.py b/tests/sys/netpfil/pf/pfsync_defer.py --- a/tests/sys/netpfil/pf/pfsync_defer.py +++ b/tests/sys/netpfil/pf/pfsync_defer.py @@ -119,13 +119,12 @@ if not got_ping: sys.exit(2) - if got_pfsync > got_ping: + # Deferred packets are delayed around 2.5s (unless the pfsync peer, which + # we don't have here, acks their state update earlier) + # Expect at least a second of delay, to be somewhat robust against + # scheduling-induced jitter. + if (sent_ping + 1) > got_ping: sys.exit(3) - # Deferred packets are delayed up to 20ms (unless the pfsync peer, which we - # don't have here, acks their state update earlier) - if (sent_ping + 0.020) > got_ping: - sys.exit(4) - if __name__ == '__main__': main() diff --git a/tests/sys/netpfil/pf/route_to.sh b/tests/sys/netpfil/pf/route_to.sh --- a/tests/sys/netpfil/pf/route_to.sh +++ b/tests/sys/netpfil/pf/route_to.sh @@ -230,7 +230,8 @@ "block in" \ "block out" \ "pass out quick route-to (${epair_cl_two}a 203.0.113.129) inet proto tcp from 203.0.113.128 to any port 7" \ - "pass out on ${epair_cl_one}a inet proto tcp from any to any port 7" + "pass out on ${epair_cl_one}a inet proto tcp from any to any port 7" \ + "set skip on lo" # This should work result=$(jexec client nc -N -w 1 192.0.2.2 7 | wc -c) diff --git a/tests/sys/netpfil/pf/set_skip.sh b/tests/sys/netpfil/pf/set_skip.sh --- a/tests/sys/netpfil/pf/set_skip.sh +++ b/tests/sys/netpfil/pf/set_skip.sh @@ -101,7 +101,7 @@ vnet_mkjail alcatraz jexec alcatraz pfctl -e pft_set_rules alcatraz "set skip on epair" \ - "block" + 
"block on ! lo" epair=$(vnet_mkepair) ifconfig ${epair}a 192.0.2.2/24 up diff --git a/tests/sys/netpfil/pf/table.sh b/tests/sys/netpfil/pf/table.sh --- a/tests/sys/netpfil/pf/table.sh +++ b/tests/sys/netpfil/pf/table.sh @@ -52,7 +52,8 @@ "table counters { 192.0.2.1 }" \ "block all" \ "pass in from to any" \ - "pass out from any to " + "pass out from any to " \ + "set skip on lo" atf_check -s exit:0 -o ignore ping -c 3 192.0.2.2 @@ -91,7 +92,8 @@ "table counters { 2001:db8:42::1 }" \ "block all" \ "pass in from to any" \ - "pass out from any to " + "pass out from any to " \ + "set skip on lo" atf_check -s exit:0 -o ignore ping -6 -c 3 2001:db8:42::2 diff --git a/tools/build/options/WITHOUT_CAPSICUM b/tools/build/options/WITHOUT_CAPSICUM --- a/tools/build/options/WITHOUT_CAPSICUM +++ b/tools/build/options/WITHOUT_CAPSICUM @@ -1,2 +1 @@ -.\" $FreeBSD$ -Do not build Capsicum support into system programs. +This option has no effect. diff --git a/tools/build/options/WITHOUT_CASPER b/tools/build/options/WITHOUT_CASPER --- a/tools/build/options/WITHOUT_CASPER +++ b/tools/build/options/WITHOUT_CASPER @@ -1,2 +1 @@ -.\" $FreeBSD$ -Do not build Casper program and related libraries. +This option has no effect. 
diff --git a/tools/build/options/makeman b/tools/build/options/makeman --- a/tools/build/options/makeman +++ b/tools/build/options/makeman @@ -117,10 +117,17 @@ exit 1 ;; esac + requireds=`env -i make -f ${srcdir}/share/mk/src.opts.mk \ + -V '${__REQUIRED_OPTIONS:ts,}'` env -i ${make} .MAKE.MODE=normal "$@" showconfig __MAKE_CONF=/dev/null \ SRCCONF=/dev/null | while read var _ val ; do opt=${var#MK_} + case ,${requireds}, in + *,${opt},*) + continue + ;; + esac case ${val} in yes) echo ${yes_prefix}_${opt} diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c --- a/usr.bin/diff/diffreg.c +++ b/usr.bin/diff/diffreg.c @@ -682,14 +682,14 @@ static void check(FILE *f1, FILE *f2, int flags) { - int i, j, jackpot, c, d; + int i, j, /* jackpot, */ c, d; long ctold, ctnew; rewind(f1); rewind(f2); j = 1; ixold[0] = ixnew[0] = 0; - jackpot = 0; + /* jackpot = 0; */ ctold = ctnew = 0; for (i = 1; i <= len[0]; i++) { if (J[i] == 0) { @@ -759,7 +759,7 @@ } } if (chrtran(c) != chrtran(d)) { - jackpot++; + /* jackpot++; */ J[i] = 0; if (c != '\n' && c != EOF) ctold += skipline(f1); diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c --- a/usr.bin/mkuzip/mkuzip.c +++ b/usr.bin/mkuzip/mkuzip.c @@ -128,7 +128,6 @@ uint64_t offset, last_offset; struct cloop_header hdr; struct mkuz_conveyor *cvp; - void *c_ctx; struct mkuz_blk_info *chit; size_t ncpusz, ncpu, magiclen; double st, et; @@ -255,7 +254,7 @@ errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", cfs.cbound_blksz, (size_t)MAXPHYS); - c_ctx = cfs.handler->f_init(&comp_level); + cfs.handler->f_init(&comp_level); cfs.comp_level = comp_level; cfs.iname = argv[0]; diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c --- a/usr.bin/procstat/procstat_files.c +++ b/usr.bin/procstat/procstat_files.c @@ -267,10 +267,9 @@ static void print_capability(cap_rights_t *rightsp, u_int capwidth) { - u_int count, i, width; + u_int count, i; count = 0; - width = 0; for (i = 
width_capability(rightsp); i < capwidth; i++) { if (i != 0) xo_emit(" "); @@ -282,9 +281,6 @@ if (cap_rights_is_set(rightsp, cap_desc[i].cd_right)) { xo_emit("{D:/%s}{l:capabilities/%s}", count ? "," : "", cap_desc[i].cd_desc); - width += strlen(cap_desc[i].cd_desc); - if (count) - width++; count++; } } diff --git a/usr.bin/seq/seq.c b/usr.bin/seq/seq.c --- a/usr.bin/seq/seq.c +++ b/usr.bin/seq/seq.c @@ -88,14 +88,14 @@ { const char *sep, *term; struct lconv *locale; - char pad, *fmt, *cur_print, *last_print; - double first, last, incr, last_shown_value, cur, step; + char pad, *fmt, *cur_print, *last_print, *prev_print; + double first, last, incr, prev, cur, step; int c, errflg, equalize; pad = ZERO; fmt = NULL; first = 1.0; - last = incr = last_shown_value = 0.0; + last = incr = prev = 0.0; c = errflg = equalize = 0; sep = "\n"; term = NULL; @@ -186,7 +186,7 @@ cur = first + incr * step++) { printf(fmt, cur); fputs(sep, stdout); - last_shown_value = cur; + prev = cur; } /* @@ -194,10 +194,9 @@ * * We might have, so check if the printable version of the last * computed value ('cur') and desired 'last' value are equal. If they - * are equal after formatting truncation, but 'cur' and - * 'last_shown_value' are not equal, it means the exit condition of the - * loop held true due to a rounding error and we still need to print - * 'last'. + * are equal after formatting truncation, but 'cur' and 'prev' are not + * equal, it means the exit condition of the loop held true due to a + * rounding error and we still need to print 'last'. 
*/ if (asprintf(&cur_print, fmt, cur) < 0) { err(1, "asprintf"); @@ -205,12 +204,17 @@ if (asprintf(&last_print, fmt, last) < 0) { err(1, "asprintf"); } - if (strcmp(cur_print, last_print) == 0 && cur != last_shown_value) { + if (asprintf(&prev_print, fmt, prev) < 0) { + err(1, "asprintf"); + } + if (strcmp(cur_print, last_print) == 0 && + strcmp(cur_print, prev_print) != 0) { fputs(last_print, stdout); fputs(sep, stdout); } free(cur_print); free(last_print); + free(prev_print); if (term != NULL) fputs(term, stdout); diff --git a/usr.bin/units/units.c b/usr.bin/units/units.c --- a/usr.bin/units/units.c +++ b/usr.bin/units/units.c @@ -763,11 +763,9 @@ EditLine *el; HistEvent ev; int inputsz; - char const * history_file; quiet = false; readfile = false; - history_file = NULL; outputformat = numfmt; quit = false; while ((optchar = getopt_long(argc, argv, "+ehf:o:qtvH:UV", longopts, NULL)) != -1) { @@ -783,7 +781,7 @@ readunits(optarg); break; case 'H': - history_file = optarg; + /* Ignored, for compatibility with GNU units. */ break; case 'q': quiet = true; diff --git a/usr.sbin/ac/ac.c b/usr.sbin/ac/ac.c --- a/usr.sbin/ac/ac.c +++ b/usr.sbin/ac/ac.c @@ -408,17 +408,15 @@ struct utmpx *usr, usht; struct tm *ltm; struct timeval prev_secs, ut_timecopy, secs, clock_shift, now; - int day, rfound; + int day; day = -1; timerclear(&prev_secs); /* Minimum acceptable date == 1970. */ timerclear(&secs); timerclear(&clock_shift); - rfound = 0; if (setutxdb(UTXDB_LOG, file) != 0) err(1, "%s", file); while ((usr = getutxent()) != NULL) { - rfound++; ut_timecopy = usr->ut_tv; /* Don't let the time run backwards. 
*/ if (timercmp(&ut_timecopy, &prev_secs, <)) diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -88,6 +88,7 @@ .if ${MK_BHYVE_SNAPSHOT} != "no" SRCS+= snapshot.c +SRCS+= migration.c .endif CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -91,9 +91,6 @@ static int dsdt_indent_level; static int dsdt_error; -static struct basl_table *rsdt; -static struct basl_table *xsdt; - struct basl_fio { int fd; FILE *fp; @@ -532,10 +529,7 @@ BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_DSDT, offsetof(ACPI_TABLE_FADT, XDsdt), sizeof(fadt.XDsdt))); - BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_FADT, - ACPI_RSDT_ENTRY_SIZE)); - BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_FADT, - ACPI_XSDT_ENTRY_SIZE)); + BASL_EXEC(basl_table_register_to_rsdt(table)); return (0); } @@ -557,10 +551,7 @@ hpet.Flags = ACPI_HPET_PAGE_PROTECT4; BASL_EXEC(basl_table_append_content(table, &hpet, sizeof(hpet))); - BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_HPET, - ACPI_RSDT_ENTRY_SIZE)); - BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_HPET, - ACPI_XSDT_ENTRY_SIZE)); + BASL_EXEC(basl_table_register_to_rsdt(table)); return (0); } @@ -635,10 +626,7 @@ BASL_EXEC(basl_table_append_bytes(table, &madt_lapic_nmi, sizeof(madt_lapic_nmi))); - BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_MADT, - ACPI_RSDT_ENTRY_SIZE)); - BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_MADT, - ACPI_XSDT_ENTRY_SIZE)); + BASL_EXEC(basl_table_register_to_rsdt(table)); return (0); } @@ -663,10 +651,7 @@ BASL_EXEC(basl_table_append_bytes(table, &mcfg_allocation, sizeof(mcfg_allocation))); - BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_MCFG, - ACPI_RSDT_ENTRY_SIZE)); - BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_MCFG, - ACPI_XSDT_ENTRY_SIZE)); + BASL_EXEC(basl_table_register_to_rsdt(table)); return (0); 
} @@ -708,19 +693,6 @@ return (0); } -static int -build_rsdt(struct vmctx *const ctx) -{ - BASL_EXEC( - basl_table_create(&rsdt, ctx, ACPI_SIG_RSDT, BASL_TABLE_ALIGNMENT)); - - /* Header */ - BASL_EXEC(basl_table_append_header(rsdt, ACPI_SIG_RSDT, 1, 1)); - /* Pointers (added by other build_XXX funcs) */ - - return (0); -} - static int build_spcr(struct vmctx *const ctx) { @@ -744,23 +716,7 @@ spcr.TerminalType = ACPI_SPCR_TERMINAL_TYPE_VT_UTF8; BASL_EXEC(basl_table_append_content(table, &spcr, sizeof(spcr))); - BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_SPCR, - ACPI_RSDT_ENTRY_SIZE)); - BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_SPCR, - ACPI_XSDT_ENTRY_SIZE)); - - return (0); -} - -static int -build_xsdt(struct vmctx *const ctx) -{ - BASL_EXEC( - basl_table_create(&xsdt, ctx, ACPI_SIG_XSDT, BASL_TABLE_ALIGNMENT)); - - /* Header */ - BASL_EXEC(basl_table_append_header(xsdt, ACPI_SIG_XSDT, 1, 1)); - /* Pointers (added by other build_XXX funcs) */ + BASL_EXEC(basl_table_register_to_rsdt(table)); return (0); } @@ -790,7 +746,7 @@ if (getenv("BHYVE_ACPI_KEEPTMPS")) basl_keep_temps = 1; - BASL_EXEC(basl_init()); + BASL_EXEC(basl_init(ctx)); BASL_EXEC(basl_make_templates()); @@ -802,8 +758,6 @@ * first table after XSDT. 
*/ BASL_EXEC(build_rsdp(ctx)); - BASL_EXEC(build_rsdt(ctx)); - BASL_EXEC(build_xsdt(ctx)); BASL_EXEC(build_fadt(ctx)); BASL_EXEC(build_madt(ctx)); BASL_EXEC(build_hpet(ctx)); diff --git a/usr.sbin/bhyve/basl.h b/usr.sbin/bhyve/basl.h --- a/usr.sbin/bhyve/basl.h +++ b/usr.sbin/bhyve/basl.h @@ -67,7 +67,7 @@ uint8_t bit_width, uint8_t bit_offset, uint8_t access_width, uint64_t address); int basl_finish(void); -int basl_init(void); +int basl_init(struct vmctx *ctx); int basl_table_add_checksum(struct basl_table *const table, const uint32_t off, const uint32_t start, const uint32_t len); int basl_table_add_length(struct basl_table *const table, const uint32_t off, @@ -97,3 +97,5 @@ const uint8_t src_signature[ACPI_NAMESEG_SIZE], uint8_t size); int basl_table_create(struct basl_table **table, struct vmctx *ctx, const uint8_t *name, uint32_t alignment); +/* Adds the table to RSDT and XSDT */ +int basl_table_register_to_rsdt(struct basl_table *table); diff --git a/usr.sbin/bhyve/basl.c b/usr.sbin/bhyve/basl.c --- a/usr.sbin/bhyve/basl.c +++ b/usr.sbin/bhyve/basl.c @@ -58,6 +58,8 @@ basl_tables); static struct qemu_loader *basl_loader; +static struct basl_table *rsdt; +static struct basl_table *xsdt; static __inline uint64_t basl_le_dec(void *pp, size_t len) @@ -358,10 +360,41 @@ return (0); } +static int +basl_init_rsdt(struct vmctx *const ctx) +{ + BASL_EXEC( + basl_table_create(&rsdt, ctx, ACPI_SIG_RSDT, BASL_TABLE_ALIGNMENT)); + + /* Header */ + BASL_EXEC(basl_table_append_header(rsdt, ACPI_SIG_RSDT, 1, 1)); + /* Pointers (added by basl_table_register_to_rsdt) */ + + return (0); +} + +static int +basl_init_xsdt(struct vmctx *const ctx) +{ + BASL_EXEC( + basl_table_create(&xsdt, ctx, ACPI_SIG_XSDT, BASL_TABLE_ALIGNMENT)); + + /* Header */ + BASL_EXEC(basl_table_append_header(xsdt, ACPI_SIG_XSDT, 1, 1)); + /* Pointers (added by basl_table_register_to_rsdt) */ + + return (0); +} + int -basl_init(void) +basl_init(struct vmctx *const ctx) { - return 
(qemu_loader_create(&basl_loader, QEMU_FWCFG_FILE_TABLE_LOADER)); + BASL_EXEC(basl_init_rsdt(ctx)); + BASL_EXEC(basl_init_xsdt(ctx)); + BASL_EXEC( + qemu_loader_create(&basl_loader, QEMU_FWCFG_FILE_TABLE_LOADER)); + + return (0); } int @@ -627,3 +660,20 @@ return (0); } + +int +basl_table_register_to_rsdt(struct basl_table *table) +{ + const ACPI_TABLE_HEADER *header; + + assert(table != NULL); + + header = (const ACPI_TABLE_HEADER *)table->data; + + BASL_EXEC(basl_table_append_pointer(rsdt, header->Signature, + ACPI_RSDT_ENTRY_SIZE)); + BASL_EXEC(basl_table_append_pointer(xsdt, header->Signature, + ACPI_XSDT_ENTRY_SIZE)); + + return (0); +} diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -80,6 +80,11 @@ .Op Fl o Ar var Ns Cm = Ns Ar value .Op Fl p Ar vcpu Ns Cm \&: Ns Ar hostcpu .Op Fl r Ar file +.Oo Fl R +.Sm off +.Ar host Op Cm \&: Ar port +.Sm on +.Oc .Sm off .Oo Fl s\~ .Ar slot Cm \&, Ar emulation Op Cm \&, Ar conf @@ -276,6 +281,13 @@ .Fl l options. The count of vCPUs and memory configuration are read from the snapshot. +.It Fl R Ar host Ns Op Cm \&: Ns Ar port +Receive migration from a source guest. +Wait for a connection from +.Ar host +on the specified +.Ar port +and resume execution. The default migration port is 24983. .It Fl S Wire guest memory. .It Fl s Cm help @@ -482,6 +494,12 @@ Disable emulation of guest trim requests via .Dv DIOCGDELETE requests. +.It Li bootindex= Ns Ar index +Add the device to the bootorder at +.Ar index . +A fwcfg file is used to specify the bootorder. +The guest firmware may ignore or not support this fwcfg file. +In that case, this feature does not work as expected. .El .Pp SCSI device backends: @@ -499,6 +517,12 @@ .It Cm iid= Ns Ar IID Initiator ID to use when sending requests to specified CTL port. The default value is 0. +.It Li bootindex= Ns Ar index +Add the device to the bootorder at +.Ar index . 
+A fwcfg file is used to specify the bootorder. +The guest firmware may ignore or not support this fwcfg file. +In that case, this feature does not work as expected. .El .Pp 9P device backends: @@ -596,6 +620,12 @@ .Ar romfile as option ROM to the PCI device. The ROM will be loaded by firmware and should be capable of initializing the device. +.It Li bootindex= Ns Ar index +Add the device to the bootorder at +.Ar index . +A fwcfg file is used to specify the bootorder. +The guest firmware may ignore or not support this fwcfg file. +In that case, this feature does not work as expected. .El .Pp Guest memory must be wired using the diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -98,6 +98,9 @@ #include "kernemu_dev.h" #include "mem.h" #include "mevent.h" +#ifdef BHYVE_SNAPSHOT +#include "migration.h" +#endif #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" @@ -231,6 +234,7 @@ " -p: pin 'vcpu' to 'hostcpu'\n" #ifdef BHYVE_SNAPSHOT " -r: path to checkpoint file\n" + " -R: the source vm host and port for migration\n" #endif " -S: guest memory cannot be swapped\n" " -s: PCI slot config\n" @@ -549,17 +553,31 @@ assert(error == 0); } -static int +static void fbsdrun_deletecpu(int vcpu) { + static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; + static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; + pthread_mutex_lock(&resetcpu_mtx); if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(4); } - CPU_CLR_ATOMIC(vcpu, &cpumask); - return (CPU_EMPTY(&cpumask)); + CPU_CLR(vcpu, &cpumask); + + if (vcpu != BSP) { + pthread_cond_signal(&resetcpu_cond); + pthread_mutex_unlock(&resetcpu_mtx); + pthread_exit(NULL); + /* NOTREACHED */ + } + + while (!CPU_EMPTY(&cpumask)) { + pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); + } + pthread_mutex_unlock(&resetcpu_mtx); } static int @@ -814,9 +832,6 @@ return 
(VMEXIT_ABORT); } -static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; - static int vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { @@ -830,19 +845,6 @@ fbsdrun_deletecpu(vcpuid); - if (vcpuid != BSP) { - pthread_mutex_lock(&resetcpu_mtx); - pthread_cond_signal(&resetcpu_cond); - pthread_mutex_unlock(&resetcpu_mtx); - pthread_exit(NULL); - } - - pthread_mutex_lock(&resetcpu_mtx); - while (!CPU_EMPTY(&cpumask)) { - pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); - } - pthread_mutex_unlock(&resetcpu_mtx); - switch (how) { case VM_SUSPEND_RESET: exit(0); @@ -926,7 +928,7 @@ return (error); } -static vmexit_handler_t handler[VM_EXITCODE_MAX] = { +static const vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, @@ -942,6 +944,8 @@ [VM_EXITCODE_DEBUG] = vmexit_debug, [VM_EXITCODE_BPT] = vmexit_breakpoint, [VM_EXITCODE_IPI] = vmexit_ipi, + [VM_EXITCODE_HLT] = vmexit_hlt, + [VM_EXITCODE_PAUSE] = vmexit_pause, }; static void @@ -1008,7 +1012,7 @@ } static void -fbsdrun_set_capabilities(struct vcpu *vcpu, bool bsp) +fbsdrun_set_capabilities(struct vcpu *vcpu) { int err, tmp; @@ -1019,8 +1023,6 @@ exit(4); } vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1); - if (bsp) - handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (get_config_bool_default("x86.vmexit_on_pause", false)) { @@ -1034,8 +1036,6 @@ exit(4); } vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1); - if (bsp) - handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (get_config_bool_default("x86.x2apic", false)) @@ -1082,7 +1082,11 @@ exit(4); } } else { +#ifndef BHYVE_SNAPSHOT if (!romboot) { +#else + if (!romboot && !get_config_bool_default("is_migrated", false)) { +#endif /* * If the virtual machine was just created then a * bootrom must be configured to boot it. 
@@ -1122,7 +1126,7 @@ int error; if (!bsp) { - fbsdrun_set_capabilities(vi->vcpu, false); + fbsdrun_set_capabilities(vi->vcpu); /* * Enable the 'unrestricted guest' mode for APs. @@ -1227,9 +1231,11 @@ const char *optstr, *value, *vmname; #ifdef BHYVE_SNAPSHOT char *restore_file; + char *migration_host; struct restore_state rstate; restore_file = NULL; + migration_host = NULL; #endif init_config(); @@ -1237,7 +1243,7 @@ progname = basename(argv[0]); #ifdef BHYVE_SNAPSHOT - optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:"; + optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:R:"; #else optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:"; #endif @@ -1294,6 +1300,10 @@ case 'r': restore_file = optarg; break; + case 'R': + migration_host = optarg; + set_config_bool("is_migrated", true); + break; #endif case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { @@ -1418,7 +1428,7 @@ exit(4); } - fbsdrun_set_capabilities(bsp, true); + fbsdrun_set_capabilities(bsp); /* Allocate per-VCPU resources. 
*/ vcpu_info = calloc(guest_ncpus, sizeof(*vcpu_info)); @@ -1510,38 +1520,51 @@ spinup_vcpu(&vcpu_info[vcpuid], vcpuid == BSP); #ifdef BHYVE_SNAPSHOT - if (restore_file != NULL) { - fprintf(stdout, "Pausing pci devs...\r\n"); - if (vm_pause_user_devs() != 0) { + if (restore_file != NULL || migration_host != NULL) { + fprintf(stdout, "Pausing pci devs...\n"); + if (vm_pause_devices() != 0) { fprintf(stderr, "Failed to pause PCI device state.\n"); exit(1); } - fprintf(stdout, "Restoring vm mem...\r\n"); - if (restore_vm_mem(ctx, &rstate) != 0) { - fprintf(stderr, "Failed to restore VM memory.\n"); - exit(1); - } + if (restore_file != NULL) { + fprintf(stdout, "Restoring vm mem...\n"); + if (restore_vm_mem(ctx, &rstate) != 0) { + fprintf(stderr, + "Failed to restore VM memory.\n"); + exit(1); + } - fprintf(stdout, "Restoring pci devs...\r\n"); - if (vm_restore_user_devs(&rstate) != 0) { - fprintf(stderr, "Failed to restore PCI device state.\n"); - exit(1); + fprintf(stdout, "Restoring pci devs...\n"); + if (vm_restore_devices(&rstate) != 0) { + fprintf(stderr, + "Failed to restore PCI device state.\n"); + exit(1); + } + + fprintf(stdout, "Restoring kernel structs...\n"); + if (vm_restore_kern_structs(ctx, &rstate) != 0) { + fprintf(stderr, + "Failed to restore kernel structs.\n"); + exit(1); + } } - fprintf(stdout, "Restoring kernel structs...\r\n"); - if (vm_restore_kern_structs(ctx, &rstate) != 0) { - fprintf(stderr, "Failed to restore kernel structs.\n"); - exit(1); + if (migration_host != NULL) { + fprintf(stdout, "Starting the migration process...\n"); + if (receive_vm_migration(ctx, migration_host) != 0) { + fprintf(stderr, "Failed to migrate the vm.\n"); + exit(1); + } } - fprintf(stdout, "Resuming pci devs...\r\n"); - if (vm_resume_user_devs() != 0) { + fprintf(stdout, "Resuming pci devs...\n"); + if (vm_resume_devices() != 0) { fprintf(stderr, "Failed to resume PCI device state.\n"); exit(1); } } -#endif +#endif /* BHYVE_SNAPSHOT */ error = 
vm_get_register(bsp, VM_REG_GUEST_RIP, &rip); assert(error == 0); @@ -1609,8 +1632,9 @@ #endif #ifdef BHYVE_SNAPSHOT - if (restore_file != NULL) { + if (restore_file != NULL) destroy_restore_state(&rstate); + if (restore_file != NULL || migration_host != NULL) { if (vm_restore_time(ctx) < 0) err(EX_OSERR, "Unable to restore time"); diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h --- a/usr.sbin/bhyve/block_if.h +++ b/usr.sbin/bhyve/block_if.h @@ -62,11 +62,13 @@ struct iovec br_iov[BLOCKIF_IOV_MAX]; }; +struct pci_devinst; struct blockif_ctxt; typedef void blockif_resize_cb(struct blockif_ctxt *, void *, size_t); int blockif_legacy_config(nvlist_t *nvl, const char *opts); +int blockif_add_boot_device(struct pci_devinst *const pi, struct blockif_ctxt *const bc); struct blockif_ctxt *blockif_open(nvlist_t *nvl, const char *ident); int blockif_register_resize_callback(struct blockif_ctxt *bc, blockif_resize_cb *cb, void *cb_arg); diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -122,6 +122,7 @@ TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; + int bc_bootindex; }; static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; @@ -466,12 +467,22 @@ return (pci_parse_legacy_config(nvl, cp + 1)); } +int +blockif_add_boot_device(struct pci_devinst *const pi, + struct blockif_ctxt *const bc) +{ + if (bc->bc_bootindex < 0) + return (0); + + return (pci_emul_add_boot_device(pi, bc->bc_bootindex)); +} + struct blockif_ctxt * blockif_open(nvlist_t *nvl, const char *ident) { char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; - const char *path, *pssval, *ssval; + const char *path, *pssval, *ssval, *bootindex_val; char *cp; struct blockif_ctxt *bc; struct stat sbuf; @@ -480,6 +491,7 @@ int extra, fd, i, sectsz; int ro, candelete, geom, ssopt, pssopt; int nodelete; + int bootindex; #ifndef WITHOUT_CAPSICUM 
cap_rights_t rights; @@ -493,6 +505,7 @@ ssopt = 0; ro = 0; nodelete = 0; + bootindex = -1; if (get_config_bool_node_default(nvl, "nocache", false)) extra |= O_DIRECT; @@ -525,6 +538,11 @@ } } + bootindex_val = get_config_value_node(nvl, "bootindex"); + if (bootindex_val != NULL) { + bootindex = atoi(bootindex_val); + } + path = get_config_value_node(nvl, "path"); if (path == NULL) { EPRINTLN("Missing \"path\" for block device."); @@ -644,6 +662,7 @@ TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); + bc->bc_bootindex = bootindex; for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -37,7 +37,6 @@ #include #include -#include #include #include diff --git a/usr.sbin/bhyve/kernemu_dev.c b/usr.sbin/bhyve/kernemu_dev.c --- a/usr.sbin/bhyve/kernemu_dev.c +++ b/usr.sbin/bhyve/kernemu_dev.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include struct vm; struct vm_hpet_cap; diff --git a/usr.sbin/bhyve/migration.h b/usr.sbin/bhyve/migration.h new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/migration.h @@ -0,0 +1,27 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2017-2020 Elena Mihailescu + * Copyright (c) 2017-2020 Darius Mihai + * Copyright (c) 2017-2020 Mihai Carabas + * + * The migration feature was developed under sponsorships + * from Matthew Grooms. 
+ * + */ + +#pragma once + +#include +#include + +#define DEFAULT_MIGRATION_PORT 24983 + +struct vmctx; + +struct migrate_req { + char host[MAXHOSTNAMELEN]; + unsigned int port; +}; + +int receive_vm_migration(struct vmctx *ctx, char *migration_data); diff --git a/usr.sbin/bhyve/migration.c b/usr.sbin/bhyve/migration.c new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/migration.c @@ -0,0 +1,98 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2017-2020 Elena Mihailescu + * Copyright (c) 2017-2020 Darius Mihai + * Copyright (c) 2017-2020 Mihai Carabas + * + * The migration feature was developed under sponsorships + * from Matthew Grooms. + * + */ + +#include +#include +#include +#include +#include + +#include +#ifndef WITHOUT_CAPSICUM +#include +#include +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +#include "migration.h" +#include "pci_emul.h" +#include "snapshot.h" + + +#ifdef BHYVE_DEBUG +#define DPRINTF(FMT, ...) \ +({ \ + fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__); \ + }) +#else +#define DPRINTF(FMT, ...) +#endif + +#define EPRINTF(FMT, ...) 
\ +({ \ + fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__); \ + }) + +int +receive_vm_migration(struct vmctx *ctx, char *migration_data) +{ + struct migrate_req req; + size_t len; + char *hostname, *pos; + unsigned int port = DEFAULT_MIGRATION_PORT; + int rc; + + assert(ctx != NULL); + assert(migration_data != NULL); + + memset(req.host, 0, MAXHOSTNAMELEN); + hostname = strdup(migration_data); + + if ((pos = strchr(hostname, ':')) != NULL) { + *pos = '\0'; + pos = pos + 1; + + rc = sscanf(pos, "%u", &port); + + if (rc <= 0) { + EPRINTF("Could not parse the port"); + free(hostname); + return (EINVAL); + } + } + req.port = port; + + len = strlen(hostname); + if (len > MAXHOSTNAMELEN - 1) { + EPRINTF("Hostname length %lu bigger than maximum allowed %d", + len, MAXHOSTNAMELEN - 1); + free(hostname); + return (EINVAL); + } + + strlcpy(req.host, hostname, MAXHOSTNAMELEN); + + // rc = vm_recv_migrate_req(ctx, req); + rc = EOPNOTSUPP; + EPRINTF("Migration not implemented yet"); + + free(hostname); + return (rc); +} diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -2477,6 +2477,13 @@ ret = 1; goto open_fail; } + + ret = blockif_add_boot_device(pi, bctxt); + if (ret) { + sc->ports = p; + goto open_fail; + } + sc->port[p].bctx = bctxt; sc->port[p].pr_sc = sc; sc->port[p].port = p; diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -234,6 +234,8 @@ enum pcibar_type type, uint64_t size); int pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size, void **const addr); +int pci_emul_add_boot_device(struct pci_devinst *const pi, + const int bootindex); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, @@ -263,9 +265,10 @@ uint64_t 
pci_ecfg_base(void); int pci_bus_configured(int bus); #ifdef BHYVE_SNAPSHOT +struct pci_devinst *pci_next(const struct pci_devinst *cursor); int pci_snapshot(struct vm_snapshot_meta *meta); -int pci_pause(const char *dev_name); -int pci_resume(const char *dev_name); +int pci_pause(struct pci_devinst *pdi); +int pci_resume(struct pci_devinst *pdi); #endif static __inline void diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -62,6 +62,7 @@ #include "pci_irq.h" #include "pci_lpc.h" #include "pci_passthru.h" +#include "qemu_fwcfg.h" #define CONF1_ADDR_PORT 0x0cf8 #define CONF1_DATA_PORT 0x0cfc @@ -121,6 +122,14 @@ static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(pci_bars); +struct boot_device { + TAILQ_ENTRY(boot_device) boot_device_chain; + struct pci_devinst *pdi; + int bootindex; +}; +static TAILQ_HEAD(boot_list, boot_device) boot_devices = TAILQ_HEAD_INITIALIZER( + boot_devices); + #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 @@ -955,6 +964,45 @@ return (0); } +int +pci_emul_add_boot_device(struct pci_devinst *pi, int bootindex) +{ + struct boot_device *new_device, *device; + + /* don't permit a negative bootindex */ + if (bootindex < 0) { + errx(4, "Invalid bootindex %d for %s", bootindex, pi->pi_name); + } + + /* alloc new boot device */ + new_device = calloc(1, sizeof(struct boot_device)); + if (new_device == NULL) { + return (ENOMEM); + } + new_device->pdi = pi; + new_device->bootindex = bootindex; + + /* search for boot device with higher boot index */ + TAILQ_FOREACH(device, &boot_devices, boot_device_chain) { + if (device->bootindex == bootindex) { + errx(4, + "Could not set bootindex %d for %s. 
Bootindex already occupied by %s", + bootindex, pi->pi_name, device->pdi->pi_name); + } else if (device->bootindex > bootindex) { + break; + } + } + + /* add boot device to queue */ + if (device == NULL) { + TAILQ_INSERT_TAIL(&boot_devices, new_device, boot_device_chain); + } else { + TAILQ_INSERT_BEFORE(device, new_device, boot_device_chain); + } + + return (0); +} + #define CAP_START_OFFSET 0x40 static int pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) @@ -1029,7 +1077,8 @@ pdi->pi_lintr.pirq_pin = 0; pdi->pi_lintr.ioapic_irq = 0; pdi->pi_d = pde; - snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); + snprintf(pdi->pi_name, PI_NAMESZ, "%s@pci.%d.%d.%d", pde->pe_emu, bus, + slot, func); /* Disable legacy interrupts */ pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); @@ -1361,6 +1410,27 @@ return (PCI_EMUL_ECFG_BASE); } +static int +init_bootorder(void) +{ + struct boot_device *device; + FILE *fp; + char *bootorder; + size_t bootorder_len; + + if (TAILQ_EMPTY(&boot_devices)) + return (0); + + fp = open_memstream(&bootorder, &bootorder_len); + TAILQ_FOREACH(device, &boot_devices, boot_device_chain) { + fprintf(fp, "/pci@i0cf8/pci@%d,%d\n", + device->pdi->pi_slot, device->pdi->pi_func); + } + fclose(fp); + + return (qemu_fwcfg_add_file("bootorder", bootorder_len, bootorder)); +} + #define BUSIO_ROUNDUP 32 #define BUSMEM32_ROUNDUP (1024 * 1024) #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) @@ -1390,6 +1460,8 @@ pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; + TAILQ_INIT(&boot_devices); + for (bus = 0; bus < MAXBUSES; bus++) { snprintf(node_name, sizeof(node_name), "pci.%d", bus); nvl = find_config_node(node_name); @@ -1497,6 +1569,11 @@ } lpc_pirq_routed(); + if ((error = init_bootorder()) != 0) { + warnx("%s: Unable to init bootorder", __func__); + return (error); + } + /* * The guest physical memory map looks like the following: * [0, lowmem) 
guest system memory @@ -2363,42 +2440,6 @@ return (ret); } -static int -pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, - struct pci_devinst **pdi) -{ - struct businfo *bi; - struct slotinfo *si; - struct funcinfo *fi; - int bus, slot, func; - - assert(dev_name != NULL); - assert(pde != NULL); - assert(pdi != NULL); - - for (bus = 0; bus < MAXBUSES; bus++) { - if ((bi = pci_businfo[bus]) == NULL) - continue; - - for (slot = 0; slot < MAXSLOTS; slot++) { - si = &bi->slotinfo[slot]; - for (func = 0; func < MAXFUNCS; func++) { - fi = &si->si_funcs[func]; - if (fi->fi_pde == NULL) - continue; - if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0) - continue; - - *pde = fi->fi_pde; - *pdi = fi->fi_devi; - return (0); - } - } - } - - return (EINVAL); -} - int pci_snapshot(struct vm_snapshot_meta *meta) { @@ -2408,57 +2449,26 @@ assert(meta->dev_name != NULL); - ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); - if (ret != 0) { - fprintf(stderr, "%s: no such name: %s\r\n", - __func__, meta->dev_name); - memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); - return (0); - } + pdi = meta->dev_data; + pde = pdi->pi_d; - meta->dev_data = pdi; - - if (pde->pe_snapshot == NULL) { - fprintf(stderr, "%s: not implemented yet for: %s\r\n", - __func__, meta->dev_name); - return (-1); - } + if (pde->pe_snapshot == NULL) + return (ENOTSUP); ret = pci_snapshot_pci_dev(meta); - if (ret != 0) { - fprintf(stderr, "%s: failed to snapshot pci dev\r\n", - __func__); - return (-1); - } - - ret = (*pde->pe_snapshot)(meta); + if (ret == 0) + ret = (*pde->pe_snapshot)(meta); return (ret); } int -pci_pause(const char *dev_name) +pci_pause(struct pci_devinst *pdi) { - struct pci_devemu *pde; - struct pci_devinst *pdi; - int ret; - - assert(dev_name != NULL); - - ret = pci_find_slotted_dev(dev_name, &pde, &pdi); - if (ret != 0) { - /* - * It is possible to call this function without - * checking that the device is inserted first. 
- */ - fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); - return (0); - } + struct pci_devemu *pde = pdi->pi_d; if (pde->pe_pause == NULL) { /* The pause/resume functionality is optional. */ - fprintf(stderr, "%s: not implemented for: %s\n", - __func__, dev_name); return (0); } @@ -2466,28 +2476,12 @@ } int -pci_resume(const char *dev_name) +pci_resume(struct pci_devinst *pdi) { - struct pci_devemu *pde; - struct pci_devinst *pdi; - int ret; - - assert(dev_name != NULL); - - ret = pci_find_slotted_dev(dev_name, &pde, &pdi); - if (ret != 0) { - /* - * It is possible to call this function without - * checking that the device is inserted first. - */ - fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); - return (0); - } + struct pci_devemu *pde = pdi->pi_d; if (pde->pe_resume == NULL) { /* The pause/resume functionality is optional. */ - fprintf(stderr, "%s: not implemented for: %s\n", - __func__, dev_name); return (0); } @@ -2664,6 +2658,42 @@ } #ifdef BHYVE_SNAPSHOT +struct pci_devinst * +pci_next(const struct pci_devinst *cursor) +{ + unsigned bus = 0, slot = 0, func = 0; + struct businfo *bi; + struct slotinfo *si; + struct funcinfo *fi; + + bus = cursor ? cursor->pi_bus : 0; + slot = cursor ? cursor->pi_slot : 0; + func = cursor ? 
(cursor->pi_func + 1) : 0; + + for (; bus < MAXBUSES; bus++) { + if ((bi = pci_businfo[bus]) == NULL) + continue; + + if (slot >= MAXSLOTS) + slot = 0; + + for (; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; + if (func >= MAXFUNCS) + func = 0; + for (; func < MAXFUNCS; func++) { + fi = &si->si_funcs[func]; + if (fi->fi_devi == NULL) + continue; + + return (fi->fi_devi); + } + } + } + + return (NULL); +} + static int pci_emul_snapshot(struct vm_snapshot_meta *meta __unused) { diff --git a/usr.sbin/bhyve/pci_hostbridge.c b/usr.sbin/bhyve/pci_hostbridge.c --- a/usr.sbin/bhyve/pci_hostbridge.c +++ b/usr.sbin/bhyve/pci_hostbridge.c @@ -86,6 +86,14 @@ return (0); } +#ifdef BHYVE_SNAPSHOT +static int +pci_de_snapshot(struct vm_snapshot_meta *meta __unused) +{ + return (0); +} +#endif + static const struct pci_devemu pci_de_amd_hostbridge = { .pe_emu = "amd_hostbridge", .pe_legacy_config = pci_amd_hostbridge_legacy_config, @@ -96,5 +104,8 @@ static const struct pci_devemu pci_de_hostbridge = { .pe_emu = "hostbridge", .pe_init = pci_hostbridge_init, +#ifdef BHYVE_SNAPSHOT + .pe_snapshot = pci_de_snapshot, +#endif }; PCI_EMUL_SET(pci_de_hostbridge); diff --git a/usr.sbin/bhyve/pci_nvme.c b/usr.sbin/bhyve/pci_nvme.c --- a/usr.sbin/bhyve/pci_nvme.c +++ b/usr.sbin/bhyve/pci_nvme.c @@ -3159,6 +3159,14 @@ sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE; } + value = get_config_value_node(nvl, "bootindex"); + if (value != NULL) { + if (pci_emul_add_boot_device(sc->nsc_pi, atoi(value))) { + EPRINTLN("Invalid bootindex %d", atoi(value)); + return (-1); + } + } + value = get_config_value_node(nvl, "ram"); if (value != NULL) { uint64_t sz = strtoull(value, NULL, 10); diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -471,6 +471,11 @@ return (1); } + if (blockif_add_boot_device(pi, bctxt)) { + perror("Invalid boot device"); + return (1); + } + size = 
blockif_size(bctxt); sectsz = blockif_sectsz(bctxt); blockif_psectsz(bctxt, &sts, &sto); diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -709,6 +709,15 @@ if (value != NULL) sc->vss_iid = strtoul(value, NULL, 10); + value = get_config_value_node(nvl, "bootindex"); + if (value != NULL) { + if (pci_emul_add_boot_device(pi, atoi(value))) { + EPRINTLN("Invalid bootindex %d", atoi(value)); + free(sc); + return (-1); + } + } + devname = get_config_value_node(nvl, "dev"); if (devname == NULL) devname = "/dev/cam/ctl"; diff --git a/usr.sbin/bhyve/snapshot.h b/usr.sbin/bhyve/snapshot.h --- a/usr.sbin/bhyve/snapshot.h +++ b/usr.sbin/bhyve/snapshot.h @@ -95,9 +95,9 @@ int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate); int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate); -int vm_restore_user_devs(struct restore_state *rstate); -int vm_pause_user_devs(void); -int vm_resume_user_devs(void); +int vm_restore_devices(struct restore_state *rstate); +int vm_pause_devices(void); +int vm_resume_devices(void); int get_checkpoint_msg(int conn_fd, struct vmctx *ctx); void *checkpoint_thread(void *param); diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c --- a/usr.sbin/bhyve/snapshot.c +++ b/usr.sbin/bhyve/snapshot.c @@ -47,7 +47,6 @@ #include #include -#include #ifndef WITHOUT_CAPSICUM #include @@ -86,6 +85,7 @@ #include "ioapic.h" #include "mem.h" #include "mevent.h" +#include "migration.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" @@ -119,10 +119,10 @@ #define SNAPSHOT_BUFFER_SIZE (20 * MB) -#define JSON_STRUCT_ARR_KEY "structs" +#define JSON_KERNEL_ARR_KEY "kern_structs" #define JSON_DEV_ARR_KEY "devices" #define JSON_BASIC_METADATA_KEY "basic metadata" -#define JSON_SNAPSHOT_REQ_KEY "snapshot_req" +#define JSON_SNAPSHOT_REQ_KEY "device" #define JSON_SIZE_KEY "size" #define JSON_FILE_OFFSET_KEY 
"file_offset" @@ -138,20 +138,6 @@ _a < _b ? _a : _b; \ }) -static const struct vm_snapshot_dev_info snapshot_devs[] = { - { "atkbdc", atkbdc_snapshot, NULL, NULL }, - { "virtio-net", pci_snapshot, pci_pause, pci_resume }, - { "virtio-blk", pci_snapshot, pci_pause, pci_resume }, - { "virtio-rnd", pci_snapshot, NULL, NULL }, - { "lpc", pci_snapshot, NULL, NULL }, - { "fbuf", pci_snapshot, NULL, NULL }, - { "xhci", pci_snapshot, NULL, NULL }, - { "e1000", pci_snapshot, NULL, NULL }, - { "ahci", pci_snapshot, pci_pause, pci_resume }, - { "ahci-hd", pci_snapshot, pci_pause, pci_resume }, - { "ahci-cd", pci_snapshot, pci_pause, pci_resume }, -}; - static const struct vm_snapshot_kern_info snapshot_kern_structs[] = { { "vhpet", STRUCT_VHPET }, { "vm", STRUCT_VM }, @@ -415,51 +401,6 @@ } \ } while(0) -static void * -lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate, - size_t *struct_size) -{ - const ucl_object_t *structs = NULL, *obj = NULL; - ucl_object_iter_t it = NULL; - int64_t snapshot_req, size, file_offset; - - structs = ucl_object_lookup(rstate->meta_root_obj, JSON_STRUCT_ARR_KEY); - if (structs == NULL) { - fprintf(stderr, "Failed to find '%s' object.\n", - JSON_STRUCT_ARR_KEY); - return (NULL); - } - - if (ucl_object_type(structs) != UCL_ARRAY) { - fprintf(stderr, "Object '%s' is not an array.\n", - JSON_STRUCT_ARR_KEY); - return (NULL); - } - - while ((obj = ucl_object_iterate(structs, &it, true)) != NULL) { - snapshot_req = -1; - JSON_GET_INT_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj, - &snapshot_req, NULL); - assert(snapshot_req >= 0); - if ((enum snapshot_req) snapshot_req == struct_id) { - JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, - &size, NULL); - assert(size >= 0); - - JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj, - &file_offset, NULL); - assert(file_offset >= 0); - assert((uint64_t)file_offset + size <= - rstate->kdata_len); - - *struct_size = (size_t)size; - return ((uint8_t *)rstate->kdata_map + file_offset); - } - } - - return 
(NULL); -} - static void * lookup_check_dev(const char *dev_name, struct restore_state *rstate, const ucl_object_t *obj, size_t *data_size) @@ -488,15 +429,15 @@ return (NULL); } -static void* -lookup_dev(const char *dev_name, struct restore_state *rstate, - size_t *data_size) +static void * +lookup_dev(const char *dev_name, const char *key, struct restore_state *rstate, + size_t *data_size) { const ucl_object_t *devs = NULL, *obj = NULL; ucl_object_iter_t it = NULL; void *ret; - devs = ucl_object_lookup(rstate->meta_root_obj, JSON_DEV_ARR_KEY); + devs = ucl_object_lookup(rstate->meta_root_obj, key); if (devs == NULL) { fprintf(stderr, "Failed to find '%s' object.\n", JSON_DEV_ARR_KEY); @@ -861,95 +802,69 @@ return (0); } -static int -vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate, - const struct vm_snapshot_kern_info *info) +int +vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate) { - void *struct_ptr; - size_t struct_size; - int ret; - struct vm_snapshot_meta *meta; - - struct_ptr = lookup_struct(info->req, rstate, &struct_size); - if (struct_ptr == NULL) { - fprintf(stderr, "%s: Failed to lookup struct %s\r\n", - __func__, info->struct_name); - ret = -1; - goto done; - } - - if (struct_size == 0) { - fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n", - __func__, info->struct_name); - ret = -1; - goto done; - } + for (unsigned i = 0; i < nitems(snapshot_kern_structs); i++) { + const struct vm_snapshot_kern_info *info; + struct vm_snapshot_meta *meta; + void *data; + size_t size; - meta = &(struct vm_snapshot_meta) { - .dev_name = info->struct_name, - .dev_req = info->req, + info = &snapshot_kern_structs[i]; + data = lookup_dev(info->struct_name, JSON_KERNEL_ARR_KEY, rstate, &size); + if (data == NULL) + errx(EX_DATAERR, "Cannot find kern struct %s", + info->struct_name); - .buffer.buf_start = struct_ptr, - .buffer.buf_size = struct_size, + if (size == 0) + errx(EX_DATAERR, "data with zero size for %s", + 
info->struct_name); - .buffer.buf = struct_ptr, - .buffer.buf_rem = struct_size, + meta = &(struct vm_snapshot_meta) { + .dev_name = info->struct_name, + .dev_req = info->req, - .op = VM_SNAPSHOT_RESTORE, - }; + .buffer.buf_start = data, + .buffer.buf_size = size, - ret = vm_snapshot_req(ctx, meta); - if (ret != 0) { - fprintf(stderr, "%s: Failed to restore struct: %s\r\n", - __func__, info->struct_name); - goto done; - } - -done: - return (ret); -} + .buffer.buf = data, + .buffer.buf_rem = size, -int -vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate) -{ - size_t i; - int ret; + .op = VM_SNAPSHOT_RESTORE, + }; - for (i = 0; i < nitems(snapshot_kern_structs); i++) { - ret = vm_restore_kern_struct(ctx, rstate, - &snapshot_kern_structs[i]); - if (ret != 0) - return (ret); + if (vm_snapshot_req(ctx, meta)) + err(EX_DATAERR, "Failed to restore %s", + info->struct_name); } - return (0); } static int -vm_restore_user_dev(struct restore_state *rstate, - const struct vm_snapshot_dev_info *info) +vm_restore_device(struct restore_state *rstate, vm_snapshot_dev_cb func, + const char *name, void *data) { void *dev_ptr; size_t dev_size; int ret; struct vm_snapshot_meta *meta; - dev_ptr = lookup_dev(info->dev_name, rstate, &dev_size); + dev_ptr = lookup_dev(name, JSON_DEV_ARR_KEY, rstate, &dev_size); + if (dev_ptr == NULL) { - fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name); - fprintf(stderr, "Continuing the restore/migration process\r\n"); - return (0); + EPRINTLN("Failed to lookup dev: %s", name); + return (EINVAL); } if (dev_size == 0) { - fprintf(stderr, "%s: Device size is 0. 
" - "Assuming %s is not used\r\n", - __func__, info->dev_name); - return (0); + EPRINTLN("Restore device size is 0: %s", name); + return (EINVAL); } meta = &(struct vm_snapshot_meta) { - .dev_name = info->dev_name, + .dev_name = name, + .dev_data = data, .buffer.buf_start = dev_ptr, .buffer.buf_size = dev_size, @@ -960,74 +875,66 @@ .op = VM_SNAPSHOT_RESTORE, }; - ret = (*info->snapshot_cb)(meta); + ret = func(meta); if (ret != 0) { - fprintf(stderr, "Failed to restore dev: %s\r\n", - info->dev_name); - return (-1); + EPRINTLN("Failed to restore dev: %s %d", name, ret); + return (ret); } return (0); } - int -vm_restore_user_devs(struct restore_state *rstate) +vm_restore_devices(struct restore_state *rstate) { - size_t i; int ret; + struct pci_devinst *pdi = NULL; - for (i = 0; i < nitems(snapshot_devs); i++) { - ret = vm_restore_user_dev(rstate, &snapshot_devs[i]); - if (ret != 0) + while ((pdi = pci_next(pdi)) != NULL) { + ret = vm_restore_device(rstate, pci_snapshot, pdi->pi_name, pdi); + if (ret) return (ret); } - return 0; + return (vm_restore_device(rstate, atkbdc_snapshot, "atkbdc", NULL)); } int -vm_pause_user_devs(void) +vm_pause_devices(void) { - const struct vm_snapshot_dev_info *info; - size_t i; int ret; + struct pci_devinst *pdi = NULL; - for (i = 0; i < nitems(snapshot_devs); i++) { - info = &snapshot_devs[i]; - if (info->pause_cb == NULL) - continue; - - ret = info->pause_cb(info->dev_name); - if (ret != 0) + while ((pdi = pci_next(pdi)) != NULL) { + ret = pci_pause(pdi); + if (ret) { + EPRINTLN("Cannot pause dev %s: %d", pdi->pi_name, ret); return (ret); + } } return (0); } int -vm_resume_user_devs(void) +vm_resume_devices(void) { - const struct vm_snapshot_dev_info *info; - size_t i; int ret; + struct pci_devinst *pdi = NULL; - for (i = 0; i < nitems(snapshot_devs); i++) { - info = &snapshot_devs[i]; - if (info->resume_cb == NULL) - continue; - - ret = info->resume_cb(info->dev_name); - if (ret != 0) + while ((pdi = pci_next(pdi)) != NULL) { + ret 
= pci_resume(pdi); + if (ret) { + EPRINTLN("Cannot resume '%s': %d", pdi->pi_name, ret); return (ret); + } } return (0); } static int -vm_snapshot_kern_struct(struct vmctx *ctx, int data_fd, xo_handle_t *xop, +vm_save_kern_struct(struct vmctx *ctx, int data_fd, xo_handle_t *xop, const char *array_key, struct vm_snapshot_meta *meta, off_t *offset) { int ret; @@ -1054,12 +961,11 @@ /* Write metadata. */ xo_open_instance_h(xop, array_key); - xo_emit_h(xop, "{:debug_name/%s}\n", meta->dev_name); - xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n", - meta->dev_req); + xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%s}\n", + meta->dev_name); xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); - xo_close_instance_h(xop, JSON_STRUCT_ARR_KEY); + xo_close_instance_h(xop, JSON_KERNEL_ARR_KEY); *offset += data_size; @@ -1068,7 +974,7 @@ } static int -vm_snapshot_kern_structs(struct vmctx *ctx, int data_fd, xo_handle_t *xop) +vm_save_kern_structs(struct vmctx *ctx, int data_fd, xo_handle_t *xop) { int ret, error; size_t buf_size, i, offset; @@ -1093,7 +999,7 @@ .op = VM_SNAPSHOT_SAVE, }; - xo_open_list_h(xop, JSON_STRUCT_ARR_KEY); + xo_open_list_h(xop, JSON_KERNEL_ARR_KEY); for (i = 0; i < nitems(snapshot_kern_structs); i++) { meta->dev_name = snapshot_kern_structs[i].struct_name; meta->dev_req = snapshot_kern_structs[i].req; @@ -1102,14 +1008,14 @@ meta->buffer.buf = meta->buffer.buf_start; meta->buffer.buf_rem = meta->buffer.buf_size; - ret = vm_snapshot_kern_struct(ctx, data_fd, xop, + ret = vm_save_kern_struct(ctx, data_fd, xop, JSON_DEV_ARR_KEY, meta, &offset); if (ret != 0) { error = -1; goto err_vm_snapshot_kern_data; } } - xo_close_list_h(xop, JSON_STRUCT_ARR_KEY); + xo_close_list_h(xop, JSON_KERNEL_ARR_KEY); err_vm_snapshot_kern_data: if (buffer != NULL) @@ -1160,16 +1066,21 @@ } static int -vm_snapshot_user_dev(const struct vm_snapshot_dev_info *info, - int data_fd, xo_handle_t *xop, - struct 
vm_snapshot_meta *meta, off_t *offset) +vm_snapshot_device(vm_snapshot_dev_cb func, const char *dev_name, + void *devdata, int data_fd, xo_handle_t *xop, + struct vm_snapshot_meta *meta, off_t *offset) { int ret; - ret = (*info->snapshot_cb)(meta); + memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); + meta->buffer.buf = meta->buffer.buf_start; + meta->buffer.buf_rem = meta->buffer.buf_size; + meta->dev_name = dev_name; + meta->dev_data = devdata; + + ret = func(meta); if (ret != 0) { - fprintf(stderr, "Failed to snapshot %s; ret=%d\r\n", - meta->dev_name, ret); + EPRINTLN("Failed to snapshot %s; ret=%d", dev_name, ret); return (ret); } @@ -1182,13 +1093,14 @@ } static int -vm_snapshot_user_devs(int data_fd, xo_handle_t *xop) +vm_snapshot_devices(int data_fd, xo_handle_t *xop) { int ret; off_t offset; void *buffer; - size_t buf_size, i; + size_t buf_size; struct vm_snapshot_meta *meta; + struct pci_devinst *pdi; buf_size = SNAPSHOT_BUFFER_SIZE; @@ -1214,20 +1126,18 @@ xo_open_list_h(xop, JSON_DEV_ARR_KEY); - /* Restore other devices that support this feature */ - for (i = 0; i < nitems(snapshot_devs); i++) { - meta->dev_name = snapshot_devs[i].dev_name; - - memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); - meta->buffer.buf = meta->buffer.buf_start; - meta->buffer.buf_rem = meta->buffer.buf_size; - - ret = vm_snapshot_user_dev(&snapshot_devs[i], data_fd, xop, - meta, &offset); + /* Save PCI devices */ + pdi = NULL; + while ((pdi = pci_next(pdi)) != NULL) { + ret = vm_snapshot_device(pci_snapshot, pdi->pi_name, pdi, + data_fd, xop, meta, &offset); if (ret != 0) goto snapshot_err; } + ret = vm_snapshot_device(atkbdc_snapshot, "atkbdc", NULL, + data_fd, xop, meta, &offset); + xo_close_list_h(xop, JSON_DEV_ARR_KEY); snapshot_err: @@ -1364,7 +1274,7 @@ vm_vcpu_pause(ctx); - ret = vm_pause_user_devs(); + ret = vm_pause_devices(); if (ret != 0) { fprintf(stderr, "Could not pause devices\r\n"); error = ret; @@ -1385,15 +1295,14 @@ goto done; } - - ret = 
vm_snapshot_kern_structs(ctx, kdata_fd, xop); + ret = vm_save_kern_structs(ctx, kdata_fd, xop); if (ret != 0) { fprintf(stderr, "Failed to snapshot vm kernel data.\n"); error = -1; goto done; } - ret = vm_snapshot_user_devs(kdata_fd, xop); + ret = vm_snapshot_devices(kdata_fd, xop); if (ret != 0) { fprintf(stderr, "Failed to snapshot device state.\n"); error = -1; @@ -1408,7 +1317,7 @@ } done: - ret = vm_resume_user_devs(); + ret = vm_resume_devices(); if (ret != 0) fprintf(stderr, "Could not resume devices\r\n"); vm_vcpu_resume(ctx); @@ -1491,6 +1400,40 @@ } IPC_COMMAND(ipc_cmd_set, checkpoint, vm_do_checkpoint); +static int +vm_do_migrate(struct vmctx __unused *ctx, const nvlist_t *nvl) +{ + size_t len; + struct migrate_req req; + + if (!nvlist_exists_string(nvl, "hostname") || + !nvlist_exists_number(nvl, "port")) + return (EINVAL); + + memset(&req, 0, sizeof(struct migrate_req)); + req.port = nvlist_get_number(nvl, "port"); + + len = strlen(nvlist_get_string(nvl, "hostname")); + if (len > MAXHOSTNAMELEN - 1) { + EPRINTLN("Hostname length %lu bigger than maximum allowed %d", + len, MAXHOSTNAMELEN - 1); + return (EINVAL); + } + + strlcpy(req.host, nvlist_get_string(nvl, "hostname"), MAXHOSTNAMELEN); + + printf("%s: IP address used for migration: %s;\n" + "Port used for migration: %d\n", + __func__, + req.host, + req.port); + + // return (vm_send_migrate_req(ctx, req, nvlist_get_bool(nvl, "live"))); + EPRINTLN("Migration operation not implemented yet\n"); + return (EOPNOTSUPP); +} +IPC_COMMAND(ipc_cmd_set, migrate, vm_do_migrate); + void init_snapshot(void) { diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -34,7 +34,6 @@ #include #include -#include #include #include #include diff --git a/usr.sbin/bhyve/tpm_device.c b/usr.sbin/bhyve/tpm_device.c --- a/usr.sbin/bhyve/tpm_device.c +++ b/usr.sbin/bhyve/tpm_device.c @@ -35,9 +35,22 @@ void *intf_sc; }; +static int 
+tpm_build_acpi_table(const struct acpi_device *const dev) +{ + const struct tpm_device *const tpm = acpi_device_get_softc(dev); + + if (tpm->intf->build_acpi_table == NULL) { + return (0); + } + + return (tpm->intf->build_acpi_table(tpm->intf_sc, tpm->vm_ctx)); +} + static const struct acpi_device_emul tpm_acpi_device_emul = { .name = TPM_ACPI_DEVICE_NAME, .hid = TPM_ACPI_HARDWARE_ID, + .build_table = tpm_build_acpi_table, }; void @@ -128,7 +141,7 @@ } if (dev->intf->init) { - error = dev->intf->init(&dev->intf_sc); + error = dev->intf->init(&dev->intf_sc, dev->emul, dev->emul_sc); if (error) goto err_out; } diff --git a/usr.sbin/bhyve/tpm_emul.h b/usr.sbin/bhyve/tpm_emul.h --- a/usr.sbin/bhyve/tpm_emul.h +++ b/usr.sbin/bhyve/tpm_emul.h @@ -18,5 +18,7 @@ int (*init)(void **sc, nvlist_t *nvl); void (*deinit)(void *sc); + int (*execute_cmd)(void *sc, void *cmd, uint32_t cmd_size, void *rsp, + uint32_t rsp_size); }; #define TPM_EMUL_SET(x) DATA_SET(tpm_emul_set, x) diff --git a/usr.sbin/bhyve/tpm_intf.h b/usr.sbin/bhyve/tpm_intf.h --- a/usr.sbin/bhyve/tpm_intf.h +++ b/usr.sbin/bhyve/tpm_intf.h @@ -1,35 +1,38 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2022 Beckhoff Automation GmbH & Co. 
KG - * Author: Corvin Köhne - */ - -#pragma once - -#include "config.h" -#include "tpm_device.h" - -#define TPM_INTF_TYPE_FIFO_PTP 0x0 -#define TPM_INTF_TYPE_CRB 0x1 -#define TPM_INTF_TYPE_FIFO_TIS 0xF - -#define TPM_INTF_VERSION_FIFO 0 -#define TPM_INTF_VERSION_CRB 1 - -#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_4 0 -#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_8 1 -#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_32 2 -#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_64 3 - -#define TPM_INTF_SELECTOR_FIFO 0 -#define TPM_INTF_SELECTOR_CRB 1 - -struct tpm_intf { - const char *name; - - int (*init)(void **sc); - void (*deinit)(void *sc); - int (*build_acpi_table)(void *sc); -}; -#define TPM_INTF_SET(x) DATA_SET(tpm_intf_set, x) +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2022 Beckhoff Automation GmbH & Co. KG + * Author: Corvin Köhne + */ + +#pragma once + +#include + +#include "config.h" +#include "tpm_device.h" +#include "tpm_emul.h" + +#define TPM_INTF_TYPE_FIFO_PTP 0x0 +#define TPM_INTF_TYPE_CRB 0x1 +#define TPM_INTF_TYPE_FIFO_TIS 0xF + +#define TPM_INTF_VERSION_FIFO 0 +#define TPM_INTF_VERSION_CRB 1 + +#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_4 0 +#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_8 1 +#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_32 2 +#define TPM_INTF_CAP_CRB_DATA_XFER_SIZE_64 3 + +#define TPM_INTF_SELECTOR_FIFO 0 +#define TPM_INTF_SELECTOR_CRB 1 + +struct tpm_intf { + const char *name; + + int (*init)(void **sc, struct tpm_emul *emul, void *emul_sc); + void (*deinit)(void *sc); + int (*build_acpi_table)(void *sc, struct vmctx *vm_ctx); +}; +#define TPM_INTF_SET(x) DATA_SET(tpm_intf_set, x) diff --git a/usr.sbin/bhyve/tpm_intf_crb.c b/usr.sbin/bhyve/tpm_intf_crb.c --- a/usr.sbin/bhyve/tpm_intf_crb.c +++ b/usr.sbin/bhyve/tpm_intf_crb.c @@ -26,17 +26,28 @@ #include "config.h" #include "mem.h" #include "qemu_fwcfg.h" +#include "tpm_device.h" #include "tpm_intf.h" #define TPM_CRB_ADDRESS 0xFED40000 #define TPM_CRB_REGS_SIZE 0x1000 +#define 
TPM_CRB_CONTROL_AREA_ADDRESS \ + (TPM_CRB_ADDRESS + offsetof(struct tpm_crb_regs, ctrl_req)) +#define TPM_CRB_CONTROL_AREA_SIZE TPM_CRB_REGS_SIZE + #define TPM_CRB_DATA_BUFFER_ADDRESS \ (TPM_CRB_ADDRESS + offsetof(struct tpm_crb_regs, data_buffer)) #define TPM_CRB_DATA_BUFFER_SIZE 0xF80 #define TPM_CRB_LOCALITIES_MAX 5 +#define TPM_CRB_LOG_AREA_MINIMUM_SIZE (64 * 1024) + +#define TPM_CRB_LOG_AREA_FWCFG_NAME "etc/tpm/log" + +#define TPM_CRB_INTF_NAME "crb" + struct tpm_crb_regs { union tpm_crb_reg_loc_state { struct { @@ -156,16 +167,82 @@ } while (0) struct tpm_crb { + struct tpm_emul *emul; + void *emul_sc; + uint8_t tpm_log_area[TPM_CRB_LOG_AREA_MINIMUM_SIZE]; struct tpm_crb_regs regs; + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + bool closing; }; +static void * +tpm_crb_thread(void *const arg) +{ + struct tpm_crb *const crb = arg; + + pthread_mutex_lock(&crb->mutex); + for (;;) { + pthread_cond_wait(&crb->cond, &crb->mutex); + + if (crb->closing) + break; + + const uint64_t cmd_addr = CRB_CMD_ADDR_READ(crb->regs); + const uint64_t rsp_addr = CRB_RSP_ADDR_READ(crb->regs); + const uint32_t cmd_size = CRB_CMD_SIZE_READ(crb->regs); + const uint32_t rsp_size = CRB_RSP_SIZE_READ(crb->regs); + + const uint64_t cmd_off = cmd_addr - TPM_CRB_DATA_BUFFER_ADDRESS; + const uint64_t rsp_off = rsp_addr - TPM_CRB_DATA_BUFFER_ADDRESS; + + if (cmd_off > TPM_CRB_DATA_BUFFER_SIZE || + cmd_off + cmd_size > TPM_CRB_DATA_BUFFER_SIZE || + rsp_off > TPM_CRB_DATA_BUFFER_SIZE || + rsp_off + rsp_size > TPM_CRB_DATA_BUFFER_SIZE) { + warnx( + "%s: invalid cmd [%16lx, %16lx] --> [%16lx, %16lx]\n\r", + __func__, cmd_addr, cmd_addr + cmd_size, rsp_addr, + rsp_addr + rsp_size); + break; + } + + /* + * The command response buffer interface uses a single buffer + * for sending a command to and receiving a response from the + * tpm. 
To avoid reading old data from the command buffer which + * might be a security issue, we zero out the command buffer + * before writing the response into it. The rsp_size parameter + * is controlled by the guest and it's not guaranteed that the + * response has a size of rsp_size (e.g. if the tpm returned an + * error, the response would have a different size than + * expected). For that reason, use a second buffer for the + * response. + */ + uint8_t rsp[TPM_CRB_DATA_BUFFER_SIZE] = { 0 }; + crb->emul->execute_cmd(crb->emul_sc, + &crb->regs.data_buffer[cmd_off], cmd_size, &rsp[rsp_off], + rsp_size); + + memset(crb->regs.data_buffer, 0, TPM_CRB_DATA_BUFFER_SIZE); + memcpy(&crb->regs.data_buffer[rsp_off], &rsp[rsp_off], rsp_size); + + crb->regs.ctrl_start.start = false; + } + pthread_mutex_unlock(&crb->mutex); + + return (NULL); +} + static int -tpm_crb_init(void **sc) +tpm_crb_init(void **sc, struct tpm_emul *emul, void *emul_sc) { struct tpm_crb *crb = NULL; int error; assert(sc != NULL); + assert(emul != NULL); crb = calloc(1, sizeof(struct tpm_crb)); if (crb == NULL) { @@ -176,6 +253,9 @@ memset(crb, 0, sizeof(*crb)); + crb->emul = emul; + crb->emul_sc = emul_sc; + crb->regs.loc_state.tpm_req_valid_sts = true; crb->regs.loc_state.tpm_established = true; @@ -200,6 +280,33 @@ CRB_RSP_SIZE_WRITE(crb->regs, TPM_CRB_DATA_BUFFER_SIZE); CRB_RSP_ADDR_WRITE(crb->regs, TPM_CRB_DATA_BUFFER_ADDRESS); + error = qemu_fwcfg_add_file(TPM_CRB_LOG_AREA_FWCFG_NAME, + TPM_CRB_LOG_AREA_MINIMUM_SIZE, crb->tpm_log_area); + if (error) { + warnx("%s: failed to add fwcfg file", __func__); + goto err_out; + } + + error = pthread_mutex_init(&crb->mutex, NULL); + if (error) { + warnc(error, "%s: failed to init mutex", __func__); + goto err_out; + } + + error = pthread_cond_init(&crb->cond, NULL); + if (error) { + warnc(error, "%s: failed to init cond", __func__); + goto err_out; + } + + error = pthread_create(&crb->thread, NULL, tpm_crb_thread, crb); + if (error) { + warnx("%s: failed to 
create thread\n", __func__); + goto err_out; + } + + pthread_set_name_np(crb->thread, "tpm_intf_crb"); + *sc = crb; return (0); @@ -221,12 +328,54 @@ crb = sc; + crb->closing = true; + pthread_cond_signal(&crb->cond); + pthread_join(crb->thread, NULL); + + pthread_cond_destroy(&crb->cond); + pthread_mutex_destroy(&crb->mutex); + free(crb); } +static int +tpm_crb_build_acpi_table(void *sc __unused, struct vmctx *vm_ctx) +{ + struct basl_table *table; + + BASL_EXEC(basl_table_create(&table, vm_ctx, ACPI_SIG_TPM2, + BASL_TABLE_ALIGNMENT)); + + /* Header */ + BASL_EXEC(basl_table_append_header(table, ACPI_SIG_TPM2, 4, 1)); + /* Platform Class */ + BASL_EXEC(basl_table_append_int(table, 0, 2)); + /* Reserved */ + BASL_EXEC(basl_table_append_int(table, 0, 2)); + /* Control Address */ + BASL_EXEC( + basl_table_append_int(table, TPM_CRB_CONTROL_AREA_ADDRESS, 8)); + /* Start Method == (7) Command Response Buffer */ + BASL_EXEC(basl_table_append_int(table, 7, 4)); + /* Start Method Specific Parameters */ + uint8_t parameters[12] = { 0 }; + BASL_EXEC(basl_table_append_bytes(table, parameters, 12)); + /* Log Area Minimum Length */ + BASL_EXEC( + basl_table_append_int(table, TPM_CRB_LOG_AREA_MINIMUM_SIZE, 4)); + /* Log Area Start Address */ + BASL_EXEC( + basl_table_append_fwcfg(table, TPM_CRB_LOG_AREA_FWCFG_NAME, 1, 8)); + + BASL_EXEC(basl_table_register_to_rsdt(table)); + + return (0); +} + static struct tpm_intf tpm_intf_crb = { - .name = "crb", + .name = TPM_CRB_INTF_NAME, .init = tpm_crb_init, .deinit = tpm_crb_deinit, + .build_acpi_table = tpm_crb_build_acpi_table, }; TPM_INTF_SET(tpm_intf_crb); diff --git a/usr.sbin/bhyvectl/bhyvectl.8 b/usr.sbin/bhyvectl/bhyvectl.8 --- a/usr.sbin/bhyvectl/bhyvectl.8 +++ b/usr.sbin/bhyvectl/bhyvectl.8 @@ -41,6 +41,11 @@ .Op Fl -force-poweroff .Op Fl -checkpoint= Ns Ar .Op Fl -suspend= Ns Ar +.Oo +.Fl -migrate= Ns Ar host Ns Op Cm \&: Ns Ar port +| +.Fl -migrate-live= Ns Ar host Ns Op Cm \&: Ns Ar port +.Oc .Sh DESCRIPTION The .Nm @@ 
-85,6 +90,20 @@ .Fl -checkpoint . The virtual machine will terminate after the snapshot has been saved. +.It Fl -migrate= Ns Ar host Ns Op Cm \&: Ns Ar port +Warm migrate the virtual machine to a +.Ar host +on the specified +.Ar port . +The default migration port is 24983. +The virtual machine will be destroyed after the migration finishes. +.It Fl -migrate-live= Ns Ar host Ns Op Cm \&: Ns Ar port +Live migrate the virtual machine to a +.Ar host +on the specified +.Ar port . +The default migration port is 24983. +The virtual machine will be destroyed after the migration finishes. .El .Sh EXIT STATUS .Ex -std diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -65,6 +65,7 @@ #ifdef BHYVE_SNAPSHOT #include "snapshot.h" +#include "migration.h" #endif #define MB (1UL << 20) @@ -87,6 +88,7 @@ " [--destroy]\n" #ifdef BHYVE_SNAPSHOT " [--checkpoint= | --suspend=]\n" + " [--migrate=[:] | --migrate-live=[:]]\n" #endif " [--get-all]\n" " [--get-stats]\n" @@ -299,6 +301,7 @@ static int get_cpu_topology; #ifdef BHYVE_SNAPSHOT static int vm_suspend_opt; +static int vm_migrate_live; #endif /* @@ -589,6 +592,8 @@ #ifdef BHYVE_SNAPSHOT SET_CHECKPOINT_FILE, SET_SUSPEND_FILE, + MIGRATE_VM, + MIGRATE_VM_LIVE, #endif }; @@ -1456,6 +1461,8 @@ #ifdef BHYVE_SNAPSHOT { "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE}, { "suspend", REQ_ARG, 0, SET_SUSPEND_FILE}, + { "migrate", REQ_ARG, 0, MIGRATE_VM}, + { "migrate-live", REQ_ARG, 0, MIGRATE_VM_LIVE}, #endif }; @@ -1743,7 +1750,42 @@ return (send_message(vmname, nvl)); } -#endif + +static int +migration_request(const char *vmname, const char *migrate_vm, bool live) +{ + nvlist_t *nvl; + char *hostname, *pos; + int rc; + unsigned int port = DEFAULT_MIGRATION_PORT; + + hostname = strdup(migrate_vm); + + if ((pos = strchr(hostname, ':')) != NULL) { + *pos = '\0'; + pos = pos + 1; + + rc = sscanf(pos, "%u", &port); + + if (rc <= 0) { + fprintf(stderr, 
"Could not parse the port\n"); + free(hostname); + return (EINVAL); + } + } + + nvl = nvlist_create(0); + nvlist_add_string(nvl, "cmd", "migrate"); + nvlist_add_string(nvl, "hostname", hostname); + nvlist_add_number(nvl, "port", port); + nvlist_add_bool(nvl, "live", live); + + free(hostname); + + return (send_message(vmname, nvl)); +} + +#endif /* BHYVE_SNAPSHOT */ int main(int argc, char *argv[]) @@ -1763,7 +1805,7 @@ struct tm tm; struct option *opts; #ifdef BHYVE_SNAPSHOT - char *checkpoint_file = NULL; + char *checkpoint_file = NULL, *migrate_host = NULL; #endif cpu_intel = cpu_vendor_intel(); @@ -1932,6 +1974,14 @@ checkpoint_file = optarg; vm_suspend_opt = (ch == SET_SUSPEND_FILE); break; + case MIGRATE_VM: + case MIGRATE_VM_LIVE: + if (migrate_host != NULL) + usage(cpu_intel); + + migrate_host = optarg; + vm_migrate_live = (ch == MIGRATE_VM_LIVE); + break; #endif default: usage(cpu_intel); @@ -2414,6 +2464,9 @@ #ifdef BHYVE_SNAPSHOT if (!error && checkpoint_file) error = snapshot_request(vmname, checkpoint_file, vm_suspend_opt); + + if (!error && migrate_host) + error = migration_request(vmname, migrate_host, vm_migrate_live); #endif free (opts); diff --git a/usr.sbin/bluetooth/ath3kfw/ath3k_fw.c b/usr.sbin/bluetooth/ath3kfw/ath3k_fw.c --- a/usr.sbin/bluetooth/ath3kfw/ath3k_fw.c +++ b/usr.sbin/bluetooth/ath3kfw/ath3k_fw.c @@ -49,7 +49,6 @@ struct stat sb; unsigned char *buf; ssize_t r; - int i; fd = open(fwname, O_RDONLY); if (fd < 0) { @@ -70,7 +69,6 @@ return (0); } - i = 0; /* XXX handle partial reads */ r = read(fd, buf, sb.st_size); if (r < 0) { diff --git a/usr.sbin/bluetooth/ath3kfw/main.c b/usr.sbin/bluetooth/ath3kfw/main.c --- a/usr.sbin/bluetooth/ath3kfw/main.c +++ b/usr.sbin/bluetooth/ath3kfw/main.c @@ -155,7 +155,7 @@ ret = ath3k_load_patch(hdl, fw_path); if (ret < 0) { ath3k_err("Loading patch file failed\n"); - return (ret); + return (ret); } ret = ath3k_load_syscfg(hdl, fw_path); @@ -199,7 +199,7 @@ /* free it */ ath3k_fw_free(&fw); - return 
(0); + return (ret); } /* diff --git a/usr.sbin/bluetooth/iwmbtfw/iwmbt_fw.c b/usr.sbin/bluetooth/iwmbtfw/iwmbt_fw.c --- a/usr.sbin/bluetooth/iwmbtfw/iwmbt_fw.c +++ b/usr.sbin/bluetooth/iwmbtfw/iwmbt_fw.c @@ -50,7 +50,6 @@ struct stat sb; unsigned char *buf; ssize_t r; - int i; fd = open(fwname, O_RDONLY); if (fd < 0) { @@ -71,7 +70,6 @@ return (0); } - i = 0; /* XXX handle partial reads */ r = read(fd, buf, sb.st_size); if (r < 0) { diff --git a/usr.sbin/bsnmpd/modules/snmp_bridge/bridge_sys.c b/usr.sbin/bsnmpd/modules/snmp_bridge/bridge_sys.c --- a/usr.sbin/bsnmpd/modules/snmp_bridge/bridge_sys.c +++ b/usr.sbin/bsnmpd/modules/snmp_bridge/bridge_sys.c @@ -1223,7 +1223,7 @@ int bridge_update_memif(struct bridge_if *bif) { - int added, updated; + int updated; uint32_t i; int32_t buf_len; struct ifbreq *b_req_buf, *b_req; @@ -1234,7 +1234,7 @@ if ((buf_len = bridge_port_get_iflist(bif, &b_req_buf)) < 0) return (-1); - added = updated = 0; + updated = 0; #define BP_FOUND 0x01 for (i = 0; i < buf_len / sizeof(struct ifbreq); i++) { @@ -1249,7 +1249,6 @@ if ((bp = bridge_port_find(m_if->index, bif)) == NULL && (bp = bridge_new_port(m_if, bif)) != NULL) { bp->status = RowStatus_active; - added++; } if (bp != NULL) { diff --git a/usr.sbin/fwget/fwget.sh b/usr.sbin/fwget/fwget.sh --- a/usr.sbin/fwget/fwget.sh +++ b/usr.sbin/fwget/fwget.sh @@ -7,7 +7,7 @@ # Copyright 2023 Bjoern A. Zeeb # # Redistribution and use in source and binary forms, with or without -# modification, are permitted providing that the following conditions +# modification, are permitted providing that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. @@ -27,12 +27,12 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
-: ${LIBEXEC_PATH:="/usr/libexec/fwget"} +: "${LIBEXEC_PATH:='/usr/libexec/fwget'}" usage() { cat <extended_table_length ); printf( " extended table checksum:\t%d\n", cth->extended_table_checksum ); - totalSize = cth->base_table_length - sizeof( struct MPCTH ); - puts( SEP_LINE ); printf( "MP Config Base Table Entries:\n\n" ); diff --git a/usr.sbin/nscd/cacheplcs.c b/usr.sbin/nscd/cacheplcs.c --- a/usr.sbin/nscd/cacheplcs.c +++ b/usr.sbin/nscd/cacheplcs.c @@ -159,11 +159,9 @@ cache_queue_policy_get_next_item(struct cache_policy_ *policy, struct cache_policy_item_ *item) { - struct cache_queue_policy_ *queue_policy; struct cache_queue_policy_item_ *queue_item; TRACE_IN(cache_queue_policy_get_next_item); - queue_policy = (struct cache_queue_policy_ *)policy; queue_item = (struct cache_queue_policy_item_ *)item; TRACE_OUT(cache_queue_policy_get_next_item); @@ -174,11 +172,9 @@ cache_queue_policy_get_prev_item(struct cache_policy_ *policy, struct cache_policy_item_ *item) { - struct cache_queue_policy_ *queue_policy; struct cache_queue_policy_item_ *queue_item; TRACE_IN(cache_queue_policy_get_prev_item); - queue_policy = (struct cache_queue_policy_ *)policy; queue_item = (struct cache_queue_policy_item_ *)item; TRACE_OUT(cache_queue_policy_get_prev_item); diff --git a/usr.sbin/nscd/nscd.c b/usr.sbin/nscd/nscd.c --- a/usr.sbin/nscd/nscd.c +++ b/usr.sbin/nscd/nscd.c @@ -105,7 +105,6 @@ struct configuration_entry *config_entry; size_t size, i; - int res; TRACE_IN(init_cache_); @@ -120,14 +119,14 @@ * We should register common entries now - multipart entries * would be registered automatically during the queries. 
*/ - res = register_cache_entry(retval, (struct cache_entry_params *) + register_cache_entry(retval, (struct cache_entry_params *) &config_entry->positive_cache_params); config_entry->positive_cache_entry = find_cache_entry(retval, config_entry->positive_cache_params.cep.entry_name); assert(config_entry->positive_cache_entry != INVALID_CACHE_ENTRY); - res = register_cache_entry(retval, (struct cache_entry_params *) + register_cache_entry(retval, (struct cache_entry_params *) &config_entry->negative_cache_params); config_entry->negative_cache_entry = find_cache_entry(retval, config_entry->negative_cache_params.cep.entry_name); diff --git a/usr.sbin/nscd/nscdcli.c b/usr.sbin/nscd/nscdcli.c --- a/usr.sbin/nscd/nscdcli.c +++ b/usr.sbin/nscd/nscdcli.c @@ -139,7 +139,6 @@ struct kevent eventlist; int nevents; ssize_t result; - int res; TRACE_IN(send_credentials); memset(&cmsg, 0, sizeof(cmsg)); @@ -158,7 +157,7 @@ EV_SET(&eventlist, connection->sockfd, EVFILT_WRITE, EV_ADD, NOTE_LOWAT, sizeof(int), NULL); - res = kevent(connection->write_queue, &eventlist, 1, NULL, 0, NULL); + kevent(connection->write_queue, &eventlist, 1, NULL, 0, NULL); nevents = kevent(connection->write_queue, NULL, 0, &eventlist, 1, NULL); if ((nevents == 1) && (eventlist.filter == EVFILT_WRITE)) { diff --git a/usr.sbin/rpc.lockd/kern.c b/usr.sbin/rpc.lockd/kern.c --- a/usr.sbin/rpc.lockd/kern.c +++ b/usr.sbin/rpc.lockd/kern.c @@ -572,16 +572,11 @@ show(LOCKD_MSG *mp) { static char hex[] = "0123456789abcdef"; - struct fid *fidp; - fsid_t *fsidp; size_t len; u_int8_t *p, *t, buf[NFS_SMALLFH*3+1]; syslog(LOG_DEBUG, "process ID: %lu\n", (long)mp->lm_msg_ident.pid); - fsidp = (fsid_t *)&mp->lm_fh; - fidp = (struct fid *)((u_int8_t *)&mp->lm_fh + sizeof(fsid_t)); - for (t = buf, p = (u_int8_t *)mp->lm_fh, len = mp->lm_fh_len; len > 0; ++p, --len) { diff --git a/usr.sbin/service/service.8 b/usr.sbin/service/service.8 --- a/usr.sbin/service/service.8 +++ b/usr.sbin/service/service.8 @@ -48,6 +48,7 @@ .Nm 
.Op Fl j Ar jail .Op Fl v +.Op Fl E Ar var=value .Ar script .Ar command .Sh DESCRIPTION @@ -67,6 +68,13 @@ .Pp The options are as follows: .Bl -tag -width F1 +.It Fl E Ar var=value +Set the environment variable +.Ar var +to the specified +.Ar value +before starting the script. +This option can be used multiple times. .It Fl e List services that are enabled. The list of scripts to check is compiled using @@ -117,6 +125,9 @@ which is how they are set in .Pa /etc/rc at boot time. +If the +.Fl E +option is used, the corresponding variable is set accordingly. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES @@ -126,6 +137,7 @@ .Bd -literal -offset -ident service named status service -j dns named status +service -E LC_ALL=C.UTF-8 named start service -rv .Ed .Pp diff --git a/usr.sbin/service/service.sh b/usr.sbin/service/service.sh --- a/usr.sbin/service/service.sh +++ b/usr.sbin/service/service.sh @@ -37,21 +37,23 @@ echo "${0##*/} [-j ] -e" echo "${0##*/} [-j ] -R" echo "${0##*/} [-j ] [-v] -l | -r" - echo "${0##*/} [-j ] [-v] start|stop|etc." + echo "${0##*/} [-j ] [-v] [-E var=value] start|stop|etc." 
echo "${0##*/} -h" echo '' - echo "-j Perform actions within the named jail" - echo '-e Show services that are enabled' - echo "-R Stop and start enabled $local_startup services" - echo "-l List all scripts in /etc/rc.d and $local_startup" - echo '-r Show the results of boot time rcorder' - echo '-v Verbose' + echo "-j Perform actions within the named jail" + echo "-E n=val Set variable n to val before executing the rc.d script" + echo '-e Show services that are enabled' + echo "-R Stop and start enabled $local_startup services" + echo "-l List all scripts in /etc/rc.d and $local_startup" + echo '-r Show the results of boot time rcorder' + echo '-v Verbose' echo '' } -while getopts 'j:ehlrRv' COMMAND_LINE_ARGUMENT ; do +while getopts 'j:E:ehlrRv' COMMAND_LINE_ARGUMENT ; do case "${COMMAND_LINE_ARGUMENT}" in j) JAIL="${OPTARG}" ;; + E) VARS="${VARS} ${OPTARG}" ;; e) ENABLED=eopt ;; h) usage ; exit 0 ;; l) LIST=lopt ;; @@ -72,6 +74,9 @@ [ -n "${RCORDER}" ] && args="${args} -r" [ -n "${RESTART}" ] && args="${args} -R" [ -n "${VERBOSE}" ] && args="${args} -v" + for var in ${VARS}; do + args="${args} -E ${var}" + done # Call jexec(8) with the rebuild args and any positional args that # were left in $@ @@ -171,7 +176,7 @@ for dir in /etc/rc.d $local_startup; do if [ -x "$dir/$script" ]; then [ -n "$VERBOSE" ] && echo "$script is located in $dir" - exec env -i -L -/daemon HOME=/ PATH=/sbin:/bin:/usr/sbin:/usr/bin "$dir/$script" "$@" + exec env -i -L -/daemon HOME=/ PATH=/sbin:/bin:/usr/sbin:/usr/bin ${VARS} "$dir/$script" "$@" fi done diff --git a/usr.sbin/ypldap/entries.c b/usr.sbin/ypldap/entries.c --- a/usr.sbin/ypldap/entries.c +++ b/usr.sbin/ypldap/entries.c @@ -40,7 +40,6 @@ void flatten_entries(struct env *env) { - size_t wrlen; size_t len; char *linep; char *endp; @@ -56,7 +55,6 @@ * * An extra octet is alloced to make space for an additional NUL. 
*/ - wrlen = env->sc_user_line_len; if ((linep = calloc(1, env->sc_user_line_len + 1)) == NULL) { /* * XXX: try allocating a smaller chunk of memory @@ -78,7 +76,6 @@ free(ue->ue_line); ue->ue_line = endp; endp += len; - wrlen -= len; /* * To save memory strdup(3) the netid_line which originally used @@ -94,7 +91,6 @@ env->sc_user_lines = linep; log_debug("done pushing users"); - wrlen = env->sc_group_line_len; if ((linep = calloc(1, env->sc_group_line_len + 1)) == NULL) { /* * XXX: try allocating a smaller chunk of memory @@ -115,7 +111,6 @@ free(ge->ge_line); ge->ge_line = endp; endp += len; - wrlen -= len; } env->sc_group_lines = linep; log_debug("done pushing groups"); diff --git a/usr.sbin/ypldap/ldapclient.c b/usr.sbin/ypldap/ldapclient.c --- a/usr.sbin/ypldap/ldapclient.c +++ b/usr.sbin/ypldap/ldapclient.c @@ -346,7 +346,7 @@ pid_t ldapclient(int pipe_main2client[2]) { - pid_t pid, dns_pid; + pid_t pid; int pipe_dns[2]; struct passwd *pw; struct event ev_sigint; @@ -371,7 +371,7 @@ if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_dns) == -1) fatal("socketpair"); - dns_pid = ypldap_dns(pipe_dns, pw); + ypldap_dns(pipe_dns, pw); close(pipe_dns[1]); #ifndef DEBUG diff --git a/usr.sbin/ypldap/yp.c b/usr.sbin/ypldap/yp.c --- a/usr.sbin/ypldap/yp.c +++ b/usr.sbin/ypldap/yp.c @@ -268,9 +268,6 @@ int yp_check(struct svc_req *req) { - struct sockaddr_in *caller; - - caller = svc_getcaller(req->rq_xprt); /* * We might want to know who we allow here. */